# Loading the Dataset

In [1]:
import pandas as pd

In [2]:
# Load the diabetes CSV (path is relative to the notebook's working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer="./data/diabetes.csv")

In [3]:
# Preview the first five rows to sanity-check the columns that were read
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,4,183,0,0,0,28.4,0.212,36,1
1,5,162,104,0,0,37.7,0.151,52,1
2,2,197,70,99,0,34.7,0.575,62,1
3,13,158,114,0,0,42.3,0.257,44,1
4,0,162,76,56,100,53.2,0.759,25,1


# Training and evaluating a simple model (Decision Tree)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import accuracy_score

In [5]:
# "Outcome" is the class label (y); every remaining column is an input feature (X)
y = df["Outcome"]
X = df.drop(columns=["Outcome"])

In [6]:
# Hold out 33% of the rows as a test set. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    stratify=y,
    random_state=42,
)

In [7]:
# Train a Decision tree using the training set.
# scikit-learn randomly permutes the features at each split, so without a fixed
# random_state the fitted tree (and the accuracy measured from it) can change
# between runs. Pinning the seed makes the result reproducible.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(X_train, y_train)

In [8]:
# Ask the fitted tree for a predicted class for every row of the held-out test set
y_pred = clf.predict(X=X_test)

In [9]:
# Accuracy = fraction of test rows whose predicted label matches the true label
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy = {acc}")

Accuracy = 0.7142857142857143


# Try this out with other models (your work)

#### For a list of available models, check this link: https://scikit-learn.org/stable/supervised_learning.html

# Train your model on the entire dataset and export it

In [10]:
from joblib import dump

In [11]:
# Refit on the entire dataset (train + test rows) so the exported model has seen
# every labelled example. A fixed random_state makes the exported tree
# reproducible: re-running this cell yields the same model.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(X, y)

In [12]:
# Serialize the fitted classifier to disk; dump returns the list of files it wrote
dump(value=clf, filename='my_model.joblib')

['my_model.joblib']

# Submit your model to the <font color="red">Classification Battle</font>

Now that you have trained and exported your model, let's see how your solution stacks up against the others.

Go to www.ml-battle.com (Everaldo will share the _login_ / _password_)

Use the `Submit Model` button to upload your exported model and to see where you rank. Once your model is uploaded, it will be evaluated using an out-of-sample dataset that was set aside for that purpose.

<img src="https://github.com/anfibil/classification-battle/raw/master/temp/screenshot2.png">

## _Good luck!_

## Additional notes

### Loading your saved model

In [14]:
from joblib import load

In [15]:
# Restore the classifier saved earlier. joblib files are pickle-based, so only
# load files that come from a source you trust.
clf = load(filename='my_model.joblib')

### Using your model to predict the label of an unseen instance

**Recall our list of features:** 

[Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age]

(`Outcome` is the label the model predicts, so it is not part of the input.)

In [68]:
# One row per unseen patient; values follow the feature order listed above
# (Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI,
#  DiabetesPedigreeFunction, Age)
patients = [
    [4, 183, 0, 0, 0, 50, 0.500, 50],
    [0, 50, 0, 0, 99, 29, 0.250, 38],
]

In [75]:
# Score all patients in one batched predict call instead of one call per row,
# then report each result. Patients are numbered from 1 for display.
predictions = clf.predict(patients)
for number, pred in enumerate(predictions, start=1):
    # The branches below treat a predicted label of 1 as "has diabetes"
    diagnosis = "has diabetes" if pred == 1 else "does NOT have diabetes"
    print(f"Patient {number} {diagnosis}")

Patient 1 has diabetes
Patient 2 does NOT have diabetes
