# Supervised Learning - Classification

## Import dataset and packages

In [23]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

In [4]:
iris = load_iris()

In [10]:
feature_names = iris.feature_names
target_names = iris.target_names
X = iris.data
y = iris.target

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

## Splitting data

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 
X_train.shape

(90, 4)

## Create a model

### K-Nearest Neighbor

In [19]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)

### Decision Tree

In [24]:
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)

DecisionTreeClassifier()

## Test the trained models

In [25]:
# KNN
y_pred_knn = knn.predict(X_test)
knn_score = metrics.accuracy_score(y_test, y_pred_knn)

# Decision Tree
y_pred_dt = dt.predict(X_test)
dt_score = metrics.accuracy_score(y_test, y_pred_dt)

# Print scores
print(f"KNN score: {knn_score} Decision Tree score: {dt_score}")

KNN score: 0.95 Decision Tree score: 0.9333333333333333


## Prediction

In [29]:
sample = [[3, 5, 4, 2], [2, 3, 5, 4]]
knn_pred = knn.predict(sample)
predict_species = [iris.target_names[p] for p in knn_pred]
print("Predictions:", predict_species)

Predictions: ['versicolor', 'virginica']


## Save and load the model with joblib
**Model presistence**: After training a scikit-learn model, it is desirable to have a way to persist the model for future use without having to retrain. 

In [31]:
from joblib import dump, load
dump(knn, 'mlbrain.joblib')

['mlbrain.joblib']

In [34]:
model = load('mlbrain.joblib')
model.predict(sample)
predict_species = [iris.target_names[p] for p in knn_pred]
print("Predictions:", predict_species)

Predictions: ['versicolor', 'virginica']
