# Supervised Learning | Classification (Model Selection & Evaluation)

### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# sns.set()

==========

## Model Selection & Evaluation

### Model Selection

In [None]:
from IPython.display import Image

# Show the machine-learning map image kept in the local imgs/ folder.
# The path is passed via `filename=` so IPython always treats the string as a
# file path and never has to guess whether it is a path or raw image data.
Image(filename="imgs/ml_map.png")

Choosing the Right Estimator: https://scikit-learn.org/stable/tutorial/machine_learning_map/index.html

### Importing Dataset

In [None]:
# Read the heights/weights CSV from the local data/ folder into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data/heights_weights.csv')
# Bare last expression: Jupyter renders the frame as a table.
df

### Exploring Data

In [None]:
# Height against weight, with points coloured by the "Male" column.
sns.scatterplot(data=df, x="Height", y="Weight", hue="Male")

### Splitting & Preprocessing Data

In [None]:
# Feature matrix: the first two columns of df as a NumPy array.
X = df.iloc[:, :2].to_numpy()
X

In [None]:
# Target vector: the third column of df as a 1-D NumPy array.
y = df.iloc[:, 2].to_numpy()
y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Dimensions of the training feature matrix (rows, columns).
X_train.shape

In [None]:
# Dimensions of the held-out test feature matrix (rows, columns).
X_test.shape

In [None]:
# Number of training labels; should match the row count of X_train.
Y_train.shape

In [None]:
# Number of test labels; should match the row count of X_test.
Y_test.shape

### Model Selection & Evaluation

Scikit-learn Classification Metrics: https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# `plot_confusion_matrix` was deprecated in scikit-learn 1.0 and removed in 1.2.
# When it is unavailable, define a drop-in replacement with the same name and
# (estimator, X, y_true) arguments, backed by ConfusionMatrixDisplay.from_estimator,
# so the plotting cells in this notebook run on both old and new releases.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    from sklearn.metrics import ConfusionMatrixDisplay

    def plot_confusion_matrix(estimator, X, y_true, **kwargs):
        """Plot the confusion matrix of a fitted `estimator` on (X, y_true).

        Extra keyword arguments are forwarded unchanged to
        `ConfusionMatrixDisplay.from_estimator`; the resulting display
        object is returned.
        """
        return ConfusionMatrixDisplay.from_estimator(estimator, X, y_true, **kwargs)

### 1] Logistic Regression Performance

In [None]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier with default settings on the training
# split, then predict labels for the held-out rows.
# scikit-learn's fit() returns the estimator itself, so logreg_model and
# logreg_clf refer to the same fitted object.
logreg_clf = LogisticRegression()
logreg_model = logreg_clf.fit(X=X_train, y=Y_train)
logreg_prediction = logreg_clf.predict(X=X_test)

In [None]:
# accuracy_score is documented as (y_true, y_pred). The value is the same either
# way, but the true labels go first to match the API.
print("Accuracy {0:.2f}%".format(100*accuracy_score(Y_test, logreg_prediction)))

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are true classes, columns are
# predicted classes. Passing them the other way round transposes the matrix.
print(confusion_matrix(Y_test, logreg_prediction))

In [None]:
# Draw the test-set confusion matrix and title it so the figure can be told
# apart from the other models' plots; the trailing ';' hides the Text repr.
logreg_cm_display = plot_confusion_matrix(logreg_clf, X_test, Y_test)
logreg_cm_display.ax_.set_title("Logistic Regression");

In [None]:
# classification_report expects (y_true, y_pred). With the arguments reversed,
# precision and recall are swapped and "support" counts the predictions.
print(classification_report(Y_test, logreg_prediction))

### 2] Random Forest Performance

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit (train) the Random Forest classifier.
# A random forest is built from randomised trees, so without a seed the metrics
# below change on every run. The seed matches the one used for the split.
ranfor_clf = RandomForestClassifier(random_state=0)
ranfor_model = ranfor_clf.fit(X_train, Y_train)
ranfor_prediction = ranfor_clf.predict(X_test)

In [None]:
# accuracy_score is documented as (y_true, y_pred). The value is the same either
# way, but the true labels go first to match the API.
print("Accuracy {0:.2f}%".format(100*accuracy_score(Y_test, ranfor_prediction)))

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are true classes, columns are
# predicted classes. Passing them the other way round transposes the matrix.
print(confusion_matrix(Y_test, ranfor_prediction))

In [None]:
# Draw the test-set confusion matrix and title it so the figure can be told
# apart from the other models' plots; the trailing ';' hides the Text repr.
ranfor_cm_display = plot_confusion_matrix(ranfor_clf, X_test, Y_test)
ranfor_cm_display.ax_.set_title("Random Forest");

In [None]:
# classification_report expects (y_true, y_pred). With the arguments reversed,
# precision and recall are swapped and "support" counts the predictions.
print(classification_report(Y_test, ranfor_prediction))

### 3] Support Vector Machine Performance

In [None]:
from sklearn.svm import SVC

# Train a support-vector classifier with default settings on the training
# split, then predict labels for the held-out rows.
# scikit-learn's fit() returns the estimator itself, so svm_model and svm_clf
# refer to the same fitted object.
svm_clf = SVC()
svm_model = svm_clf.fit(X=X_train, y=Y_train)
svm_prediction = svm_clf.predict(X=X_test)

In [None]:
# accuracy_score is documented as (y_true, y_pred). The value is the same either
# way, but the true labels go first to match the API.
print("Accuracy {0:.2f}%".format(100*accuracy_score(Y_test, svm_prediction)))

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are true classes, columns are
# predicted classes. Passing them the other way round transposes the matrix.
print(confusion_matrix(Y_test, svm_prediction))

In [None]:
# Draw the test-set confusion matrix and title it so the figure can be told
# apart from the other models' plots; the trailing ';' hides the Text repr.
svm_cm_display = plot_confusion_matrix(svm_clf, X_test, Y_test)
svm_cm_display.ax_.set_title("Support Vector Machine");

In [None]:
# classification_report expects (y_true, y_pred). With the arguments reversed,
# precision and recall are swapped and "support" counts the predictions.
print(classification_report(Y_test, svm_prediction))

### 4] K-NN Classifier Performance

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Train a k-nearest-neighbours classifier with default settings on the training
# split, then predict labels for the held-out rows.
# scikit-learn's fit() returns the estimator itself, so knn_model and knn_clf
# refer to the same fitted object.
knn_clf = KNeighborsClassifier()
knn_model = knn_clf.fit(X=X_train, y=Y_train)
knn_prediction = knn_clf.predict(X=X_test)

In [None]:
# accuracy_score is documented as (y_true, y_pred). The value is the same either
# way, but the true labels go first to match the API.
print("Accuracy {0:.2f}%".format(100*accuracy_score(Y_test, knn_prediction)))

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are true classes, columns are
# predicted classes. Passing them the other way round transposes the matrix.
print(confusion_matrix(Y_test, knn_prediction))

In [None]:
# Draw the test-set confusion matrix and title it so the figure can be told
# apart from the other models' plots; the trailing ';' hides the Text repr.
knn_cm_display = plot_confusion_matrix(knn_clf, X_test, Y_test)
knn_cm_display.ax_.set_title("K-NN Classifier");

In [None]:
# classification_report expects (y_true, y_pred). With the arguments reversed,
# precision and recall are swapped and "support" counts the predictions.
print(classification_report(Y_test, knn_prediction))

### Other Evaluation Metrics

In [None]:
# `plot_roc_curve` was deprecated in scikit-learn 1.0 and removed in 1.2.
# When it is unavailable, define a drop-in replacement with the same name and
# (estimator, X, y) arguments, backed by RocCurveDisplay.from_estimator, so the
# ROC cells in this notebook run on both old and new releases.
try:
    from sklearn.metrics import plot_roc_curve
except ImportError:
    from sklearn.metrics import RocCurveDisplay

    def plot_roc_curve(estimator, X, y, **kwargs):
        """Plot the ROC curve of a fitted `estimator` on (X, y).

        Extra keyword arguments are forwarded unchanged to
        `RocCurveDisplay.from_estimator`; the resulting display object
        is returned.
        """
        return RocCurveDisplay.from_estimator(estimator, X, y, **kwargs)

##### Logistic Regression

In [None]:
# ROC curve of the fitted logistic-regression model on the held-out test split.
plot_roc_curve(estimator=logreg_model, X=X_test, y=Y_test)

##### Random Forest

In [None]:
# ROC curve of the fitted random-forest model on the held-out test split.
plot_roc_curve(estimator=ranfor_model, X=X_test, y=Y_test)

##### Support Vector Machine

In [None]:
# ROC curve of the fitted support-vector model on the held-out test split.
plot_roc_curve(estimator=svm_model, X=X_test, y=Y_test)

##### K-NN Classifier

In [None]:
# ROC curve of the fitted k-nearest-neighbours model on the held-out test split.
plot_roc_curve(estimator=knn_model, X=X_test, y=Y_test)

==========

# THANK YOU!