For each of the below Classifiers, do the following:

1. Evaluate its performance on the sample 'wine' dataset built into scikit-learn
2. Learn and explain how the model works, and if it is binary / multi classification.
3. Attempt to explain why the model performed how it did with the given dataset.

* Logistic Regression
* Decision Trees
* Random Forest
* Support Vector Machines (SVM) (Both with linear kernels and non-linear kernels!)
* Naive Bayes
* K-Nearest Neighbors (KNN)
* Gradient Boosting Machines (GBM)
* Linear Discriminant Analysis (LDA)

In [104]:
from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import pandas as pd

wine = load_wine()
X = wine.data
y = wine.target
df = pd.DataFrame(wine.data, columns = wine.feature_names)
df['target'] = pd.Categorical.from_codes(wine.target,wine.target_names)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=16)
# print(wine.DESCR)
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,class_0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,class_0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,class_0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,class_0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,class_0


## Logistic Regression

Models probabilities using a logistic function. Works with binary or multi


In [76]:
log_model = LogisticRegression(max_iter=4000)
log_model.fit(X_train,y_train)
log_pred = log_model.predict(X_test)

accuracy = accuracy_score(y_test, log_pred)
precision = precision_score(y_test, log_pred, average='weighted')
recall = recall_score(y_test, log_pred, average='weighted')
confusion = confusion_matrix(y_test,log_pred)

print(round(accuracy, 5))
print(round(precision, 5))
print(round(recall, 5))
print(confusion)

0.96296
0.96497
0.96296
[[17  0  0]
 [ 1 17  1]
 [ 0  0 18]]


## Decision Trees
Method finds splits (decision nodes) that result in information gain (show something about where different class data lie) \
and repeats until most or all data is sorted into pure final (leaf) nodes \
New data is classed according to decision node conditions and majority vote if applicable \
Binary or multiclass

In [77]:
dtree_model = DecisionTreeClassifier()
dtree_model.fit(X_train,y_train)
dtree_pred = dtree_model.predict(X_test)

accuracy = accuracy_score(y_test, dtree_pred)
precision = precision_score(y_test, dtree_pred, average='weighted')
recall = recall_score(y_test, dtree_pred, average='weighted')
confusion = confusion_matrix(y_test,dtree_pred)

print(round(accuracy, 5))
print(round(precision, 5))
print(round(recall, 5))
print(confusion)
# tree.plot_tree(dtree_model)

0.94444
0.94444
0.94444
[[17  0  0]
 [ 1 17  1]
 [ 0  1 17]]


## Random Forest

Ensemble of decision trees.

In [94]:
rf_model = RandomForestClassifier()
rf_model.fit(X_train,y_train)
rf_pred = rf_model.predict(X_test)

accuracy = accuracy_score(y_test, rf_pred)
precision = precision_score(y_test, rf_pred, average='weighted')
recall = recall_score(y_test, rf_pred, average='weighted')
confusion = confusion_matrix(y_test,rf_pred)

print(round(accuracy, 5))
print(round(precision, 5))
print(round(recall, 5))
print(confusion)

0.98148
0.98246
0.98148
[[17  0  0]
 [ 0 18  1]
 [ 0  0 18]]


## Support Vector Machine - Linear

In [80]:
svm_model = SVC(kernel='linear')
svm_model.fit(X_train,y_train)
svm_pred = svm_model.predict(X_test)

accuracy = accuracy_score(y_test, svm_pred)
precision = precision_score(y_test, svm_pred, average='weighted')
recall = recall_score(y_test, svm_pred, average='weighted')
confusion = confusion_matrix(y_test,svm_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.9629629629629629
0.9649664284167209
0.9629629629629629
[[17  0  0]
 [ 1 17  1]
 [ 0  0 18]]


## LinearSVC

In [102]:
svm_model = LinearSVC(max_iter=50000)
svm_model.fit(X_train,y_train)
svm_pred = svm_model.predict(X_test)

accuracy = accuracy_score(y_test, svm_pred)
precision = precision_score(y_test, svm_pred, average='weighted')
recall = recall_score(y_test, svm_pred, average='weighted')
confusion = confusion_matrix(y_test,svm_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)



0.9074074074074074
0.9122685185185185
0.9074074074074074
[[17  0  0]
 [ 3 15  1]
 [ 0  1 17]]




## Support Vector Machine - RBF

In [93]:
svml_model = SVC(kernel='rbf') # Sigmoid and poly even worse than this
svml_model.fit(X_train,y_train)
svml_pred = svml_model.predict(X_test)

accuracy = accuracy_score(y_test, svml_pred)
precision = precision_score(y_test, svml_pred, average='weighted')
recall = recall_score(y_test, svml_pred, average='weighted')
confusion = confusion_matrix(y_test,svml_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.6111111111111112
0.5847701149425287
0.6111111111111112
[[15  0  2]
 [ 1 15  3]
 [ 1 14  3]]


## Naive Bayes

Looks at each column and makes probability function associating feature values with a given outcome \
Naive means assuming each column is independent

In [101]:
nb_model = MultinomialNB()
nb_model.fit(X_train,y_train)
nb_pred = nb_model.predict(X_test)

accuracy = accuracy_score(y_test, nb_pred)
precision = precision_score(y_test, nb_pred, average='weighted')
recall = recall_score(y_test, nb_pred, average='weighted')
confusion = confusion_matrix(y_test,nb_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.7777777777777778
0.7789760348583877
0.7777777777777778
[[15  2  0]
 [ 1 14  4]
 [ 1  4 13]]


## K-Nearest Neighbors

In [81]:
knn_model = KNeighborsClassifier()
knn_model.fit(X_train,y_train)
knn_pred = knn_model.predict(X_test)

accuracy = accuracy_score(y_test, knn_pred)
precision = precision_score(y_test, knn_pred, average='weighted')
recall = recall_score(y_test, knn_pred, average='weighted')
confusion = confusion_matrix(y_test,knn_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.6851851851851852
0.6691798941798942
0.6851851851851852
[[17  0  0]
 [ 1 12  6]
 [ 2  8  8]]


## Gradient Boosting Machines

In [87]:
gbm_model = GradientBoostingClassifier()
gbm_model.fit(X_train,y_train)
gbm_pred = gbm_model.predict(X_test)

accuracy = accuracy_score(y_test, gbm_pred)
precision = precision_score(y_test, gbm_pred, average='weighted')
recall = recall_score(y_test, gbm_pred, average='weighted')
confusion = confusion_matrix(y_test,gbm_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.9629629629629629
0.9639917695473251
0.9629629629629629
[[17  0  0]
 [ 1 18  0]
 [ 0  1 17]]


## Linear Discriminant Analysis

Find a linear combination of features that best separate the classes. Focused on maximizing separation of classes.

In [88]:
lda_model = LinearDiscriminantAnalysis()
lda_model.fit(X_train,y_train)
lda_pred = lda_model.predict(X_test)

accuracy = accuracy_score(y_test, lda_pred)
precision = precision_score(y_test, lda_pred, average='weighted')
recall = recall_score(y_test, lda_pred, average='weighted')
confusion = confusion_matrix(y_test,lda_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.9814814814814815
0.9824561403508772
0.9814814814814815
[[17  0  0]
 [ 0 18  1]
 [ 0  0 18]]
