In [23]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import pandas as pd

In [152]:
data = load_wine()

X = data.data
y = data.target

print(data)

{'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
 

In [151]:
df = pd.DataFrame(data.data, columns = data.feature_names)
df['target'] = pd.Categorical.from_codes(data.target,data.target_names)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state=16)

In [111]:
# Logistic Regression Classifier
log_model = LogisticRegression(penalty='l2', multi_class='multinomial', C=10, fit_intercept=True, max_iter=5000, random_state=1)
log_model.fit(X_train, y_train)
log_pred = log_model.predict(X_test)

accuracy = accuracy_score(y_test, log_pred)
precision = precision_score(y_test, log_pred, average='macro')
recall = recall_score(y_test, log_pred, average='macro')
confusion = confusion_matrix(y_test, log_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.9555555555555556
0.9606481481481483
0.9487179487179488
[[15  0  0]
 [ 1 11  1]
 [ 0  0 17]]


In [98]:
# Decision Tree Classifier
tree_model = DecisionTreeClassifier(criterion= 'entropy', splitter='random', min_samples_leaf=3, random_state= 1)
tree_model.fit(X_train, y_train)
tree_pred = tree_model.predict(X_test)

accuracy = accuracy_score(y_test, tree_pred)
precision = precision_score(y_test, tree_pred, average='macro')
recall = recall_score(y_test, tree_pred, average='macro')
confusion = confusion_matrix(y_test, tree_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.9111111111111111
0.9108734402852049
0.9034690799396682
[[15  0  0]
 [ 2 10  1]
 [ 0  1 16]]


In [117]:
# Random Forest Classifier
forest_model = RandomForestClassifier(n_estimators=20)
forest_model.fit(X_train, y_train)
forest_pred = forest_model.predict(X_test)

accuracy = accuracy_score(y_test, forest_pred)
precision = precision_score(y_test, forest_pred, average='macro')
recall = recall_score(y_test, forest_pred, average='macro')
confusion = confusion_matrix(y_test, forest_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.9777777777777777
0.9814814814814815
0.9743589743589745
[[15  0  0]
 [ 0 12  1]
 [ 0  0 17]]


In [149]:
# Support Vector Classifier (linear)
sv_model = SVC(kernel='linear', gamma='auto', decision_function_shape='ovo')
sv_model.fit(X_train, y_train)
sv_pred = sv_model.predict(X_test)

accuracy = accuracy_score(y_test, sv_pred)
precision = precision_score(y_test, sv_pred, zero_division= 1.0, average='macro')
recall = recall_score(y_test, sv_pred, average='macro')
confusion = confusion_matrix(y_test, sv_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.9555555555555556
0.9606481481481483
0.9487179487179488
[[15  0  0]
 [ 1 11  1]
 [ 0  0 17]]


In [150]:
# Support Vector Classifier (non-linear)
sv_model = SVC(kernel='poly', gamma='auto', degree=3, decision_function_shape='ovo')
sv_model.fit(X_train, y_train)
sv_pred = sv_model.predict(X_test)

accuracy = accuracy_score(y_test, sv_pred)
precision = precision_score(y_test, sv_pred, zero_division= 1.0, average='macro')
recall = recall_score(y_test, sv_pred, average='macro')
confusion = confusion_matrix(y_test, sv_pred)

print(accuracy)
print(precision)
print(recall)
print(confusion)

0.9777777777777777
0.9814814814814815
0.9743589743589745
[[15  0  0]
 [ 0 12  1]
 [ 0  0 17]]
