# CSc 177 Prj 3 - Classification Models

Author: Alec Resha
Date: 04/19/2022

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import tree
from sklearn.linear_model import LogisticRegression
from seaborn import heatmap


## My Dataset
Wine Quality dataset (white wines), used to predict each wine's quality score on a 0–10 scale.   

### Data Processing

In [2]:
# Read the semicolon-delimited white-wine CSV and discard any incomplete rows.
df = pd.read_csv('data/winequality-white.csv', sep=';').dropna()

## Data setup

In [3]:
# Features: every column except the target. Target: the 'quality' column.
x = df.drop(columns=['quality']).to_numpy()
y = df['quality'].to_numpy()

In [4]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
def make_heatmap(cm, y_pred, y_true=None):
    """Draw a confusion matrix as an annotated seaborn heatmap and show it.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are plotted as actual values and columns as
        predicted values.
    y_pred : array-like
        Predicted labels that produced ``cm``.
    y_true : array-like, optional
        Actual labels that produced ``cm``. When supplied, the tick labels are
        the sorted union of ``y_true`` and ``y_pred``. When omitted, only
        ``y_pred`` is used, which is correct only if the model predicted every
        class present in the matrix.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    cm = np.asarray(cm)

    # Collect the class labels in sorted order.
    observed = [np.ravel(y_pred)]
    if y_true is not None:
        observed.append(np.ravel(y_true))
    labels = np.unique(np.concatenate(observed))

    # Only use the labels when there is exactly one per row/column; otherwise
    # fall back to seaborn's automatic index ticks rather than mislabel cells.
    if cm.ndim == 2 and cm.shape[0] == cm.shape[1] == len(labels):
        tick_labels = list(labels)
    else:
        tick_labels = "auto"

    # The default '.2g' format shows counts >= 100 in scientific notation, so
    # integer matrices are annotated as plain integers.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2g'

    # Passing labels through heatmap() centres each one on its cell; setting
    # ticks by hand at 0..n-1 would put them on the cell edges.
    ax = heatmap(
        cm,
        annot=True,
        fmt=fmt,
        cmap='Blues',
        xticklabels=tick_labels,
        yticklabels=tick_labels,
    )
    ax.set_title('Seaborn Confusion Matrix with labels\n\n')
    ax.set_xlabel('\nPredicted Values')
    ax.set_ylabel('Actual Values ')

    ## Display the visualization of the Confusion Matrix.
    plt.show()

In [14]:
def run_all_algos(x_train, x_test, y_train, y_test):
    """Fit five classifiers and score each one on the test split.

    The models run in this order: Gaussian Naive Bayes, k-nearest neighbours
    (k=4), SVM with a sigmoid kernel, decision tree, logistic regression.
    A "<model> completed" line is printed after each one is scored.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to each model's ``fit``.
    x_test, y_test
        Test features passed to ``predict`` and the labels they are scored against.

    Returns
    -------
    dict
        Maps "bayes", "knn", "svm", "dt" and "log" to a two-item list
        ``[accuracy_score, confusion_matrix]`` computed on the test split.
    """
    # (result key, name used in the progress message, unfitted estimator)
    models = [
        ("bayes", "Naive Bayes", GaussianNB()),
        ("knn", "KNN", KNeighborsClassifier(n_neighbors=4)),
        ("svm", "SVM", svm.SVC(kernel='sigmoid')),
        ("dt", "Decision Tree", tree.DecisionTreeClassifier()),
        # NOTE(review): the recorded run printed "TOTAL NO. of ITERATIONS
        # REACHED LIMIT" for this model; consider scaling the features or
        # raising max_iter.
        ("log", "Logistic Regression", LogisticRegression()),
    ]

    acc = {}
    for key, label, model in models:
        model.fit(x_train, y_train)
        # Predict once and reuse the result for both metrics.
        y_pred = model.predict(x_test)
        acc[key] = [accuracy_score(y_test, y_pred), confusion_matrix(y_test, y_pred)]
        print(f"{label} completed")

    return acc

In [None]:
def display_data(acc):
    """Print the accuracy and confusion matrix recorded for each model.

    Parameters
    ----------
    acc : dict
        Maps a model name to a two-item sequence ``[accuracy, confusion_matrix]``.

    Returns
    -------
    None
        Results are written to standard output in the dict's iteration order.
    """
    for name, (accuracy, cm) in acc.items():
        print(f"{name}: accuracy = {accuracy:.4f}")
        print(cm)
        print()

In [15]:
# Train and score every classifier on the held-out split.
acc = run_all_algos(
    x_train=x_train,
    x_test=x_test,
    y_train=y_train,
    y_test=y_test,
)

Naive Bayes completed
KNN completed
SVM completed
Decision Tree completed
Logistic Regression completed


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


{'bayes': [0.4387755102040816, array([[  2,   0,   1,   2,   0,   0,   0],
       [  1,   6,   8,   6,   4,   0,   0],
       [  7,  14, 155,  79,  36,   0,   0],
       [  4,   5, 117, 144, 159,   2,   1],
       [  0,   0,  22,  41, 123,   6,   0],
       [  0,   1,   2,   5,  27,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0]], dtype=int64)], 'knn': [0.49183673469387756, array([[  1,   0,   3,   0,   1,   0],
       [  1,   3,  11,   9,   1,   0],
       [  0,  12, 160,  98,  18,   3],
       [  1,   7, 118, 250,  50,   6],
       [  0,   4,  35,  87,  60,   6],
       [  0,   0,   1,  16,  10,   8]], dtype=int64)], 'svm': [0.3051020408163265, array([[  0,   0,   0,   3,   0,   2],
       [  0,   0,  11,  12,   0,   2],
       [  0,   0,  90, 171,   0,  30],
       [  0,   0, 197, 208,   0,  27],
       [  0,   0, 100,  87,   0,   5],
       [  0,   0,  27,   7,   0,   1]], dtype=int64)], 'dt': [0.6091836734693877, array([[  0,   1,   3,   1,   0,   0],
       [  1,   7,   8,