# Multimodels exercise


In [1]:
%matplotlib notebook


In [2]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.metrics import confusion_matrix, accuracy_score


In [3]:
WINE_DATA_PATH = "../Jupyter/wine.csv"
WINE_TEST_DATA_PATH = "../Jupyter/wineTestModel.csv"


In [4]:
# Configuration variables
N_NEIGHBORS = 3


In [5]:
knn = "KNN"
decision_tree = "DecisionTree"
logistic_regression = "LogisticRegression"
svm = "SVM"
bayes = "Bayes"

available_models = {
    knn: KNeighborsClassifier(n_neighbors=N_NEIGHBORS),
    decision_tree: DecisionTreeClassifier(),
    logistic_regression: LogisticRegression(),
    svm: SVC(),
    bayes: GaussianNB()
}

predictions = {}

accuracy_dict = {}


In [7]:
data = pd.read_csv(WINE_DATA_PATH, skipinitialspace=True, skip_blank_lines=True)
# test_data = pd.read_csv(WINE_TEST_DATA_PATH, skipinitialspace=True, skip_blank_lines=True)

training_labels = data["cultivar"]

data.drop("cultivar", inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    data, training_labels, random_state=1988
)

for model_name in available_models.keys():
    model = available_models.get(model_name)
    
    model.fit(X_train, y_train)
    
    prediction = model.predict(X_test)
    
    predictions[model_name] = prediction
    acc = accuracy_score(y_test, prediction)
    
    accuracy_dict[model_name] = acc
    
    print(f" Confusion Matrix for: {model_name}, Accuracy: {acc}")
    print(confusion_matrix(y_test, prediction))


 Confusion Matrix for: KNN, Accuracy: 0.5581395348837209
[[11  1  1]
 [ 2 11  5]
 [ 0 10  2]]
 Confusion Matrix for: DecisionTree, Accuracy: 0.8837209302325582
[[12  1  0]
 [ 2 15  1]
 [ 1  0 11]]
 Confusion Matrix for: LogisticRegression, Accuracy: 0.9767441860465116
[[13  0  0]
 [ 0 17  1]
 [ 0  0 12]]
 Confusion Matrix for: SVM, Accuracy: 0.46511627906976744
[[ 1 12  0]
 [ 0 18  0]
 [ 0 11  1]]
 Confusion Matrix for: Bayes, Accuracy: 0.9302325581395349
[[11  2  0]
 [ 1 17  0]
 [ 0  0 12]]




In [17]:
data = pd.read_csv(WINE_DATA_PATH, skipinitialspace=True, skip_blank_lines=True)
test_data = pd.read_csv(WINE_TEST_DATA_PATH, skipinitialspace=True, skip_blank_lines=True)

training_labels = data["cultivar"]

data.drop("cultivar", inplace=True, axis=1)
test_data.drop("cultivar", inplace=True, axis=1)

for model_name in available_models.keys():
    model = available_models.get(model_name)
    
    model.fit(data, training_labels)
    
    prediction = model.predict(test_data)
    
    print(f" Prediction using: {model_name}")
    print(f"{prediction}")



 Prediction using: KNN
[1 1 1 2 1 3]
 Prediction using: DecisionTree
[1 1 2 2 3 3]
 Prediction using: LogisticRegression
[1 1 1 2 3 3]
 Prediction using: SVM
[2 2 2 2 2 2]
 Prediction using: Bayes
[1 1 2 2 3 3]




Plotting Examples: http://queirozf.com/entries/pandas-dataframe-plot-examples-with-matplotlib-pyplot


In [7]:
data = pd.read_csv(WINE_DATA_PATH, skipinitialspace=True, skip_blank_lines=True)

training_labels = data["cultivar"]

data.drop("cultivar", inplace=True, axis=1)

for model_name in available_models.keys():
    model = available_models.get(model_name)
    
    #model.fit(X_train, y_train)
    
    predicted = cross_val_predict(model, data, training_labels, cv=3)
    
    fig, ax = plt.subplots()
    ax.scatter(training_labels, predicted)
    ax.plot([training_labels.min(), training_labels.max()], [training_labels.min(), training_labels.max()], 'k--', lw=4)
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    
    plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>