In [60]:
import csv
import pandas as pd
import numpy as np
import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [61]:
def load_file(fileName):
    """Read a comma-separated file into a pandas DataFrame.

    Parameters
    ----------
    fileName : str or path-like
        Location of the file to read. Its first row supplies the column
        names; the contents are decoded with the ``unicode_escape`` codec.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    return pd.read_csv(fileName, sep=",", header=0, encoding="unicode_escape")

In [62]:
def preprocess(data):
    """Separate the target column from the feature columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Table that contains a ``'label'`` column. It is not modified.

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` is ``data`` without the ``'label'`` column
        and ``Y`` is the ``'label'`` column itself.
    """
    labels = data['label']
    features = data.drop(columns='label')
    return features, labels

In [63]:
def learn_model(data, target, classifiers, models):
    """Fit one estimator per requested classifier name.

    Parameters
    ----------
    data : array-like
        Training features, passed unchanged to each estimator's ``fit``.
    target : array-like
        Training labels, passed unchanged to each estimator's ``fit``.
    classifiers : iterable of str
        Names of the models to train. Supported names are
        ``"Decision Tree"``, ``"Random Forest"``, ``"Naive Bayes"`` and
        ``"Linear Discriminant"``.
    models : list
        List that the fitted estimators are appended to (mutated in place),
        in the same order as ``classifiers``.

    Returns
    -------
    list
        The same ``models`` list that was passed in.

    Raises
    ------
    ValueError
        If ``classifiers`` contains a name that is not supported. The check
        runs before any estimator is fitted, so ``models`` is left untouched
        in that case.
    """
    # Each entry is a zero-argument factory so a fresh, unfitted estimator is
    # built only for the names that are actually requested.
    factories = {
        "Decision Tree": lambda: DecisionTreeClassifier(),
        "Random Forest": lambda: RandomForestClassifier(),
        "Naive Bayes": lambda: GaussianNB(),
        "Linear Discriminant": lambda: LinearDiscriminantAnalysis(),
    }

    # Materialise once so a one-shot iterable can be both validated and used.
    requested = list(classifiers)

    # A silently skipped name would leave `models` shorter than `classifiers`,
    # and positional reporting would then attach scores to the wrong names.
    unknown = [name for name in requested if name not in factories]
    if unknown:
        raise ValueError(
            "Unknown classifier name(s): {}. Expected one of: {}".format(
                unknown, sorted(factories)
            )
        )

    for name in requested:
        models.append(factories[name]().fit(data, target))
    return models

In [64]:
def classify(models, testdata):
    """Run every fitted model on the same test set.

    Parameters
    ----------
    models : iterable
        Objects exposing a ``predict(testdata)`` method.
    testdata : array-like
        Samples handed unchanged to each model's ``predict``.

    Returns
    -------
    list
        One entry per model, in the order of ``models``; each entry is
        whatever that model's ``predict`` returned.
    """
    return [fitted.predict(testdata) for fitted in models]

In [65]:
def evaluate(actual_class, predicted_class, classifiers, models):
    """Print the accuracy of each model's predictions.

    Parameters
    ----------
    actual_class : array-like
        True labels of the test samples.
    predicted_class : sequence
        Predictions indexed by model position; entry ``i`` is compared
        against ``actual_class``.
    classifiers : sequence of str
        Display names indexed by model position.
    models : sequence
        Only its length is used: one line is printed per element.

    Returns
    -------
    None
    """
    for position, _ in enumerate(models):
        score = accuracy_score(actual_class, predicted_class[position])
        print("The accuracy score for ", classifiers[position], " classifier is: ", score)

In [66]:
# End-to-end run: load both CSV files, fit every requested classifier on the
# training set, predict on the test set, and print each model's accuracy.
print("Loading data.....")
datasetTrain = load_file("fashion-mnist_train.csv")
datasetTest = load_file("fashion-mnist_test.csv")

# Separate the 'label' column (target) from the remaining feature columns.
print("Splitting data.....")
trainingX, trainingY = preprocess(datasetTrain)
testX, testY = preprocess(datasetTest)

# Edit this list to train a subset, e.g. ["Decision Tree"].
classifiers = ["Decision Tree", "Random Forest", "Naive Bayes", "Linear Discriminant"]
models = []

# Learn one classifier at a time so each progress line is printed right
# before the model it names is fitted, and only for the models requested.
for name in classifiers:
    print("----------------------------------")
    print("Learning", name, "Classifier")
    models = learn_model(trainingX, trainingY, [name], models)
print("----------------------------------")

# Make predictions
print("Classifying test data......")
predictedY = classify(models, testX)

# Evaluate results
print("Evaluating results.....")
evaluate(testY, predictedY, classifiers, models)



Loading data.....
Splitting data.....
----------------------------------
Learning Decision Tree Classifier
----------------------------------
Learning Random Forest Classifier
----------------------------------
Learning Naive Bayes Classifier
----------------------------------
Learning Linear Discriminant Classifier
----------------------------------
Classifying test data......
Evaluating results.....
The accuracy score for  Decision Tree  classifier is:  0.7993
The accuracy score for  Random Forest  classifier is:  0.8856
The accuracy score for  Naive Bayes  classifier is:  0.5914
The accuracy score for  Linear Discriminant  classifier is:  0.8256
