In [29]:
import csv
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn import tree
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier


In [18]:
def load_file(fileName):
    """Read a comma-separated file into a pandas DataFrame.

    Parameters
    ----------
    fileName : path or file-like
        Location of the delimited text file. Its first row is used as the
        column header.

    Returns
    -------
    pandas.DataFrame
        The parsed table. The file is decoded with the ``unicode_escape``
        codec.
    """
    return pd.read_table(
        fileName,
        sep=",",
        header=0,
        encoding="unicode_escape",
    )

In [19]:
def preprocess(test,train):
    """Separate the label column from the pixel columns of both frames.

    Both frames must contain a ``label`` column and columns named
    ``pixel1`` .. ``pixelN``, where ``N`` is ``test.shape[1] - 1`` (the width
    of the *test* frame decides which pixel columns are selected from both
    frames).

    Parameters
    ----------
    test : pandas.DataFrame
        Held-out data. Note that it is the FIRST positional argument.
    train : pandas.DataFrame
        Training data.

    Returns
    -------
    tuple
        ``(trainingX, trainingY, testX, testY)`` where the ``X`` items are
        DataFrames of pixel columns and the ``Y`` items are the ``label``
        Series.
    """
    label_column = "label"
    pixel_columns = ["pixel" + str(index) for index in range(1, test.shape[1])]

    trainingY = train[label_column]
    trainingX = train[pixel_columns]
    testY = test[label_column]
    testX = test[pixel_columns]

    return trainingX, trainingY, testX, testY

In [38]:
def learn_model(data, target, random_state=None):
    """Fit four classifiers on the same training data.

    Parameters
    ----------
    data : array-like
        Feature matrix passed unchanged to each estimator's ``fit``.
    target : array-like
        Class labels passed unchanged to each estimator's ``fit``.
    random_state : int, RandomState instance or None, optional
        Seed forwarded to the decision tree, random forest and AdaBoost
        estimators so that a run can be reproduced. The default ``None``
        leaves them unseeded, which is what the function did before this
        parameter existed. ``GaussianNB`` takes no seed.

    Returns
    -------
    list
        The fitted estimators in a fixed order: Gaussian naive Bayes,
        decision tree, random forest (100 trees), AdaBoost (100 estimators).
        ``evaluate`` labels its output by this order.
    """
    estimators = [
        GaussianNB(),
        tree.DecisionTreeClassifier(random_state=random_state),
        RandomForestClassifier(n_estimators=100, random_state=random_state),
        AdaBoostClassifier(n_estimators=100, random_state=random_state),
    ]

    # ``fit`` returns the estimator itself, so the list holds fitted models.
    return [estimator.fit(data, target) for estimator in estimators]

In [22]:
def classify(classifier, testdata):
    """Run every fitted model over the same test data.

    Parameters
    ----------
    classifier : iterable
        Fitted models; each must expose ``predict``.
    testdata : array-like
        Feature matrix handed unchanged to each model's ``predict``.

    Returns
    -------
    list
        One prediction result per model, in the order the models were given.
    """
    return [model.predict(testdata) for model in classifier]


In [42]:
def evaluate(
    actual_class,
    predicted_class,
    labels=(
        "naive bayes",
        "decision tree",
        "Random Forest Classifier",
        "AdaBoost classifier",
    ),
):
    """Print the accuracy of each set of predictions against the true labels.

    Parameters
    ----------
    actual_class : array-like
        Ground-truth labels.
    predicted_class : iterable
        One prediction vector per model, in the same order as ``labels``.
    labels : sequence of str, optional
        Display name for each prediction vector. The default matches the
        order in which ``learn_model`` returns its estimators.

    Returns
    -------
    None
        The scores are only printed.

    Notes
    -----
    One line is printed per prediction vector. Supplying fewer vectors than
    labels no longer raises ``IndexError``; vectors beyond the available
    labels are reported as ``classifier <position>`` instead of being
    silently dropped.
    """
    # Score everything first so a failure in scoring prints nothing partial.
    scores = [accuracy_score(actual_class, predictions) for predictions in predicted_class]

    for position, score in enumerate(scores):
        if position < len(labels):
            name = labels[position]
        else:
            name = "classifier " + str(position + 1)
        print("The accuracy score of " + name + " is :", score)

In [33]:
# Read the Fashion-MNIST train and test CSVs; the paths are relative, so both
# files must sit in the notebook's working directory.
print("Loading data.....")
train = load_file("fashion-mnist_train.csv")
test = load_file("fashion-mnist_test.csv")


Loading data.....


In [34]:
# Split each frame into features and labels. preprocess takes the test frame
# FIRST and the train frame second, but returns the training pair first.
print("preprocessing data.....")
trainingX,trainingY,testX,testY = preprocess(test,train)

preprocessing data.....


In [39]:
# Fit all classifiers on the training split; `model` is a list of fitted
# estimators, not a single model.
print("Learning model.....")
model = learn_model(trainingX,trainingY)

Learning model.....


In [40]:
# Predict the test split with every fitted estimator; `predictedY` holds one
# prediction result per estimator, in the same order as `model`.
print("Classifying test data......")      
predictedY = classify(model, testX)

Classifying test data......


In [43]:
# Print one accuracy line per estimator, comparing its predictions with the
# true test labels.
print("Evaluating results.....")
evaluate(testY,predictedY)

Evaluating results.....
The accuracy score of naive bayes is : 0.5914
The accuracy score of decision tree is : 0.7961
The accuracy score of Random Forest Classifier is : 0.885
The accuracy score of AdaBoost classifier is : 0.5806
