In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
# Load the train and test arrays from disk. In each array the last column is
# taken as the label and every preceding column as a feature.
train_all = np.load('fashion_train.npy')
test_all = np.load('fashion_test.npy')

# NOTE: the spelling `train_featues` is kept because the pipeline cell below
# reads the variable under exactly this name.
train_featues, train_labels = train_all[:, :-1], train_all[:, -1]
test_features, test_labels = test_all[:, :-1], test_all[:, -1]

In [36]:
# import required sklearn elements
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix

In [47]:
# Preprocessing: standardise every feature, then project onto the first 50
# principal components.
preprocessing_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    # random_state pins the randomized/arpack SVD solvers PCA may select, so
    # the projected features are identical on every Restart & Run All.
    ("pca", PCA(n_components=50, random_state=42))
])

# Fit on the training features only; the fitted pipeline is reused later with
# .transform() on the test features.
# NOTE(review): the cross-validation cells score on this already-transformed
# array, so the scaler/PCA have seen each validation fold. For strict CV
# estimates, put the preprocessing and the classifier in a single Pipeline and
# cross-validate on the raw features instead.
processed_train_data = preprocessing_pipeline.fit_transform(train_featues)

In [48]:
# Random forest (trees capped at depth 15), scored with 5-fold cross-validated
# accuracy. random_state fixes the bootstrap samples and the per-split feature
# subsets, so the fold scores (and the later test-set score obtained from this
# same estimator) are reproducible across kernel restarts.
rf_classifier = RandomForestClassifier(max_depth=15, random_state=42)
cross_val_score(rf_classifier, processed_train_data, train_labels, cv=5, scoring='accuracy')

array([0.8395, 0.8405, 0.851 , 0.847 , 0.8435])

In [49]:
# Single decision tree (depth capped at 10), scored with 5-fold cross-validated
# accuracy. scikit-learn permutes the candidate features at every split, so
# equally good splits can be chosen differently from run to run; random_state
# makes the fitted tree, and therefore the scores, deterministic.
dc_classifier = DecisionTreeClassifier(max_depth=10, random_state=42)
cross_val_score(dc_classifier, processed_train_data, train_labels, cv=5, scoring='accuracy')

array([0.7745, 0.775 , 0.7675, 0.7705, 0.765 ])

In [59]:
# Polynomial-kernel SVM (degree 3, coef0=2, C=5), scored with 5-fold
# cross-validated accuracy on the processed training data.
sv_classifier = SVC(C=5, kernel='poly', degree=3, coef0=2)
cross_val_score(
    estimator=sv_classifier,
    X=processed_train_data,
    y=train_labels,
    scoring='accuracy',
    cv=5,
)

array([0.869 , 0.8575, 0.864 , 0.8535, 0.8505])

In [60]:
# Apply the already-fitted scaler + PCA to the test features. This calls
# transform (not fit_transform), so nothing is re-fit on the test set.
processed_test_data = preprocessing_pipeline.transform(X=test_features)

In [61]:
# Refit the SVM on the whole processed training set, predict the test set,
# then report overall accuracy followed by the confusion matrix.
sv_test_y = sv_classifier.fit(processed_train_data, train_labels).predict(processed_test_data)
print(accuracy_score(y_true=test_labels, y_pred=sv_test_y))
print(confusion_matrix(y_true=test_labels, y_pred=sv_test_y))

0.8444
[[822   6  17  34 121]
 [  4 966   2  23   5]
 [ 34   4 847  23  92]
 [ 37   8  13 909  33]
 [160   7 119  36 678]]


In [62]:
# Refit the random forest on the whole processed training set, predict the
# test set, then report overall accuracy followed by the confusion matrix.
rf_test_y = rf_classifier.fit(processed_train_data, train_labels).predict(processed_test_data)
print(accuracy_score(y_true=test_labels, y_pred=rf_test_y))
print(confusion_matrix(y_true=test_labels, y_pred=rf_test_y))

0.838
[[824   0  16  51 109]
 [  3 944   8  35  10]
 [ 17   0 860  17 106]
 [ 30  10   6 913  41]
 [165   1 146  39 649]]
