In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import ensemble
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import time

In [None]:
import warnings

# Suppress every warning whose category is UserWarning for the rest of the session.
warnings.filterwarnings('ignore', category=UserWarning)

In [None]:
# Read the four CSV splits in one pass; the order of the paths matches the
# order of the names on the left-hand side.
train, test, public, private = (
    pd.read_csv(csv_path)
    for csv_path in (
        './archive/fashion-mnist_train.csv',
        './archive/fashion-mnist_test.csv',
        './archive/fashion-mnist_public.csv',
        './archive/fashion-mnist_private.csv',
    )
)

In [None]:
# Training features: every column of `train` except the 'label' target.
X_train = train.drop(columns=['label'])
X_train

In [None]:
# Show the (rows, columns) dimensions of the training feature frame.
X_train.shape

In [None]:
# Training target: the 'label' column of `train`.
y_train = train.loc[:, 'label']
y_train

In [None]:
# Show the length of the training target series.
y_train.shape

In [None]:
# Test features: every column of `test` except the 'label' target.
X_test = test.drop(columns=['label'])
X_test

In [None]:
# Show the (rows, columns) dimensions of the test feature frame.
X_test.shape

In [None]:
# Test target: the 'label' column of `test`.
y_test = test.loc[:, 'label']
y_test

In [None]:
# Show the length of the test target series.
y_test.shape

In [None]:
# Public-split features: every column of `public` except the 'label' target.
X_public = public.drop(columns=['label'])
X_public

In [None]:
# Public-split target: the 'label' column of `public`.
y_public = public.loc[:, 'label']

In [None]:
# Show the length of the public-split target series.
y_public.shape

In [None]:
# Min-max scale the features. The scaler is fit on the training features only
# and the same fitted transform is then applied to every other split.
scaler = MinMaxScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)
# Fix: transform the label-free feature frame `X_public`, not the raw `public`
# frame. `public` still carries the 'label' column, which is not among the
# columns the scaler was fit on, so passing it is a feature mismatch.
X_public_scale = scaler.transform(X_public)
# NOTE(review): `private` is transformed as loaded; this assumes it has no
# 'label' column — confirm against the CSV.
private_scale = scaler.transform(private)

In [None]:
# Fit a 187-component PCA on the scaled training features, then project the
# remaining splits with the same fitted components.
pca = PCA(random_state=42, n_components=187)
X_train_pca = pca.fit_transform(X_train_scale)
X_test_pca, X_public_pca, private_pca = (
    pca.transform(scaled_split)
    for scaled_split in (X_test_scale, X_public_scale, private_scale)
)

In [None]:
# Show the dimensions of the PCA-projected training features.
X_train_pca.shape

In [None]:
# Show the dimensions of the PCA-projected test features.
X_test_pca.shape

In [None]:
# Show the dimensions of the PCA-projected public-split features.
X_public_pca.shape

In [None]:
# Show the dimensions of the PCA-projected private-split features.
private_pca.shape

In [None]:
# Wrap the PCA outputs for the train and test splits in DataFrames.
X_train_PCA1, X_test_PCA1 = map(pd.DataFrame, (X_train_pca, X_test_pca))

In [None]:
# 1. LR Model
# Logistic regression with the liblinear solver, capped at 200 iterations,
# trained on the PCA-projected training frame.
logistic = LogisticRegression(
    solver='liblinear',
    max_iter=200,
)
logistic.fit(X_train_PCA1, y_train)

In [None]:
# 2. SVC Model
# RBF-kernel support vector classifier with probability estimates enabled,
# trained on the PCA-projected training frame.
svc = SVC(
    kernel='rbf',
    C=13,
    gamma="auto",
    probability=True,
)
svc.fit(X_train_PCA1, y_train)

In [None]:
# 3. Random Forest
# 100 entropy-criterion trees with depth capped at 70, trained on the
# PCA-projected training frame.
# random_state is pinned (same seed the PCA step uses) so that the fitted
# forest, and therefore the reported accuracies, are reproducible across runs.
random_forest = RandomForestClassifier(
    criterion='entropy',
    max_depth=70,
    n_estimators=100,
    random_state=42,
)
random_forest.fit(X_train_PCA1, y_train)

In [None]:
# 4. Gradient Boosting Method
# 100 boosting stages, trained on the PCA-projected training frame.
# random_state is pinned (same seed the PCA step uses) so that the fitted
# ensemble, and therefore the reported accuracies, are reproducible across runs.
Gradient = ensemble.GradientBoostingClassifier(
    n_estimators=100,
    random_state=42,
)
Gradient.fit(X_train_PCA1, y_train)

In [None]:
# 5. XGBoost Method
# Multi-class softmax objective, evaluated with the multi-class error rate,
# trained on the PCA-projected training frame.
xgb = XGBClassifier(
    objective="multi:softmax",
    eval_metric="merror",
    use_label_encoder=False,
)
xgb.fit(X_train_PCA1, y_train)

In [None]:
# 6. lightGBM Method
# Gradient-boosted decision trees for 10 classes, trained on the PCA-projected
# training frame.
# NOTE(review): `num_iterations` and `n_estimators` both request 100 rounds;
# LightGBM documents these as aliases — consider keeping only one.
lgbm = LGBMClassifier(
    boosting_type='gbdt',
    objective='multiclass',
    num_class=10,
    num_iterations=100,
    n_estimators=100,
    max_depth=4,
    learning_rate=0.01,
    nthreads=4,
    silent=False,
    boost_from_average=True,
)
lgbm.fit(X_train_PCA1, y_train)

In [None]:
# Evaluate logistic regression on the train and test splits.
# Both predictions now use the DataFrame views (X_train_PCA1 / X_test_PCA1) so
# the input type at predict time matches what the model was fitted on; the
# test split previously used the raw ndarray X_test_pca.
y_train_lr = logistic.predict(X_train_PCA1)
y_pred_lr = logistic.predict(X_test_PCA1)
logistic_train = metrics.accuracy_score(y_train, y_train_lr)
logistic_accuracy = metrics.accuracy_score(y_test, y_pred_lr)

print("Train Accuracy score: {}".format(logistic_train))
print("Test Accuracy score: {}".format(logistic_accuracy))
# Per-class precision / recall / F1 on the test split.
print(metrics.classification_report(y_test, y_pred_lr))

In [None]:
# Cross-tabulate actual vs. predicted test labels for logistic regression and
# draw the counts as an annotated heatmap.
con_matrix = pd.crosstab(
    index=pd.Series(y_test.to_numpy().ravel(), name='Actual'),
    columns=pd.Series(y_pred_lr, name='Predicted'),
)
plt.figure(figsize=(9, 6))
sns.heatmap(data=con_matrix, annot=True, fmt='g', cmap="Blues")
plt.title("Confusion Matrix on Logistic Regression")
plt.show()

In [None]:
# Evaluate the SVC on the train and test splits.
# Both predictions now use the DataFrame views (X_train_PCA1 / X_test_PCA1) so
# the input type at predict time matches what the model was fitted on; the
# test split previously used the raw ndarray X_test_pca.
y_train_svc = svc.predict(X_train_PCA1)
y_pred_svc = svc.predict(X_test_PCA1)
svc_train = metrics.accuracy_score(y_train, y_train_svc)
svc_accuracy = metrics.accuracy_score(y_test, y_pred_svc)

print("Train Accuracy score: {}".format(svc_train))
print("Test Accuracy score: {}".format(svc_accuracy))
# Per-class precision / recall / F1 on the test split.
print(metrics.classification_report(y_test, y_pred_svc))

In [None]:
# Cross-tabulate actual vs. predicted test labels for the SVC and draw the
# counts as an annotated heatmap.
con_matrix = pd.crosstab(
    index=pd.Series(y_test.to_numpy().ravel(), name='Actual'),
    columns=pd.Series(y_pred_svc, name='Predicted'),
)
plt.figure(figsize=(9, 6))
sns.heatmap(data=con_matrix, annot=True, fmt='g', cmap="Blues")
plt.title("Confusion Matrix on SVC")
plt.show()

In [None]:
# Evaluate the random forest on the train and test splits.
# Both predictions now use the DataFrame views (X_train_PCA1 / X_test_PCA1) so
# the input type at predict time matches what the model was fitted on; the
# test split previously used the raw ndarray X_test_pca.
y_train_forest = random_forest.predict(X_train_PCA1)
y_pred_forest = random_forest.predict(X_test_PCA1)
random_forest_train = metrics.accuracy_score(y_train, y_train_forest)
random_forest_accuracy = metrics.accuracy_score(y_test, y_pred_forest)

print("Train Accuracy score: {}".format(random_forest_train))
print("Test Accuracy score: {}".format(random_forest_accuracy))
# Per-class precision / recall / F1 on the test split.
print(metrics.classification_report(y_test, y_pred_forest))

In [None]:
# Cross-tabulate actual vs. predicted test labels for the random forest and
# draw the counts as an annotated heatmap.
con_matrix = pd.crosstab(
    index=pd.Series(y_test.to_numpy().ravel(), name='Actual'),
    columns=pd.Series(y_pred_forest, name='Predicted'),
)
plt.figure(figsize=(9, 6))
sns.heatmap(data=con_matrix, annot=True, fmt='g', cmap="Blues")
plt.title("Confusion Matrix on Random Forest")
plt.show()

In [None]:
# Evaluate gradient boosting on the train and test splits.
# Both predictions now use the DataFrame views (X_train_PCA1 / X_test_PCA1) so
# the input type at predict time matches what the model was fitted on; the
# test split previously used the raw ndarray X_test_pca.
y_train_gradient = Gradient.predict(X_train_PCA1)
y_pred_gradient = Gradient.predict(X_test_PCA1)
gradient_train = metrics.accuracy_score(y_train, y_train_gradient)
gradient_accuracy = metrics.accuracy_score(y_test, y_pred_gradient)

print("Train Accuracy score: {}".format(gradient_train))
print("Test Accuracy score: {}".format(gradient_accuracy))
# Per-class precision / recall / F1 on the test split.
print(metrics.classification_report(y_test, y_pred_gradient))

In [None]:
# Cross-tabulate actual vs. predicted test labels for gradient boosting and
# draw the counts as an annotated heatmap.
con_matrix = pd.crosstab(
    index=pd.Series(y_test.to_numpy().ravel(), name='Actual'),
    columns=pd.Series(y_pred_gradient, name='Predicted'),
)
plt.figure(figsize=(9, 6))
sns.heatmap(data=con_matrix, annot=True, fmt='g', cmap="Blues")
plt.title("Confusion Matrix on Gradient Boosting")
plt.show()

In [None]:
# Evaluate XGBoost on the train and test splits.
# The model was fitted on the DataFrame X_train_PCA1, so the test split is now
# predicted from the matching DataFrame X_test_PCA1 instead of the raw ndarray
# X_test_pca.
# NOTE(review): depending on the installed XGBoost version, predicting from an
# ndarray after fitting on a DataFrame can fail feature-name validation —
# keeping both inputs as DataFrames avoids that.
y_train_xgboost = xgb.predict(X_train_PCA1)
y_pred_xgboost = xgb.predict(X_test_PCA1)
xgb_train = metrics.accuracy_score(y_train, y_train_xgboost)
xgb_accuracy = metrics.accuracy_score(y_test, y_pred_xgboost)

print("Train Accuracy score: {}".format(xgb_train))
print("Test Accuracy score: {}".format(xgb_accuracy))
# Per-class precision / recall / F1 on the test split.
print(metrics.classification_report(y_test, y_pred_xgboost))

In [None]:
# Cross-tabulate actual vs. predicted test labels for XGBoost and draw the
# counts as an annotated heatmap.
con_matrix = pd.crosstab(
    index=pd.Series(y_test.to_numpy().ravel(), name='Actual'),
    columns=pd.Series(y_pred_xgboost, name='Predicted'),
)
plt.figure(figsize=(9, 6))
sns.heatmap(data=con_matrix, annot=True, fmt='g', cmap="Blues")
plt.title("Confusion Matrix on XGBoost")
plt.show()

In [None]:
# Evaluate LightGBM on the train and test splits.
# Both predictions now use the DataFrame views (X_train_PCA1 / X_test_PCA1) so
# the input type at predict time matches what the model was fitted on; the
# test split previously used the raw ndarray X_test_pca.
y_train_lgbm = lgbm.predict(X_train_PCA1)
y_pred_lgbm = lgbm.predict(X_test_PCA1)
lgbm_train = metrics.accuracy_score(y_train, y_train_lgbm)
lgbm_accuracy = metrics.accuracy_score(y_test, y_pred_lgbm)

print("Train Accuracy score: {}".format(lgbm_train))
print("Test Accuracy score: {}".format(lgbm_accuracy))
# Per-class precision / recall / F1 on the test split.
print(metrics.classification_report(y_test, y_pred_lgbm))

In [None]:
# Cross-tabulate actual vs. predicted test labels for LightGBM and draw the
# counts as an annotated heatmap.
con_matrix = pd.crosstab(
    index=pd.Series(y_test.to_numpy().ravel(), name='Actual'),
    columns=pd.Series(y_pred_lgbm, name='Predicted'),
)
plt.figure(figsize=(9, 6))
sns.heatmap(data=con_matrix, annot=True, fmt='g', cmap="Blues")
plt.title("Confusion Matrix on LightGBM")
plt.show()

In [None]:
# Collect the (train, test) accuracy pair of each model, then transpose the
# pairs into one list per split. The pair order matches the 'Algorithm' names.
Train_Accuracy, Test_Accuracy = map(list, zip(
    (logistic_train, logistic_accuracy),
    (svc_train, svc_accuracy),
    (random_forest_train, random_forest_accuracy),
    (gradient_train, gradient_accuracy),
    (xgb_train, xgb_accuracy),
    (lgbm_train, lgbm_accuracy),
))

# Column-oriented summary: one row per algorithm.
data1 = {
    'Algorithm': [
        'Logistic Regression',
        'SVC',
        'Random Forest Classifier',
        'Gradient Boosting',
        'XGBoost',
        'LightGBM',
    ],
    'Train Accuracy': Train_Accuracy,
    'Test Accuracy': Test_Accuracy,
}

df1 = pd.DataFrame(data=data1)

In [None]:
# Display the per-algorithm train/test accuracy table.
df1

In [None]:
# Grouped bar chart: one bar per split (train / test) for each algorithm,
# labelled with the accuracy rounded to two decimals.
fig = go.Figure(data=[
    go.Bar(
        name=trace_name,
        x=data1['Algorithm'],
        y=data1[accuracy_key],
        text=np.round(data1[accuracy_key], 2),
        textposition='outside',
    )
    for trace_name, accuracy_key in (
        ('train set', 'Train Accuracy'),
        ('test set', 'Test Accuracy'),
    )
])

fig.update_layout(
    barmode='group',
    title_text='Accuracy Comparison On Different Models',
    yaxis={'title': 'Accuracy'},
)
fig.show()