In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GroupKFold, GridSearchCV, cross_val_predict
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Load the pre-split Fashion arrays from the parent directory.
# Plain string literals: the paths contain no placeholders, so an f-string
# prefix would be a no-op.
training_data = np.load('../fashion_train.npy')
test_data = np.load('../fashion_test.npy')

In [3]:
# Wrap both raw arrays in DataFrames so they can be sliced by column position.
df_train, df_test = map(pd.DataFrame, (training_data, test_data))

In [4]:
# Split each frame into features (every column except the last) and the
# label (the last column).
X_train, y_train = df_train.iloc[:, :-1], df_train.iloc[:, -1]
X_test, y_test = df_test.iloc[:, :-1], df_test.iloc[:, -1]

In [8]:


# Fit a decision tree with default hyperparameters (seeded for repeatability)
# on the raw training features, then predict labels for the test split.
clf_Tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_best = clf_Tree.predict(X_test)

In [10]:
# Score the decision-tree predictions against the test labels: overall
# accuracy plus the per-class confusion matrix.
accuracy_best = accuracy_score(y_test, y_pred_best)
cm_best = confusion_matrix(y_test, y_pred_best)

print("Best Decision Tree Accuracy:", accuracy_best)
print("Best Model Confusion Matrix:", cm_best, sep="\n")

Best Decision Tree Accuracy: 0.7682
Best Model Confusion Matrix:
[[740   7  21  55 177]
 [ 15 933   5  33  14]
 [ 27   4 754  22 193]
 [ 48  31  19 825  77]
 [167  12 172  60 589]]


In [26]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# Standardize the data: the scaler is fitted on the training split only and
# then reused unchanged on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Number of training rows; used below as the batch size so that every update
# sees the whole training set.
num_samples = X_train.shape[0]
# MLPClassifier configuration
mlp = MLPClassifier(
    hidden_layer_sizes=(16,),  # One hidden layer with 16 units
    activation='relu',           # Relu as a substitute for leaky_relu (not offered by MLPClassifier)
    solver='sgd',                # Gradient-descent solver (full-batch here, see batch_size)
    learning_rate_init=0.01,    # Initial step size (10x scikit-learn's default of 0.001)
    max_iter=10000,              # Upper bound on training epochs
    batch_size=num_samples,              # Full-batch: one batch spans the entire training set
    momentum=0,                # Momentum disabled (plain gradient descent)
    random_state=42              # Ensures reproducibility
)


# Training
mlp.fit(X_train_scaled, y_train)

# Evaluate the model on the scaled test split
y_pred = mlp.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(cm)


Accuracy: 0.8272
Confusion Matrix:
[[799   2  18  62 119]
 [  8 942  12  33   5]
 [ 24   2 855  20  99]
 [ 34  10  15 897  44]
 [159   1 154  43 643]]


In [27]:
# Fit a 10-nearest-neighbours classifier on the raw (unscaled) training features.
clf_Knn = KNeighborsClassifier(n_neighbors=10).fit(X_train, y_train)

In [24]:
# Make predictions on the test set with the fitted k-NN classifier.
# NOTE(review): y_pred_best / accuracy_best / cm_best are the same names the
# decision-tree cells assign, so after this cell they hold k-NN results.
y_pred_best = clf_Knn.predict(X_test)

# Evaluate the k-NN model
accuracy_best = accuracy_score(y_test, y_pred_best)
print("KNN Accuracy:", accuracy_best)

# Compute the confusion matrix for the k-NN model
cm_best = confusion_matrix(y_test, y_pred_best)
print("KNN Confusion Matrix:")
print(cm_best)

Best Decision Tree Accuracy: 0.825
Best Model Confusion Matrix:
[[857   2  25  22  94]
 [ 11 950  12  21   6]
 [ 15   1 866  14 104]
 [ 50   5  19 882  44]
 [203   0 194  33 570]]
