In [45]:
# ! pip install kagglehub
# ! pip install matplotlib

In [46]:
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.optim as optim

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

In [47]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("zalando-research/fashionmnist")

print("Path to dataset files:", path)

Path to dataset files: /home/codespace/.cache/kagglehub/datasets/zalando-research/fashionmnist/versions/4


In [48]:
df = pd.read_csv(path + "/fashion-mnist_train.csv")

df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
sample_df = df.sample(n=30000, random_state=42)

In [50]:
sample_df.shape

(20000, 785)

In [51]:
X = sample_df.iloc[:, 1:].values
y = sample_df.iloc[:, :1].values


In [52]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Pre - Processing

In [53]:
st_scaler = StandardScaler()

In [54]:

X_train = st_scaler.fit_transform(X_train)
X_test = st_scaler.transform(X_test)

In [55]:
X_train

array([[-0.01348522, -0.03527256, -0.0498121 , ...,  2.38657474,
         0.01416161, -0.04111855],
       [-0.01348522, -0.03527256, -0.0498121 , ..., -0.15810555,
        -0.09040037, -0.04111855],
       [-0.01348522, -0.03527256, -0.0498121 , ..., -0.15810555,
        -0.09040037, -0.04111855],
       ...,
       [-0.01348522, -0.03527256, -0.0498121 , ..., -0.15810555,
        -0.09040037, -0.04111855],
       [-0.01348522, -0.03527256, -0.0498121 , ..., -0.15810555,
        -0.09040037, -0.04111855],
       [-0.01348522, -0.03527256, -0.0498121 , ..., -0.15810555,
        -0.09040037, -0.04111855]], shape=(16000, 784))

# Dataset Class

In [56]:
# create custom dataset class

class FashionMNISTDataset(Dataset):

    def __init__(self, features, labels):
        print(type(features))
        print(type(labels))
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)


    def __len__(self):
        return len(self.features)


    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [57]:
train_dataset = FashionMNISTDataset(X_train, y_train)
test_dataset = FashionMNISTDataset(X_test, y_test)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [None]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# OPTUNA

In [None]:
# objective function

def objective_function(trial):

    # next hyperparameter values
    num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5, step=1, log=False)
    neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8, log=False)


    # model init
    input_dim = 784
    output_dim = 10

    class 



    # params init

    # training loop

    # evaluation


    # return accuracy
    return accuracy

# NN Model 

In [59]:
class MyNN(nn.Module):

    def __init__(self, num_features):

        super().__init__()

        self.model = nn.Sequential(

            nn.Linear(num_features, 128),
            nn.BatchNorm1d(128), # batch normalization layer to stabilize and accelerate training, applied before activation functions, 
                                 #       128 - is the number of features from the previous layer, 1d as we have 1 dimensional data
            nn.ReLU(),
            nn.Dropout(p=0.2), # using dropout to prevent overfitting, dropout rate of 20%, used to prevent overfitting, applied after activation functions
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(64, 16),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(16, 10),
            # nn.Softmax(dim=1) - softmax is not required in PyTorch as it is included in the CrossEntropyLoss
        )

    def forward(self, x):
        return self.model(x)



In [None]:
# set learning rate and epochs
epochs = 300

learning_rate = 0.03


# instantiate the model

model = MyNN(X_train.shape[1])

# define the loss function

loss_function = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.0001) # weight decay is mathematically equivalent to L2 regularization


In [61]:
len(train_loader)

500

In [62]:
# training loop
i = 1
total_epoch_loss = []
for epoch in range(epochs):
    print(f"----------- Epoch {epoch+1}/{epochs} -----------")
    epoch_loss = []

    for batch_features, batch_labels in train_loader:

        
        # forward pass
        outputs = model(batch_features)

      
        

        # loss calculation
        loss = loss_function(outputs, batch_labels.squeeze())
        total_epoch_loss.append(loss.item())

        # backprop

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
        epoch_loss.append(loss.item())

    print(f" avg epoch loss: {sum(epoch_loss)/len(epoch_loss)} ")

        # update gradients

----------- Epoch 1/700 -----------


 avg epoch loss: 1.2202758622169494 
----------- Epoch 2/700 -----------
 avg epoch loss: 0.7627844249010086 
----------- Epoch 3/700 -----------
 avg epoch loss: 0.6708186898231506 
----------- Epoch 4/700 -----------
 avg epoch loss: 0.6181969386935234 
----------- Epoch 5/700 -----------
 avg epoch loss: 0.5801185286343098 
----------- Epoch 6/700 -----------
 avg epoch loss: 0.5541691065728664 
----------- Epoch 7/700 -----------
 avg epoch loss: 0.5313997372090816 
----------- Epoch 8/700 -----------
 avg epoch loss: 0.5168785937428474 
----------- Epoch 9/700 -----------
 avg epoch loss: 0.4951062305569649 
----------- Epoch 10/700 -----------
 avg epoch loss: 0.4924792742431164 
----------- Epoch 11/700 -----------
 avg epoch loss: 0.4793404828608036 
----------- Epoch 12/700 -----------
 avg epoch loss: 0.46812558647990227 
----------- Epoch 13/700 -----------
 avg epoch loss: 0.44879987913370134 
----------- Epoch 14/700 -----------
 avg epoch loss: 0.4467666516005993 
-------

In [63]:
len(test_loader)

63

In [64]:
# evaluation
model.eval()


total = 0
correct = 0 
i = 1
cnt =0
with torch.no_grad():
    for batch_features, batch_labels in test_loader:

        cnt +=1
            
        test_outputs = model(batch_features)

        _, predicted = torch.max(test_outputs, 1)
        
        if i <2:
            print(f"_: {_}, len: {len(_)}")
            print("Predicted:", predicted)
            print("Actual   :", batch_labels.squeeze())
            i += 1
        
        total += batch_labels.shape[0]
        correct += (predicted == batch_labels.squeeze()).sum().item()

print(f" total cnt : {cnt}")
accuracy = 100 * correct / total
print(f"Accuracy of the model on the test set: {accuracy:.2f}%")















_: tensor([20.2203, 14.5283, 10.5542,  9.5363,  9.6815, 19.0161, 17.2712, 21.9782,
        14.6558, 24.3794,  6.3633, 13.1010, 19.1785, 31.8668,  6.4249, 23.0801,
        22.3702, 10.0810, 29.5789, 21.4938, 24.9441,  5.7097, 11.0885, 23.6081,
        11.7713, 30.3535, 27.1362, 11.5558, 21.3798, 15.7598,  8.2601, 22.8211,
        24.1115, 14.2723,  4.6293,  9.6025,  5.5962,  7.4510, 17.6680, 11.4184,
        12.6121, 26.0036,  9.6368, 17.2987, 24.6082,  8.3409, 14.0982,  9.6182,
        12.6412,  7.2364, 22.3319,  6.1591,  8.4048, 22.2118,  3.3828, 13.1297,
        24.8780,  5.1471, 12.2952,  6.2346, 15.6126, 15.3296, 18.7507, 13.4052]), len: 64
Predicted: tensor([2, 8, 3, 3, 2, 9, 9, 7, 1, 7, 3, 6, 5, 9, 4, 5, 1, 8, 9, 7, 8, 4, 3, 1,
        4, 9, 5, 6, 7, 8, 5, 5, 8, 4, 6, 8, 4, 6, 8, 6, 0, 1, 0, 2, 5, 2, 2, 4,
        0, 4, 1, 0, 3, 1, 2, 2, 1, 0, 0, 6, 8, 2, 8, 4])
Actual   : tensor([2, 8, 3, 3, 2, 9, 9, 7, 1, 7, 3, 6, 5, 9, 2, 5, 1, 8, 9, 7, 8, 3, 3, 1,
        4, 9, 5, 6, 7, 8, 5,

In [None]:
# Accuracy of the model on the test set: 84.45%
# Accuracy of the model on the test set: 84.70%
# Accuracy of the model on the test set: 84.30%
# Accuracy of the model on the test set: 84.90%
# Accuracy of the model on the test set: 87.45%
# Accuracy of the model on the test set: 85.92% - increasing the batch size to 100 
# Accuracy of the model on the test set: 87.08% - batch size 100, learning rate 0.05, hidden layers: 128, 64, 16, 10,  dropout layers added
# Accuracy of the model on the test set: 87.67% - batch size 64, learning rate 0.05, hidden layers: 128, 64, 16, 10,  dropout layers added, batch normalization layers added
# Accuracy of the model on the test set: 86.30% - batch size 32, learning rate 0.05, hidden layers: 128, 64, 16, 10,  dropout layers added, batch normalization layers added




In [66]:
torch.device("cuda" if torch.cuda.is_available() else "cpu")

device(type='cpu')