In [16]:
# ! pip install kagglehub
# ! pip install matplotlib

In [17]:
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.optim as optim

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

In [18]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("zalando-research/fashionmnist")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'fashionmnist' dataset.
Path to dataset files: /kaggle/input/fashionmnist


In [19]:
df = pd.read_csv(path + "/fashion-mnist_train.csv")

df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
sample_df = df.sample(n=30000, random_state=42)

In [21]:
sample_df.shape

(30000, 785)

In [22]:
X = sample_df.iloc[:, 1:].values
y = sample_df.iloc[:, :1].values


In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Pre - Processing

In [24]:
st_scaler = StandardScaler()

In [25]:

X_train = st_scaler.fit_transform(X_train)
X_test = st_scaler.transform(X_test)

In [26]:
X_train

array([[-0.0100328 , -0.03207572, -0.04945422, ..., -0.16188057,
        -0.09292675, -0.03794802],
       [-0.0100328 , -0.03207572, -0.04945422, ..., -0.16188057,
        -0.09292675, -0.03794802],
       [-0.0100328 , -0.03207572, -0.04945422, ..., -0.16188057,
        -0.09292675, -0.03794802],
       ...,
       [-0.0100328 , -0.03207572, -0.04945422, ..., -0.16188057,
        -0.09292675, -0.03794802],
       [-0.0100328 , -0.03207572, -0.04945422, ..., -0.16188057,
        -0.09292675, -0.03794802],
       [-0.0100328 , -0.03207572, -0.04945422, ..., -0.16188057,
        -0.09292675, -0.03794802]])

# Dataset Class

In [27]:
# create custom dataset class

class FashionMNISTDataset(Dataset):

    def __init__(self, features, labels):
        print(type(features))
        print(type(labels))
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)


    def __len__(self):
        return len(self.features)


    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [28]:
train_dataset = FashionMNISTDataset(X_train, y_train)
test_dataset = FashionMNISTDataset(X_test, y_test)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [29]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# OPTUNA

In [30]:
# # objective function

# def objective_function(trial):

#     # next hyperparameter values
#     num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5, step=1, log=False)
#     neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8, log=False)


#     # model init
#     input_dim = 784
#     output_dim = 10

#     class 



#     # params init

#     # training loop

#     # evaluation


#     # return accuracy
#     return accuracy

# NN Model 

In [None]:
class MyNN(nn.Module):

    def __init__(self, num_features):

        super().__init__()

        self.model = nn.Sequential(

            nn.Linear(num_features, 128),
            nn.BatchNorm1d(128), # batch normaliza
        cnt +=1tion layer to stabilize and accelerate training, applied before activation functions, 
                                 #       128 - is the number of features from the previous layer, 1d as we have 1 dimensional data
            nn.ReLU(),
            nn.Dropout(p=0.2), # using dropout to prevent overfitting, dropout rate of 20%, used to prevent overfitting, applied after activation functions
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(64, 16),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(16, 10),
            # nn.Softmax(dim=1) - softmax is not required in PyTorch as it is included in the CrossEntropyLoss
        )

    def forward(self, x):
        return self.model(x)



In [32]:
# set learning rate and epochs
epochs = 300

learning_rate = 0.03


# instantiate the model

model = MyNN(X_train.shape[1])

# define the loss function

loss_function = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.0001) # weight decay is mathematically equivalent to L2 regularization


In [33]:
len(train_loader)

375

In [None]:
# training loop
i = 1
total_epoch_loss = []
for epoch in range(epochs):
    print(f"----------- Epoch {epoch+1}/{epochs} -----------")
    epoch_loss = []

    for batch_features, batch_labels in train_loader:

        
        # forward pass
        outputs = model(batch_features)

      
        

        # loss calculation
        loss = loss_function(outputs, batch_labels.squeeze())
        total_epoch_loss.append(loss.item())

        # backprop

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
        epoch_loss.append(loss.item())

    print(f" avg epoch loss: {sum(epoch_loss)/len(epoch_loss)} ")

        # update gradients

----------- Epoch 1/300 -----------
 avg epoch loss: 1.173630410194397 
----------- Epoch 2/300 -----------
 avg epoch loss: 0.74077583471934 
----------- Epoch 3/300 -----------
 avg epoch loss: 0.6506297153631846 
----------- Epoch 4/300 -----------
 avg epoch loss: 0.6076108044783274 
----------- Epoch 5/300 -----------
 avg epoch loss: 0.5721842919985454 
----------- Epoch 6/300 -----------
 avg epoch loss: 0.5444732427597045 
----------- Epoch 7/300 -----------
 avg epoch loss: 0.5141473467350006 
----------- Epoch 8/300 -----------
 avg epoch loss: 0.5005755435625712 
----------- Epoch 9/300 -----------
 avg epoch loss: 0.4842878180742264 
----------- Epoch 10/300 -----------
 avg epoch loss: 0.46880221549669904 
----------- Epoch 11/300 -----------
 avg epoch loss: 0.4562778458595276 
----------- Epoch 12/300 -----------
 avg epoch loss: 0.44742696424325307 
----------- Epoch 13/300 -----------
 avg epoch loss: 0.43250371384620667 
----------- Epoch 14/300 -----------
 avg epoch

In [35]:
len(test_loader)

94

In [None]:
# evaluation
model.eval()


total = 0
correct = 0 
i = 1
cnt =0
with torch.no_grad():
    for batch_features, batch_labels in test_loader:

        cnt +=1
            
        test_outputs = model(batch_features)

        _, predicted = torch.max(test_outputs, 1)
        
        if i <2:
            print(f"_: {_}, len: {len(_)}")
            print("Predicted:", predicted)
            print("Actual   :", batch_labels.squeeze())
            i += 1
        
        total += batch_labels.shape[0]
        correct += (predicted == batch_labels.squeeze()).sum().item()

print(f" total cnt : {cnt}")
accuracy = 100 * correct / total
print(f"Accuracy of the model on the test set: {accuracy:.2f}%")















_: tensor([22.7425, 18.8435, 19.0904, 19.5416, 22.1023,  5.3151, 29.7748,  3.8748,
        15.3527, 24.0269,  6.4164,  9.6461,  9.0416, 20.6596,  6.8374, 17.8770,
         8.6189, 15.0795,  9.8777, 22.4432,  8.3527,  8.4552, 20.9496, 12.2945,
        15.6015,  6.1589, 12.2478, 27.4259, 20.5952, 11.1070, 14.0889, 15.5195,
         7.2379, 11.3217, 13.8385, 10.9708, 23.9063, 15.1887, 13.9677, 14.5753,
         6.3640,  9.9296, 11.5687, 17.7541, 14.5306,  6.9519, 20.4176, 11.4977,
        18.2846, 14.2363, 12.6667, 20.7846,  6.4981, 21.2807, 17.6593, 13.9336,
        25.2330, 27.3003, 16.1608, 15.1575, 12.6086, 19.3762, 22.1285,  6.8764]), len: 64
Predicted: tensor([1, 1, 5, 1, 5, 6, 9, 6, 2, 1, 4, 7, 0, 1, 3, 6, 4, 3, 0, 7, 4, 2, 7, 4,
        7, 4, 4, 5, 8, 2, 7, 2, 6, 9, 0, 2, 8, 8, 4, 7, 6, 0, 3, 2, 3, 3, 3, 2,
        9, 0, 4, 3, 6, 9, 3, 6, 1, 8, 7, 8, 1, 9, 3, 8])
Actual   : tensor([1, 1, 5, 1, 5, 2, 9, 4, 2, 1, 2, 7, 0, 1, 3, 6, 6, 3, 0, 7, 4, 2, 7, 4,
        9, 2, 4, 5, 8, 2, 7,

In [None]:
# Accuracy of the model on the test set: 84.45%
# Accuracy of the model on the test set: 84.70%
# Accuracy of the model on the test set: 84.30%
# Accuracy of the model on the test set: 84.90%
# Accuracy of the model on the test set: 87.45%
# Accuracy of the model on the test set: 85.92% - increasing the batch size to 100 
# Accuracy of the model on the test set: 87.08% - batch size 100, learning rate 0.05, hidden layers: 128, 64, 16, 10,  dropout layers added
# Accuracy of the model on the test set: 87.67% - batch size 64, learning rate 0.05, hidden layers: 128, 64, 16, 10,  dropout layers added, batch normalization layers added
# Accuracy of the model on the test set: 86.30% - batch size 32, learning rate 0.05, hidden layers: 128, 64, 16, 10,  dropout layers added, batch normalization layers added
# Accuracy of the model on the test set: 88.32% - batch size 64, learning rate 0.05, hidden layers: 128, 64, 16, 10, decay: .0001, epochs- 300, dropout layers added, batch normalization layers added




In [38]:
torch.device("cuda" if torch.cuda.is_available() else "cpu")

device(type='cpu')