In [186]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

In [187]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
print(f"{device=}")

device=device(type='cuda')


In [168]:
df = pd.read_csv('fashion-mnist_train.csv')

In [169]:
df.shape

(60000, 785)

In [170]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 359.3 MB


## Train - Cross Validation Split

In [171]:
X_train, X_cv, y_train, y_cv = train_test_split(df.iloc[:,1:], df.iloc[:,0], test_size=0.1)

In [172]:
print(f'{X_train.shape=}, {y_train.shape=}')
print(f'{X_cv.shape=}, {y_cv.shape=}')

X_train.shape=(54000, 784), y_train.shape=(54000,)
X_cv.shape=(6000, 784), y_cv.shape=(6000,)


## Scaling and converting to numpy

In [173]:
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_cv = scaler.transform(X_cv)
X_train = X_train.to_numpy() / 255.0
X_cv = X_cv.to_numpy() / 255.0

## To Pytorch

In [174]:
X_train = torch.from_numpy(X_train).float()
X_cv = torch.from_numpy(X_cv).float()
y_train = torch.from_numpy(y_train.to_numpy())
y_cv = y_cv.to_numpy()

# Creating Model

In [175]:
class FashionMNIST(nn.Module):
  def __init__(self):
    super().__init__()
    self.network = nn.Sequential(
        nn.Linear(784, 196),
        nn.ReLU(),
        nn.Linear(196, 49),
        nn.ReLU(),
        nn.Linear(49, 25),
        nn.ReLU(),
        nn.Linear(25, 10),
        # no need to use softmax, it is implemented in CrossEntropyLoss
    )


  def forward(self, X):
    outputs = self.network(X)
    return outputs

  def predict(self, X):
    with torch.no_grad():
      y_pred = self.network(X)
      return y_pred


In [176]:
model = FashionMNIST()

In [177]:
model = model.to(device)

In [178]:
class CustomDataset(Dataset):
  def __init__(self, X, y):
    self.features = X
    self.labels = y

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, index):
    return self.features[index], self.labels[index]

In [179]:
trainset = CustomDataset(X_train, y_train)
cvset = CustomDataset(X_cv, y_cv)

In [180]:
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)
cvloader = DataLoader(cvset, batch_size=32, shuffle = False)

In [181]:
epochs = 20
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

In [182]:
for epoch in range(epochs):

  for batch_features, batch_labels in trainloader:


    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)
    outputs = model(batch_features)


    loss = loss_function(outputs, batch_labels)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

  print(f"Epoch: {epoch+1} Loss: {loss.item()}")

Epoch: 1 Loss: 0.2714090049266815
Epoch: 2 Loss: 0.19342435896396637
Epoch: 3 Loss: 0.47341299057006836
Epoch: 4 Loss: 0.2180338054895401
Epoch: 5 Loss: 0.5396806001663208
Epoch: 6 Loss: 0.28576070070266724
Epoch: 7 Loss: 0.07532680779695511
Epoch: 8 Loss: 0.26148897409439087
Epoch: 9 Loss: 0.11551256477832794
Epoch: 10 Loss: 0.46144312620162964
Epoch: 11 Loss: 0.6478263139724731
Epoch: 12 Loss: 0.18340066075325012
Epoch: 13 Loss: 0.33761513233184814
Epoch: 14 Loss: 0.11308816075325012
Epoch: 15 Loss: 0.031122548505663872
Epoch: 16 Loss: 0.028968334197998047
Epoch: 17 Loss: 0.18066342175006866
Epoch: 18 Loss: 0.15475238859653473
Epoch: 19 Loss: 0.4842650592327118
Epoch: 20 Loss: 0.24254529178142548


## Set Model for evaluation

In [183]:
model.eval()

FashionMNIST(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=196, bias=True)
    (1): ReLU()
    (2): Linear(in_features=196, out_features=49, bias=True)
    (3): ReLU()
    (4): Linear(in_features=49, out_features=25, bias=True)
    (5): ReLU()
    (6): Linear(in_features=25, out_features=10, bias=True)
  )
)

In [184]:
# Training Performance
total = 0
accurate = 0

with torch.no_grad():
  for features, labels in trainloader:
    features = features.to(device)
    labels = labels.to(device)
    outputs = model(features)


    y_pred = torch.softmax(outputs, dim=1)
    y_pred = torch.argmax(y_pred, dim=1)

    total += labels.shape[0]
    accurate += (y_pred == labels).sum().item()

print(f"Accuracy: {accurate/total}")

Accuracy: 0.9339259259259259


In [185]:
# Cross-Validation Performance
total = 0
accurate = 0

with torch.no_grad():
  for features, labels in cvloader:
    features = features.to(device)
    labels = labels.to(device)
    outputs = model(features)


    y_pred = torch.softmax(outputs, dim=1)
    y_pred = torch.argmax(y_pred, dim=1)

    total += labels.shape[0]
    accurate += (y_pred == labels).sum().item()

print(f"Accuracy: {accurate/total}")

Accuracy: 0.8953333333333333


Using Z-score:
Training Accuracy = 96%, Cross Validation Accuracy = 89%
#####Dividing by max: Training Accuracy = 93%, Cross Validation Accuracy = 89%