In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import torch.optim as optim

In [2]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [3]:
# Read the Fashion-MNIST training CSV; the bare `df` on the last line renders a preview.
df = pd.read_csv(filepath_or_buffer="../../Data/fashion-mnist_train.csv")
df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,1,0,0,0,0,0,0,0,0,0,...,73,0,0,0,0,0,0,0,0,0
59997,8,0,0,0,0,0,0,0,0,0,...,160,162,163,135,94,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Positional split: column 0 is the target, every remaining column is a feature.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Divide every feature value by 255.
# NOTE: the same names are rebound, so running this cell twice scales the data twice.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [7]:
# Convert the pandas objects to NumPy arrays with the dtypes the training code uses:
# float32 for the pixel features and int64 for the class ids. The labels are
# categorical indices, so they stay integral instead of being routed through float32
# (CustomDataset converts them to torch.long, i.e. int64, anyway).
X_train = np.asarray(X_train, dtype=np.float32)
X_test = np.asarray(X_test, dtype=np.float32)
y_train = np.asarray(y_train, dtype=np.int64)
y_test = np.asarray(y_test, dtype=np.int64)

# 1. Data Loading

In [8]:
from torch.utils.data import Dataset, DataLoader


class CustomDataset(Dataset):
    """In-memory dataset of single-channel 28x28 images and their class ids.

    Args:
        features: Array-like of pixel values whose element count is a multiple
            of 784 (for example shape ``(N, 784)``). It is copied into a float32
            tensor and reshaped to ``(N, 1, 28, 28)``.
        labels: Array-like holding one class id per image; copied into an
            int64 tensor.

    Raises:
        ValueError: If the number of labels differs from the number of images.
    """

    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32).reshape(-1, 1, 28, 28)
        self.labels = torch.tensor(labels, dtype=torch.long)

        # Fail fast on misaligned inputs. __len__ is driven by the features, so
        # without this check too few labels would only surface as an IndexError
        # in the middle of an epoch and surplus labels would be ignored silently.
        if self.labels.shape[:1] != self.features.shape[:1]:
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {self.features.shape[0]} images and labels of shape {tuple(self.labels.shape)}"
            )

    def __len__(self):
        """Return the number of images in the dataset."""
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

In [9]:
# Wrap each split in a CustomDataset so it can be fed to a DataLoader.
train_dataset = CustomDataset(features=X_train, labels=y_train)
test_dataset = CustomDataset(features=X_test, labels=y_test)

In [10]:
# Batch both splits. Only the training loader shuffles: accuracy does not depend on
# sample order, so the evaluation loader keeps a fixed order for repeatable runs.
# Pinned (page-locked) host memory only helps host-to-GPU copies, so it is requested
# only when CUDA is available; asking for it on a CPU-only machine just emits a warning.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    pin_memory=torch.cuda.is_available(),
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    pin_memory=torch.cuda.is_available(),
)

# 2. Model Build

In [11]:
class MyCNN(nn.Module):
    """Two-block convolutional classifier for 28x28 images.

    ``features`` stacks two Conv -> BatchNorm -> ReLU -> MaxPool blocks. The
    convolutions use ``padding='same'`` and each pooling layer halves the
    spatial size, so a 28x28 input leaves the extractor as 64 maps of 7x7.
    ``classifier`` flattens those maps and applies two hidden linear layers
    (with BatchNorm, ReLU and Dropout) followed by the output layer.

    Args:
        input_features: Number of channels in the input images.
        num_classes: Number of logits produced by the output layer.
            Defaults to 10, the value that was previously hard-coded.
    """

    def __init__(self, input_features, num_classes=10):
        super().__init__()

        # feature extraction
        self.features = nn.Sequential(
            # 1st Layer
            nn.Conv2d(input_features, 32, kernel_size=3, padding='same'),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # 2nd Layer
            nn.Conv2d(32, 64, kernel_size=3, padding='same'),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # classification
        self.classifier = nn.Sequential(
            nn.Flatten(),

            # 1st Layer: 64 * 7 * 7 is the flattened extractor output for 28x28 inputs
            nn.Linear(64 * 7 * 7, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),

            # 2nd Layer
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.4),

            # output: raw logits, one per class (no softmax applied here)
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits, shape ``(batch, num_classes)``, for ``x``."""
        return self.classifier(self.features(x))

# 3. Training Model

In [12]:
# Training hyperparameters
learning_rate = 1e-1  # step size handed to the optimizer
epochs = 100  # number of passes over the training loader

In [13]:
# Build the single-channel network and move its parameters to the selected device.
model = MyCNN(1).to(device)

# Cross-entropy between the network's raw logits and the integer class targets.
criterion = nn.CrossEntropyLoss()

# Plain SGD over all model parameters, with weight decay.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)

In [14]:

for epoch in range(epochs):
    # Force training mode at the start of every epoch. The evaluation section calls
    # model.eval(); without this line, re-running the cell afterwards would train with
    # Dropout disabled and BatchNorm running statistics frozen.
    model.train()

    total_epoch_loss = 0
    for batch_features, batch_labels in train_dataloader:

        # move data to gpu
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        # forward propagation
        y_pred = model(batch_features)

        # loss calculate
        loss = criterion(y_pred, batch_labels)

        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # update weight and bias
        optimizer.step()

        # .item() turns the 0-dim loss tensor into a Python float
        total_epoch_loss += loss.item()

    # mean of the per-batch losses over the epoch
    avg_loss = total_epoch_loss/len(train_dataloader)
    print(f"Epoch: {epoch+1} : Loss {avg_loss}")

Epoch: 1 : Loss 0.5324869014173746
Epoch: 2 : Loss 0.36563057149946687
Epoch: 3 : Loss 0.31719733282426993
Epoch: 4 : Loss 0.28853838541855414
Epoch: 5 : Loss 0.2668565167387327
Epoch: 6 : Loss 0.24793364566688736
Epoch: 7 : Loss 0.23485948459059
Epoch: 8 : Loss 0.22320604631553093
Epoch: 9 : Loss 0.21094917598863444
Epoch: 10 : Loss 0.20288409852981568
Epoch: 11 : Loss 0.19014295223603647
Epoch: 12 : Loss 0.18341617381572722
Epoch: 13 : Loss 0.17660745947249235
Epoch: 14 : Loss 0.1720113220165173
Epoch: 15 : Loss 0.16717878184405466
Epoch: 16 : Loss 0.159174125155434
Epoch: 17 : Loss 0.1583299109072735
Epoch: 18 : Loss 0.14798139578166108
Epoch: 19 : Loss 0.1458799462467432
Epoch: 20 : Loss 0.14180204453505577
Epoch: 21 : Loss 0.1350604522898793
Epoch: 22 : Loss 0.13721462263042727
Epoch: 23 : Loss 0.13038012066980204
Epoch: 24 : Loss 0.12818725681304932
Epoch: 25 : Loss 0.12496065479827424
Epoch: 26 : Loss 0.12158836723072454
Epoch: 27 : Loss 0.1179376660361886
Epoch: 28 : Loss 0.117

# 4. Evaluation

In [15]:
# Switch the model to evaluation mode (affects its Dropout and BatchNorm layers)
# before measuring accuracy. As the last expression of the cell, the call's return
# value -- the module itself -- is displayed as the cell output.
model.eval()

MyCNN(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=3136, out_features=128, bias=True)
    (2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Dropout(p=0.4, inplace=False)
    (5): Linear(in_features=128, out_features=64, bias=True)
    (6): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReL

In [16]:
# Accuracy on the held-out split.
# Force evaluation mode here so the result does not depend on another cell having
# been run first: Dropout is disabled and BatchNorm uses its running statistics.
model.eval()

total = 0
correct = 0
with torch.no_grad():  # no gradients are needed for inference
    for batch_features, batch_labels in test_dataloader:

        # move data to gpu
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        # forward propagation
        y_pred = model(batch_features)

        # predicted class = index of the largest logit; argmax also avoids assigning
        # to `_`, which would shadow IPython's last-output variable
        predicted = y_pred.argmax(dim=1)
        total += batch_features.shape[0]
        correct += (predicted == batch_labels).sum().item()

print(f"Accuracy : {correct/ total}")

Accuracy : 0.92475


In [17]:
# Accuracy on the training split, for comparison with the held-out accuracy.
# Force evaluation mode here so the result does not depend on another cell having
# been run first: Dropout is disabled and BatchNorm uses its running statistics.
model.eval()

total = 0
correct = 0
with torch.no_grad():  # no gradients are needed for inference
    for batch_features, batch_labels in train_dataloader:

        # move data to gpu
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        # forward propagation
        y_pred = model(batch_features)

        # predicted class = index of the largest logit; argmax also avoids assigning
        # to `_`, which would shadow IPython's last-output variable
        predicted = y_pred.argmax(dim=1)
        total += batch_features.shape[0]
        correct += (predicted == batch_labels).sum().item()

print(f"Accuracy : {correct/ total}")

Accuracy : 0.9972916666666667
