In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [5]:
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)


val_size = int(0.1 * len(train_dataset))
train_size = len(train_dataset) - val_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])


train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:01<00:00, 17.5MB/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 307kB/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:00<00:00, 5.65MB/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 19.6MB/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [6]:
class CNNModel(nn.Module):
    def __init__(self, kernel_sizes, strides, paddings, init_method="random"):
        super(CNNModel, self).__init__()
        self.kernel_sizes = kernel_sizes
        self.strides = strides
        self.paddings = paddings
        self.init_method = init_method
        
        self.conv1 = nn.Conv2d(1, 64, kernel_size=kernel_sizes[0], stride=strides[0], padding=paddings[0])
        self.bn1 = nn.BatchNorm2d(64)
        
        self.conv2 = nn.Conv2d(64, 192, kernel_size=kernel_sizes[1], stride=strides[1], padding=paddings[1])
        self.bn2 = nn.BatchNorm2d(192)
        
        self.conv3 = nn.Conv2d(192, 384, kernel_size=kernel_sizes[2], stride=strides[2], padding=paddings[2])
        self.bn3 = nn.BatchNorm2d(384)
        
        self.conv4 = nn.Conv2d(384, 256, kernel_size=kernel_sizes[3], stride=strides[3], padding=paddings[3])
        self.bn4 = nn.BatchNorm2d(256)
        
        self.conv5 = nn.Conv2d(256, 256, kernel_size=kernel_sizes[4], stride=strides[4], padding=paddings[4])
        self.bn5 = nn.BatchNorm2d(256)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(256 * 7 * 7, 256)
        self.fc2 = nn.Linear(256, 10)
        self.dropout = nn.Dropout(0.5)
        
        self.initialize_weights()
        
    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                if self.init_method == "xavier":
                    nn.init.xavier_uniform_(m.weight)
                elif self.init_method == "he":
                    nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
                else:
                    nn.init.uniform_(m.weight, -0.1, 0.1)
                
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
        
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool(x)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.dropout(x)
        x = F.relu(self.bn4(self.conv4(x)))
        x = self.dropout(x)
        x = F.relu(self.bn5(self.conv5(x)))
        x = self.dropout(x)
        x = x.view(-1, 256 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [7]:
kernel_sizes = [3, 3, 3, 3, 3]
strides = [1, 1, 1, 1, 1]
paddings = [1, 1, 1, 1, 1]
model = CNNModel(kernel_sizes, strides, paddings, "xavier").to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0015, weight_decay=1e-5)

In [8]:
def train(model, train_loader, criterion, optimizer, epoch):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 100 == 99:
            print(f'Epoch {epoch}, Batch {batch_idx + 1}, Loss: {running_loss / 100:.6f}')
            running_loss = 0.0


from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def validate(model, val_loader, criterion):
    model.eval()
    val_loss = 0.0
    correct = 0
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            
            all_predictions.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    val_loss /= len(val_loader.dataset)
    val_accuracy = 100. * correct / len(val_loader.dataset)

    precision, recall, f1, _ = precision_recall_fscore_support(all_targets, all_predictions, average='weighted')
    accuracy = accuracy_score(all_targets, all_predictions)

    print(f'loss: {val_loss:.6f}, Accuracy: {val_accuracy:.2f}%')
    print(f'Precision: {precision:.2f}')
    print(f'Recall: {recall:.2f}')
    print(f'F1 Score: {f1:.2f}')

    return val_loss, val_accuracy

In [9]:
num_epochs = 30
best_val_loss = float('inf')
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    val_loss, val_accuracy = validate(model, val_loader, criterion)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')

print('Training complete!')

Epoch 1, Batch 100, Loss: 1.477119
Epoch 1, Batch 200, Loss: 0.802524
Epoch 1, Batch 300, Loss: 0.696411
Epoch 1, Batch 400, Loss: 0.643884
loss: 0.003317, Accuracy: 84.42%
Precision: 0.84
Recall: 0.84
F1 Score: 0.84
Epoch 2, Batch 100, Loss: 0.552883
Epoch 2, Batch 200, Loss: 0.551057
Epoch 2, Batch 300, Loss: 0.530709
Epoch 2, Batch 400, Loss: 0.515590
loss: 0.002811, Accuracy: 86.28%
Precision: 0.87
Recall: 0.86
F1 Score: 0.86
Epoch 3, Batch 100, Loss: 0.487248
Epoch 3, Batch 200, Loss: 0.480644
Epoch 3, Batch 300, Loss: 0.482166
Epoch 3, Batch 400, Loss: 0.444388
loss: 0.002439, Accuracy: 88.27%
Precision: 0.89
Recall: 0.88
F1 Score: 0.88
Epoch 4, Batch 100, Loss: 0.434189
Epoch 4, Batch 200, Loss: 0.447252
Epoch 4, Batch 300, Loss: 0.419754
Epoch 4, Batch 400, Loss: 0.423353
loss: 0.002175, Accuracy: 89.97%
Precision: 0.90
Recall: 0.90
F1 Score: 0.90
Epoch 5, Batch 100, Loss: 0.385184
Epoch 5, Batch 200, Loss: 0.410583
Epoch 5, Batch 300, Loss: 0.401157
Epoch 5, Batch 400, Loss: 0

In [10]:
model.load_state_dict(torch.load('best_model.pth'))
test_loss, test_accuracy = validate(model, test_loader, criterion)
print(f'Test loss: {test_loss:.6f}, Accuracy: {test_accuracy:.2f}%')

  model.load_state_dict(torch.load('best_model.pth'))


loss: 0.001908, Accuracy: 92.60%
Precision: 0.93
Recall: 0.93
F1 Score: 0.93
Test loss: 0.001908, Accuracy: 92.60%
