In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import  torchvision
import torchvision.transforms.v2 as tfs


In [2]:
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import json
from PIL import Image

In [3]:
class DigitDataset(data.Dataset):
    def __init__(self, path, train=True, transforms=None):
        self.path = os.path.join(path, 'train' if train else 'test')
        self.transforms = transforms
        
        with open(os.path.join(path, 'format.json'), 'r') as f:
            self.format = json.load(f)

        self.length = 0
        self.files = []
        for class_name, target in self.format.items():
            dir_name = os.path.join(self.path, class_name)
            dir_files = os.listdir(dir_name)
            self.length += len(dir_files)
            for file in dir_files:
                img_path = os.path.join(dir_name, file)
                self.files.append((img_path,target))

    def __getitem__(self, item):
        img_path, target = self.files[item]
        img = Image.open(img_path)
        if self.transforms:
            img = self.transforms(img)

        return (img, target)

    def __len__(self):
        return self.length
            

In [4]:
transforms = tfs.Compose([tfs.ToTensor(),  tfs.ToDtype(torch.float32, scale=True)])
d = DigitDataset('/kaggle/input/mnist-dataset/dataset', train=True, transforms=transforms)



In [5]:
x, y = d[0]

In [6]:
type(x)

torch.Tensor

In [7]:
x.size()

torch.Size([1, 28, 28])

In [8]:
x

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [9]:
y

0

In [10]:
class DigitCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, padding=1)
        
        self.fc1 = nn.Linear(16*7*7, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x,2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x,2)
        x = x.view(-1, 16*7*7)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
        

In [11]:
# model = nn.Sequential(
#         nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, padding=1),
#         nn.ReLU(),
#         nn.MaxPool2d(kernel_size=2),
#         nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, padding=1),
#         nn.ReLU(),
#         nn.MaxPool2d(kernel_size=2),
#         nn.Flatten(),
#         nn.Linear(16*7*7, 128),
#         nn.Linear(128, 64),
#         nn.Linear(64, 10)
# )

In [12]:
transforms = tfs.Compose([tfs.ToTensor(), tfs.ToDtype(torch.float32, scale=True)])
d_set = DigitDataset('/kaggle/input/mnist-dataset/dataset', train=True, transforms=transforms)
d_train, d_val = data.random_split(d_set, [0.7, 0.3])
train_data = data.DataLoader(d_train, batch_size=32, shuffle=True)
val_data = data.DataLoader(d_val, batch_size=32, shuffle=False)

model = DigitCNN()

optimizer = optim.Adam(params = model.parameters(), lr=0.01, weight_decay=0.001)
loss_func = nn.CrossEntropyLoss()



In [13]:
d = next(iter(train_data))

In [14]:
d[0].size()

torch.Size([32, 1, 28, 28])

In [15]:
d[1]

tensor([0, 3, 0, 9, 8, 1, 8, 6, 1, 9, 8, 4, 9, 8, 2, 4, 9, 9, 5, 9, 8, 3, 9, 8,
        9, 2, 0, 3, 3, 2, 7, 2])

In [16]:

model(d[0])

tensor([[-0.0525, -0.1002, -0.1010, -0.1146, -0.0319, -0.1274,  0.0408,  0.0145,
          0.1194, -0.0482],
        [-0.0531, -0.0949, -0.1058, -0.1043, -0.0311, -0.1247,  0.0485,  0.0176,
          0.1149, -0.0465],
        [-0.0461, -0.1029, -0.1068, -0.1158, -0.0159, -0.1344,  0.0393,  0.0095,
          0.1173, -0.0430],
        [-0.0511, -0.0997, -0.0994, -0.1103, -0.0342, -0.1197,  0.0447,  0.0197,
          0.1263, -0.0448],
        [-0.0528, -0.0958, -0.1087, -0.1110, -0.0303, -0.1178,  0.0505,  0.0232,
          0.1218, -0.0501],
        [-0.0508, -0.1018, -0.1039, -0.1145, -0.0259, -0.1286,  0.0399,  0.0153,
          0.1222, -0.0455],
        [-0.0474, -0.0908, -0.1005, -0.1159, -0.0309, -0.1231,  0.0521,  0.0246,
          0.1209, -0.0431],
        [-0.0472, -0.1023, -0.1024, -0.1176, -0.0143, -0.1273,  0.0445,  0.0135,
          0.1237, -0.0386],
        [-0.0542, -0.0969, -0.1008, -0.1121, -0.0250, -0.1277,  0.0457,  0.0125,
          0.1190, -0.0458],
        [-0.0495, -

In [17]:

model(d[0]).size()

torch.Size([32, 10])

In [18]:
d[1]

tensor([0, 3, 0, 9, 8, 1, 8, 6, 1, 9, 8, 4, 9, 8, 2, 4, 9, 9, 5, 9, 8, 3, 9, 8,
        9, 2, 0, 3, 3, 2, 7, 2])

In [19]:
epochs = 5
for e in range(epochs):
    lm_count = 0
    loss_mean = 0
    train_tqdm = tqdm(train_data)
    for x_train, y_train in train_tqdm:
        model.train()
        train_pred = model(x_train)
        loss = loss_func(train_pred, y_train)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lm_count += 1
        loss_mean = 1/lm_count * loss.item() + (1-1/lm_count)*loss_mean
        train_tqdm.set_description(f"{e+1}/{epochs} loss_mean = {loss_mean}")
        
    val_tqdm = tqdm(val_data) 
    Q = 0
    for x_val, y_val in val_tqdm:
        model.eval()
        val_pred = model(x_val)
        val_loss = loss_func(val_pred, y_val)

        Q += val_loss.item()
    Q = Q  / len(x_val)
    print(f"Q = {Q}")

st = model.state_dict()
torch.save(st, 'digit_cnn.pth')
        
        

1/5 loss_mean = 0.2067751530983456: 100%|██████████| 1313/1313 [05:49<00:00,  3.76it/s]
100%|██████████| 563/563 [02:20<00:00,  4.01it/s]


Q = 3.6252122493606294


2/5 loss_mean = 0.12509214442757238: 100%|██████████| 1313/1313 [01:02<00:00, 20.84it/s]
100%|██████████| 563/563 [00:23<00:00, 24.34it/s]


Q = 4.423255359710311


3/5 loss_mean = 0.11678675106474445: 100%|██████████| 1313/1313 [01:06<00:00, 19.89it/s]
100%|██████████| 563/563 [00:22<00:00, 25.04it/s]


Q = 4.099166180749307


4/5 loss_mean = 0.11162245396903456: 100%|██████████| 1313/1313 [01:07<00:00, 19.44it/s]
100%|██████████| 563/563 [00:23<00:00, 23.61it/s]


Q = 3.9204967883997597


5/5 loss_mean = 0.10285031894441432: 100%|██████████| 1313/1313 [01:07<00:00, 19.55it/s]
100%|██████████| 563/563 [00:23<00:00, 23.47it/s]

Q = 3.2917933080752846





In [20]:
import torch

model = DigitCNN()

transforms = tfs.Compose([tfs.ToTensor(), tfs.ToDtype(torch.float32, scale=True)])
d_test = DigitDataset('/kaggle/input/mnist-dataset/dataset', train=False, transforms=transforms)
data_test = data.DataLoader(d_test, batch_size=500, shuffle=False)


st = torch.load('digit_cnn.pth', weights_only=True)
model.load_state_dict(st)

model.eval()
for x_test, y_test in data_test:
    with torch.no_grad():
        pred_test = model(x_test)
        p1 = torch.argmax(pred_test, dim=1)
        acc = (p1 == y_test).float().mean().item()
        print(f"Accuracy = {acc}")

Accuracy = 0.9860000014305115
Accuracy = 0.9919999837875366
Accuracy = 0.9879999756813049
Accuracy = 0.9959999918937683
Accuracy = 0.9919999837875366
Accuracy = 0.9760000109672546
Accuracy = 0.9900000095367432
Accuracy = 0.9860000014305115
Accuracy = 0.9559999704360962
Accuracy = 0.9679999947547913
Accuracy = 0.949999988079071
Accuracy = 0.9520000219345093
Accuracy = 0.9779999852180481
Accuracy = 0.9800000190734863
Accuracy = 0.9639999866485596
Accuracy = 0.9580000042915344
Accuracy = 0.9860000014305115
Accuracy = 0.9819999933242798
Accuracy = 0.9679999947547913
Accuracy = 0.9620000123977661
