In [1]:
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# CREATE MORE IMAGES VIA DATA AUGMENTATION
# import keras.preprocessing
# datagen = keras.preprocessing.image.ImageDataGenerator(
#         rotation_range=10,  
#         zoom_range = 0.10,  
#         width_shift_range=0.1, 
#         height_shift_range=0.1)

from torchvision import transforms
transform = transforms.Compose([
    transforms.RandomRotation(10),  # Rotate images by up to 10 degrees
    transforms.RandomResizedCrop(28, scale=(0.9, 1.1)),  # Zoom augmentation
    transforms.RandomAffine(0, translate=(0.1, 0.1)),  # Width and height shift
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [5]:
train_data = pd.read_csv("D:/Duong-Desktop/Code AI/digit-recognizer/train.csv") 

x_train = train_data.drop("label", axis = 1).values
y_train = train_data["label"].values

x_train = x_train.reshape(-1, 1, 28, 28).astype('float32') / 255
y_train = torch.tensor(y_train, dtype=torch.long)

In [12]:
from sklearn.model_selection import train_test_split

X_train2, X_val2, Y_train2, Y_val2 = train_test_split(x_train, y_train, test_size=0.1)

train_dataset = TensorDataset(torch.tensor(X_train2), torch.tensor(Y_train2))
val_dataset = TensorDataset(torch.tensor(X_val2), torch.tensor(Y_val2))

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

  train_dataset = TensorDataset(torch.tensor(X_train2), torch.tensor(Y_train2))
  val_dataset = TensorDataset(torch.tensor(X_val2), torch.tensor(Y_val2))


In [13]:
train_dataset = TensorDataset(torch.tensor(x_train), y_train)
# test_dataset = TensorDataset(torch.tensor(x_test), y_test)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

In [None]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5, 5))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=(5, 5))
        
        # Compute the correct flatten size dynamically
        self.flatten_size = self._get_conv_output((1, 28, 28))
        
        self.fc1 = nn.Linear(self.flatten_size, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def _get_conv_output(self, shape):
        with torch.no_grad():
            x = torch.zeros(1, *shape)
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            return x.numel()
    
    def forward(self, x):
        # print(x.shape)
        x = F.relu(self.conv1(x))
        # print(x.shape)
        x = self.pool(x)
        # print(x.shape)
        x = F.relu(self.conv2(x))
        # print(x.shape)
        x = self.pool(x)
        # print(x.shape)
        x = torch.flatten(x, 1)  # Flatten the output
        # print(x.shape)
        x = F.relu(self.fc1(x))
        # print(x.shape)
        x = F.relu(self.fc2(x))
        # print(x.shape)
        x = self.fc3(x)
        # print(x.shape)
        print()
        return x

In [26]:
# Initialize model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nets = 1  # Number of networks (models) to train
models = [CNN().to(device) for _ in range(nets)]
criterion = nn.CrossEntropyLoss()
optimizers = [optim.Adam(model.parameters(), lr=0.001) for model in models]
schedulers = [optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 0.95 ** epoch) for optimizer in optimizers]

In [28]:
# Training loop
epochs = 45
for j in range(nets):
    model = models[j]
    optimizer = optimizers[j]
    scheduler = schedulers[j]
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct, total = 0, 0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        scheduler.step()
        
        train_acc = 100 * correct / total
        
        # Validation
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        
        val_acc = 100 * val_correct / val_total
        print(f"CNN {j+1}: Epoch={epoch+1}, Train accuracy={train_acc:.5f}, Validation accuracy={val_acc:.5f}")


torch.Size([128, 1, 28, 28])
torch.Size([128, 6, 24, 24])
torch.Size([128, 6, 12, 12])
torch.Size([128, 16, 8, 8])
torch.Size([128, 16, 4, 4])
torch.Size([128, 256])
torch.Size([128, 120])
torch.Size([128, 84])
torch.Size([128, 10])

torch.Size([128, 1, 28, 28])
torch.Size([128, 6, 24, 24])
torch.Size([128, 6, 12, 12])
torch.Size([128, 16, 8, 8])
torch.Size([128, 16, 4, 4])
torch.Size([128, 256])
torch.Size([128, 120])
torch.Size([128, 84])
torch.Size([128, 10])

torch.Size([128, 1, 28, 28])
torch.Size([128, 6, 24, 24])
torch.Size([128, 6, 12, 12])
torch.Size([128, 16, 8, 8])
torch.Size([128, 16, 4, 4])
torch.Size([128, 256])
torch.Size([128, 120])
torch.Size([128, 84])
torch.Size([128, 10])

torch.Size([128, 1, 28, 28])
torch.Size([128, 6, 24, 24])
torch.Size([128, 6, 12, 12])
torch.Size([128, 16, 8, 8])
torch.Size([128, 16, 4, 4])
torch.Size([128, 256])
torch.Size([128, 120])
torch.Size([128, 84])
torch.Size([128, 10])

torch.Size([128, 1, 28, 28])
torch.Size([128, 6, 24, 24])
torch.

In [30]:
# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}")

Epoch 1, Loss: 0.5050
Epoch 2, Loss: 0.1429
Epoch 3, Loss: 0.0991
Epoch 4, Loss: 0.0791
Epoch 5, Loss: 0.0647
Epoch 6, Loss: 0.0558
Epoch 7, Loss: 0.0485
Epoch 8, Loss: 0.0437
Epoch 9, Loss: 0.0399
Epoch 10, Loss: 0.0347
Epoch 11, Loss: 0.0320
Epoch 12, Loss: 0.0265
Epoch 13, Loss: 0.0264
Epoch 14, Loss: 0.0213
Epoch 15, Loss: 0.0202
Epoch 16, Loss: 0.0184
Epoch 17, Loss: 0.0158
Epoch 18, Loss: 0.0146
Epoch 19, Loss: 0.0141
Epoch 20, Loss: 0.0152


In [17]:
import numpy as np
submission = pd.read_csv("D:/Duong-Desktop/Code AI/digit-recognizer/sample_submission.csv")

test_data = pd.read_csv("D:/Duong-Desktop/Code AI/digit-recognizer/test.csv")
test_data = test_data.values.reshape(-1, 1, 28, 28).astype(np.float32)  # Add channel dimension
test_data = test_data / 255.0  # Normalize the pixel values (scale to [0, 1])

submission_tensor = torch.tensor(test_data)
answer = model(submission_tensor)

In [18]:
submission = pd.read_csv("D:/Duong-Desktop/Code AI/digit-recognizer/sample_submission.csv")
submission["Label"] = answer.argmax(axis = 1)

In [None]:
submission.to_csv("submission.csv", index = False)

In [None]:
# model.eval()
# correct = 0
# total = 0
# with torch.no_grad():
#     for images, labels in test_loader:
#         images, labels = images.to(device), labels.to(device)
#         outputs = model(images)
#         _, predicted = torch.max(outputs, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

# print(f"Test Accuracy: {100 * correct / total:.2f}%")