In [11]:
#Import all necessary libs
import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split

import torch                    #Pytorch - great for CV, CNN, Mac MPS (GPU)
import torch.nn as nn           #NN building blocks (nn : keras) : Linear Layers(Dense)...
import torch.nn.functional as F #activation, loss, function
import torch.optim as optim     #optimizers (Adam, ...)
from torch.utils.data import DataLoader, TensorDataset 

In [12]:
#Config: seed the RNG, then pick the compute device
#Seeding the global torch RNG makes weight init and DataLoader shuffling
#repeatable on Restart & Run All (GPU kernels may still be non-deterministic).
SEED = 42                       #same value as random_state used for the train/val split
torch.manual_seed(SEED)

#Check if MPS (Apple-Silicon GPU backend) is available, else use CPU
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using device:",device)

#Cell output: (MPS usable right now, this PyTorch build was compiled with MPS)
torch.backends.mps.is_available(), torch.backends.mps.is_built()

Using device: mps


(True, True)

In [13]:
#Load the train and test CSV files into DataFrames
train_df, test_df = map(pd.read_csv, ("data/train.csv", "data/test.csv"))

#.shape is (rows, cols); each row is one image flattened to 784 pixel columns
print("Train CSV shape:", train_df.shape)
print("Test CSV shape:", test_df.shape)

#Preview the first 3 training rows as the cell output
train_df.head(3)

Train CSV shape: (42000, 785)
Test CSV shape: (28000, 784)


Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
#Process, Normalize, Reshape the image for CNN
#Column 0 is the label; every remaining column is one pixel of the image.
y_train = train_df.iloc[:, 0].values    #.values: Series -> NumPy array

#Reshape to (batch_size, channels, height, width) and scale pixels from 0-255 to 0-1
X_train = train_df.iloc[:, 1:].values.reshape(-1, 1, 28, 28) / 255.0

#The test frame is reshaped and scaled the same way, using all of its columns
X_test = test_df.values.reshape(-1, 1, 28, 28) / 255.0

print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)
print("Labels shape:", y_train.shape)

Training data shape: (42000, 1, 28, 28)
Test data shape: (28000, 1, 28, 28)
Labels shape: (42000,)


In [15]:
#Train/Validation Split 90%T/10%V (random_state fixed so the split is repeatable)
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=42,
)


def _to_dataset(images, labels):
    """Pair NumPy images/labels as a TensorDataset (float32 images, int64 labels)."""
    return TensorDataset(
        torch.tensor(images, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.long),
    )


#TensorDataset - NumPy arrays converted to PyTorch tensors
train_dataset = _to_dataset(X_train_split, y_train_split)
val_dataset = _to_dataset(X_val_split, y_val_split)

#DataLoader - serves batches of 64; only the training set is shuffled
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

print("TrainLoader batches: ", len(train_loader))
print("ValLoader batches: ", len(val_loader))

TrainLoader batches:  591
ValLoader batches:  66


In [16]:
#Build CNN model
class CNN(nn.Module):       #inherits PyTorch base class nn.Module
    """Two-conv-layer CNN for single-channel 28x28 images.

    Pipeline: conv(1->32, 3x3) -> ReLU -> 2x2 max-pool
              -> conv(32->64, 3x3) -> ReLU -> 2x2 max-pool
              -> flatten -> Linear(64*5*5, 128) -> ReLU -> Linear(128, num_classes)

    Spatial sizes for a 28x28 input (no padding, output = input - kernel + 1):
        conv1: 28 -> 26, pool: 26 -> 13
        conv2: 13 -> 11, pool: 11 -> 5

    Args:
        num_classes: number of output logits. Defaults to 10 (digits 0-9).
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()  #call parent class constructor

        #Convolution layers: grayscale input, 32 then 64 feature maps, 3x3 kernels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)

        #Max pooling: each 2x2 block keeps its max, halving height and width
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        #Fully connected layers [expect one flat vector per image]
        #64*5*5 is the number of features per image after the second pool
        self.fc1 = nn.Linear(64*5*5, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: image batch shaped (batch, 1, 28, 28).

        Raises:
            RuntimeError: if the spatial size does not yield 64*5*5 features
                per image (raised by fc1).
        """
        x = self.pool(F.relu(self.conv1(x)))  #conv -> ReLU -> pool
        x = self.pool(F.relu(self.conv2(x)))
        #Flatten everything except the batch dimension. Unlike view(-1, 64*5*5),
        #this never folds extra features into the batch axis, so a wrongly sized
        #input fails in fc1 instead of silently returning extra rows.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)                    #no activation: raw logits

#Create the network, then move its parameters to the selected device
model = CNN()
model = model.to(device)
print(model)  #layer-by-layer summary

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1600, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [17]:
#Loss Function(CrossEntropyLoss) + Optimizer(Adam)
#(torch.optim is imported as `optim` in the import cell at the top of the notebook)

criterion = nn.CrossEntropyLoss()                     #takes raw logits + integer class labels
optimizer = optim.Adam(model.parameters(), lr=0.001)  #Adam over all model weights

In [26]:
#Training
#NOTE(review): model and optimizer are not re-created in this cell, so running it
#again continues from the current weights. The recorded epoch-1 loss of 0.0054
#suggests that happened here - confirm before comparing runs.
epochs = 8
print("Start Training...")

for epoch in range(epochs):
    model.train()       #training mode
    running_loss = 0    #sum of per-batch losses within this epoch

    for images, labels in train_loader:
        #move the batch to the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()               #clear gradients from the previous step
        outputs = model(images)             #forward pass -> logits
        loss = criterion(outputs, labels)
        loss.backward()                     #compute gradients
        optimizer.step()                    #apply gradients -> update weights

        running_loss += loss.item()

    #mean of the per-batch losses over one epoch
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

print("Complete training!")

Start Training...
Epoch 1/8, Loss: 0.0054
Epoch 2/8, Loss: 0.0016
Epoch 3/8, Loss: 0.0038
Epoch 4/8, Loss: 0.0039
Epoch 5/8, Loss: 0.0017
Epoch 6/8, Loss: 0.0037
Epoch 7/8, Loss: 0.0037
Epoch 8/8, Loss: 0.0017
Complete training!


In [27]:
#Validate Accuracy
model.eval()    #evaluation mode
correct = 0     #correctly classified validation images
total = 0       #validation images seen

with torch.no_grad():   #no gradient tracking needed for validation
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        output = model(images)
        predicted = output.argmax(dim=1)    #index of the largest logit = predicted class
        total += len(labels)
        correct += torch.eq(predicted, labels).sum().item()

val_accuracy = correct / total
print(f"Validation Accuracy: {val_accuracy*100:.2f}%")

Validation Accuracy: 98.64%


In [25]:
#Make Prediction (Kaggle Data)
model.eval()
predictions = []

#Batch the unlabeled test images; the tensor itself serves as the dataset
test_loader = DataLoader(torch.tensor(X_test, dtype=torch.float32), batch_size=64)

with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        output = model(images)
        predicted = output.argmax(dim=1)             #predicted class per image
        predictions.extend(predicted.cpu().numpy())  #back to CPU before NumPy conversion

#ImageId is 1-based, in the same order as the test rows
image_ids = np.arange(1, len(predictions)+1)
submission = pd.DataFrame({"ImageId": image_ids, "Label": predictions})

#NOTE(review): file name says "epoch10" but the training cell sets epochs = 8 - confirm
submission.to_csv("MNIST_submission_epoch10.csv", index=False)
print("CSV created")

CSV created
