In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

In [2]:
# Load the training set; each row holds a `label` column followed by the pixel columns
train_df = pd.read_csv('train.csv')
# Summarise row count, column count, dtypes and memory footprint
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB


In [3]:
# 784 pixel columns per row; the square root gives the side length of a square image
np.sqrt(784)

28.0

The 784 pixel columns therefore correspond to 28×28-pixel images, each showing a digit from 0 to 9.

In [None]:
# Preview the first rows: a `label` column followed by the pixel columns
train_df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Separate the target column from the pixel features
y = train_df['label']
X = train_df.drop(columns='label')

## Simple Multilayer-Perceptron Neural Network:

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

In [17]:
class Classifier(nn.Module):
    """Fully connected network: two ReLU hidden layers followed by a linear output layer.

    Args:
        in_features: Length of each flattened input vector (default 784).
        hidden_features: Width of both hidden layers (default 16).
        num_classes: Number of output scores, one per class (default 10).

    Called with no arguments, the layer sizes are 784 -> 16 -> 16 -> 10.
    """

    def __init__(self, in_features=784, hidden_features=16, num_classes=10):
        super().__init__()

        # defining layers:
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.out = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return raw (pre-softmax) class scores for a batch ``x`` of shape (batch, in_features)."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No softmax here: the output layer's raw scores are returned as-is
        return self.out(x)

In [39]:
# Seed PyTorch's RNG so the random weight initialisation is the same on every run
torch.manual_seed(42)
model = Classifier()

In [40]:
# Cross-entropy loss for the multi-class problem; Adam updates every model parameter
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)

In [41]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for validation; the fixed random_state keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [42]:
# Features become float32 tensors; labels become int64 (long) tensors of class indices
X_train_tensor, X_val_tensor = (
    torch.tensor(features.to_numpy(), dtype=torch.float32)
    for features in (X_train, X_val)
)
y_train_tensor, y_val_tensor = (
    torch.tensor(labels.to_numpy(), dtype=torch.long)
    for labels in (y_train, y_val)
)

### Training the model:

In [43]:
NUM_EPOCHS = 5000  # number of full-batch passes over the training set
LOG_EVERY = 10     # print loss/accuracy once every LOG_EVERY epochs

# Nothing inside the loop leaves training mode, so it only needs to be set once
model.train()

for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()  # clear gradients left over from the previous step

    # forward pass over the whole training set (full-batch gradient descent)
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    loss.backward()   # backward pass
    optimizer.step()  # update weights

    if (epoch + 1) % LOG_EVERY == 0:
        # Accuracy is only needed for the log line, so it is computed only when logging.
        # `outputs` was produced before this epoch's weight update.
        predicted = outputs.argmax(dim=1)
        correct = (predicted == y_train_tensor).sum().item()
        accuracy = correct / len(y_train_tensor)
        print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')

Epoch [10/5000], Loss: 2.9055, Accuracy: 0.1910
Epoch [20/5000], Loss: 2.1197, Accuracy: 0.1868
Epoch [30/5000], Loss: 1.9069, Accuracy: 0.2993
Epoch [40/5000], Loss: 1.7377, Accuracy: 0.3596
Epoch [50/5000], Loss: 1.6151, Accuracy: 0.4029
Epoch [60/5000], Loss: 1.4992, Accuracy: 0.4471
Epoch [70/5000], Loss: 1.3869, Accuracy: 0.5089
Epoch [80/5000], Loss: 1.2849, Accuracy: 0.5516
Epoch [90/5000], Loss: 1.1939, Accuracy: 0.5944
Epoch [100/5000], Loss: 1.1138, Accuracy: 0.6321
Epoch [110/5000], Loss: 1.0472, Accuracy: 0.6612
Epoch [120/5000], Loss: 0.9901, Accuracy: 0.6811
Epoch [130/5000], Loss: 0.9375, Accuracy: 0.6972
Epoch [140/5000], Loss: 0.8852, Accuracy: 0.7095
Epoch [150/5000], Loss: 0.8296, Accuracy: 0.7206
Epoch [160/5000], Loss: 0.7711, Accuracy: 0.7648
Epoch [170/5000], Loss: 0.7175, Accuracy: 0.7912
Epoch [180/5000], Loss: 0.6778, Accuracy: 0.8004
Epoch [190/5000], Loss: 0.6481, Accuracy: 0.8103
Epoch [200/5000], Loss: 0.6244, Accuracy: 0.8171
Epoch [210/5000], Loss: 0.604

We reach a training accuracy of 98.5%. At this point, however, the model may be overfitting: it has many parameters, and therefore a high capacity for memorization, so it may be memorizing the training data points rather than learning patterns that generalize.

### Now, evaluating on validation dataset:

In [44]:
# Switch the model to evaluation mode before running inference on held-out data
model.eval()

Classifier(
  (fc1): Linear(in_features=784, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=16, bias=True)
  (out): Linear(in_features=16, out_features=10, bias=True)
)

In [45]:
# Inference only, so gradient tracking is switched off
with torch.no_grad():
    predictions = model(X_val_tensor)        # raw class scores, one row per validation sample
    probs = predictions.softmax(dim=1)       # normalise each row into class probabilities
    pred = probs.argmax(dim=1).numpy()       # most probable class per sample, as a NumPy array

In [46]:
from sklearn.metrics import accuracy_score, classification_report

In [47]:
# Fraction of validation samples whose predicted label matches the true label
accuracy_score(y_true=y_val, y_pred=pred)

0.9061904761904762

In [49]:
# Per-class precision, recall and F1 on the validation set
print(classification_report(y_true=y_val, y_pred=pred))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94       816
           1       0.96      0.97      0.96       909
           2       0.88      0.88      0.88       846
           3       0.89      0.88      0.89       937
           4       0.90      0.90      0.90       839
           5       0.88      0.90      0.89       702
           6       0.91      0.93      0.92       785
           7       0.93      0.90      0.92       893
           8       0.89      0.87      0.88       835
           9       0.88      0.88      0.88       838

    accuracy                           0.91      8400
   macro avg       0.91      0.91      0.91      8400
weighted avg       0.91      0.91      0.91      8400



In [50]:
# Load the test set and convert it to float32, the same dtype used for the training features
test = pd.read_csv('test.csv')
test_tensor = torch.tensor(test.to_numpy(), dtype=torch.float32)

In [53]:
# Sanity check: the second dimension must be 784 to match the model's input layer
test_tensor.shape

torch.Size([28000, 784])

### Getting predictions for the unseen test data:

In [54]:
# Inference only, so gradient tracking is switched off
with torch.no_grad():
    predictions = model(test_tensor)            # raw class scores, one row per test sample
    probs = predictions.softmax(dim=1)          # normalise each row into class probabilities
    pred_test = probs.argmax(dim=1).numpy()     # most probable class per sample, as a NumPy array

In [56]:
# Peek at the first ten predicted labels
pred_test[:10]

array([2, 0, 1, 4, 3, 7, 0, 3, 0, 3], dtype=int64)

In [57]:
# Build the submission table: ImageId counts up from 1, Label is the predicted digit
results = pd.DataFrame(
    {
        'ImageId': np.arange(len(pred_test)) + 1,
        'Label': pred_test,
    }
)
# Write without the DataFrame index so the file has only the two columns above
results.to_csv('Neural_Network_submission.csv', index=False)