In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
import pandas as pd

In [3]:
class DevanagariDataset(Dataset):
    
    def __init__(self, data_csv, train = True , img_transform = None):
        """
        Dataset init function
        
        INPUT:
        data_csv: Path to csv file containing [data, labels]
        train: 
            True: if the csv file has [data, labels] (Train data and Public Test Data) 
            False: if the csv file has only [data] and labels are not present.
        img_transform: List of preprocessing operations need to performed on image. 
        """
        self.data_csv = data_csv
        self.img_transform = img_transform
        self.is_train = train
        
        data = pd.read_csv(data_csv, header=None)
        if self.is_train:
            images = data.iloc[:,:-1].to_numpy()
            labels = data.iloc[:,-1].astype(int)
        else:
            images = data.iloc[:,:].to_numpy()
            labels = None
        
        self.images = images
        self.labels = labels
        print("Total Images: {}, Data Shape = {}".format(len(self.images), images.shape))
        
    def __len__(self):
        """Returns total number of samples in the dataset"""
        return len(self.images)
    
    def __getitem__(self, idx):
        """
        Loads image of the given index and performs preprocessing.
        
        INPUT: 
        idx: index of the image to be loaded.
        
        OUTPUT:
        sample: dictionary with keys images (Tensor of shape [1,C,H,W]) and labels (Tensor of labels [1]).
        """
        image = self.images[idx]
        image = np.array(image).astype(np.uint8).reshape(32,32, 1)
        
        if self.is_train:
            label = self.labels[idx]
        else:
            label = -1
        
        image = self.img_transform(image)
        # print(image.shape, label, type(image))
        sample = {"images": image, "labels": label}
        return sample

In [4]:
class NN(nn.Module):
    def __init__(self,n_h1,n_h2,p):
        super(NN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3,stride=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.maxpool1 = nn.MaxPool2d(2,stride=2)
        self.conv2 = nn.Conv2d(32, 64, 3,stride=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.maxpool2 = nn.MaxPool2d(2,stride=2)
        self.conv3 = nn.Conv2d(64, 256, 3,stride=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.maxpool3 = nn.MaxPool2d(2,stride=1)
        self.conv4 = nn.Conv2d(256, 512, 3,stride=1)
        self.fc1 = nn.Linear(512, n_h1)
        self.dropout = nn.Dropout(p)
        self.fc2 = nn.Linear(n_h1, n_h2)
    
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.maxpool1(x)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.maxpool2(x)
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.maxpool3(x)
        x = F.relu(self.conv4(x))
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [5]:
train_data = DevanagariDataset(
    data_csv = "devanagari/train_data_shuffled.csv",
    # data_csv = "/content/drive/MyDrive/A2.2/devanagari/train_data_shuffled.csv",
    train = True, 
    img_transform = transforms.ToTensor()
)
test_data = DevanagariDataset(
    data_csv = "devanagari/public_test.csv",
    # data_csv = "/content/drive/MyDrive/A2.2/devanagari/public_test.csv",
    train = True, 
    img_transform = transforms.ToTensor()
)

Total Images: 78200, Data Shape = (78200, 1024)
Total Images: 4600, Data Shape = (4600, 1024)


In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [7]:
batch_size = 200
epochs = 8
lr = 1e-4
torch.manual_seed(51)
model = NN(256, 46, 0.2)
model.to(device)
loss = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = lr)


In [None]:
sum(p.numel() for p in model.parameters())

In [8]:
train_loader = DataLoader(dataset = train_data, batch_size = batch_size, shuffle = False)
test_loader = DataLoader(dataset = test_data, batch_size = len(test_data), shuffle = False)

In [9]:
loss_vals = []
accs = []

In [10]:
for epoch in range(epochs):
    train_loss = 0
    accu_train = 0
    print("Epoch:", epoch+1)
    for batch in train_loader:
        batch_x, batch_y = batch["images"].to(device), batch["labels"].to(device)
        y_hat = model.forward(batch_x)
        loss_val = loss(y_hat, batch_y)
        train_loss+=loss_val.item()
        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()

        predictions = y_hat.argmax(dim=1, keepdim=True).squeeze()
        correct = (predictions == batch_y).sum().item()
        acc = correct / len(batch_x)
        accu_train+=acc

    for batch in test_loader:
        batch_x, batch_y = batch["images"].to(device), batch["labels"].to(device)
        y_hat = model.forward(batch_x)
        predictions = y_hat.argmax(dim=1, keepdim=True).squeeze()
        correct = (predictions == batch_y).sum().item()
        acc = correct / len(batch_y)
        accs.append(acc)
    loss_vals.append(train_loss/len(train_loader))
    print("Train Loss",loss_vals[-1])
    print("Train Accuracy",accu_train/len(train_loader))
    print("Test Accuracy",accs[-1])

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Epoch 1: 100%|██████████| 391/391 [00:19<00:00, 20.25batch/s, accuracy=91.5, loss=0.378]


Train Loss 1.359471805412751
Train Accuracy 0.6554475703324808
Test Accuracy 0.8719565217391304


Epoch 2: 100%|██████████| 391/391 [00:18<00:00, 21.23batch/s, accuracy=93, loss=0.217]


Train Loss 0.33145143651901304
Train Accuracy 0.907314578005116
Test Accuracy 0.9347826086956522


Epoch 3: 100%|██████████| 391/391 [00:18<00:00, 21.39batch/s, accuracy=97.5, loss=0.119]


Train Loss 0.19539045243312025
Train Accuracy 0.9454731457800498
Test Accuracy 0.9484782608695652


Epoch 4: 100%|██████████| 391/391 [00:18<00:00, 21.38batch/s, accuracy=96, loss=0.104]


Train Loss 0.13362599330027694
Train Accuracy 0.9620971867007677
Test Accuracy 0.96


Epoch 5: 100%|██████████| 391/391 [00:18<00:00, 21.29batch/s, accuracy=97.5, loss=0.078]


Train Loss 0.1004137891866362
Train Accuracy 0.9717263427110007
Test Accuracy 0.9617391304347827


Epoch 6: 100%|██████████| 391/391 [00:18<00:00, 21.37batch/s, accuracy=98, loss=0.0617]


Train Loss 0.07590792702553827
Train Accuracy 0.9784910485933546
Test Accuracy 0.9676086956521739


Epoch 7: 100%|██████████| 391/391 [00:18<00:00, 21.36batch/s, accuracy=97, loss=0.056]


Train Loss 0.06045453268034227
Train Accuracy 0.9831969309462966
Test Accuracy 0.9715217391304348


Epoch 8: 100%|██████████| 391/391 [00:18<00:00, 21.40batch/s, accuracy=99.5, loss=0.0275]


Train Loss 0.04844150289683543
Train Accuracy 0.9869181585677794
Test Accuracy 0.9741304347826087


In [None]:
model_path = "./model.pth"
loss_path = "./loss.txt"
acc_path = "./accuracy.txt"
pred_path = "./predictions.txt"

In [14]:
torch.save(model.state_dict(), model_path)
np.savetxt(acc_path, accs)
np.savetxt(loss_path, loss_vals)

In [17]:
torch.manual_seed(51)
model2 = NN(256, 46, 0.2)
model2.to(device)
model2.load_state_dict(torch.load(model_path, map_location=device))
for batch in test_loader:
    batch_x, batch_y = batch["images"].to(device), batch["labels"].to(device)
    y_hat = model2.forward(batch_x)
    y_hat = torch.argmax(y_hat, dim=1)
    np.savetxt("./predictions.txt", y_hat.detach().cpu().numpy())

<All keys matched successfully>

In [None]:
a = np.loadtxt("predictions.txt")
b = np.loadtxt("predictions2.txt")
c = [0 if a[i]==b[i] else 1 for i in range(len(a))]
sum(c)