<a href="https://colab.research.google.com/github/arohanajit/hindi-alphabets-classification/blob/master/project/HindiVowelClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os
import shutil
from zipfile import ZipFile
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
import torchvision.transforms as transforms
from tqdm import tqdm_notebook
import copy
# Record whether PyTorch can see a CUDA device.
# NOTE(review): this flag is not referenced by the other visible cells, which use `device` instead.
train_on_gpu = torch.cuda.is_available()

In [0]:
# Download and unpack the Kaggle competition archives once; later runs reuse ./dataset.
if not os.path.isdir('dataset'):
    # SECURITY NOTE(review): a Kaggle username and API key are committed in this cell.
    # They are kept only as a fallback so the notebook behaves as before; credentials
    # already present in the environment now take precedence. The key should be
    # revoked and supplied through the environment or kaggle.json instead.
    os.environ.setdefault('KAGGLE_USERNAME', "arohanajit232") # username from the json file
    os.environ.setdefault('KAGGLE_KEY', "5289e13af33762d697c1d3c18c444f52") # key from the json file
    exit_status = os.system('kaggle competitions download -c padhai-hindi-vowel-consonant-classification')
    if exit_status != 0:
        # Fail here with a clear message instead of a confusing missing-zip error below.
        raise RuntimeError(
            'kaggle download failed with exit status %d; '
            'check the Kaggle CLI installation and credentials' % exit_status
        )
    with ZipFile('test.zip', 'r') as zip_obj:
        zip_obj.extractall()
    with ZipFile('train.zip', 'r') as zip_obj:
        zip_obj.extractall()
    os.makedirs('dataset', exist_ok=True)
    shutil.move('train', 'dataset/')
    shutil.move('test', 'dataset/')
    # The archives are no longer needed once their contents live under ./dataset.
    os.remove('train.zip')
    os.remove('test.zip')
    print(os.listdir("dataset"))
else:
    print("Dataset already present!",os.listdir("dataset"))

In [0]:
# Adapter exposing the image folder through the torch Dataset interface
class VowelConsonantDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Every file found under ``file_path`` becomes one sample. In training
    mode the first two ``_``-separated tokens of a file name are treated as
    class tokens and each sample yields ``(image, label)`` where ``label``
    is a ``2 x 10`` tensor of stacked one-hot rows. Otherwise each sample
    yields ``(image, file_name)``.
    """

    def __init__(self, file_path, train=True, transform=None):
        self.transform = transform
        self.file_path = file_path
        self.train = train
        # Gather bare file names from the whole directory tree.
        collected = []
        for _root, _dirs, files in os.walk(self.file_path):
            collected.extend(files)
        self.file_names = collected
        self.len = len(collected)
        if self.train:
            self.classes_mapping = self.get_classes()

    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, index):
        name = self.file_names[index]
        image = self.pil_loader(self.file_path + "/" + name)
        if self.transform:
            image = self.transform(image)

        if not self.train:
            return image, name

        tokens = name.split("_")
        first_index = self.classes_mapping[tokens[0]]
        second_index = self.classes_mapping[tokens[1]]
        first_row = torch.zeros(10)
        second_row = torch.zeros(10)
        # The -10 offset presumably assumes the first token's classes occupy
        # indices 10..19 of the sorted mapping -- TODO confirm against the data.
        first_row[first_index - 10] = 1
        second_row[second_index] = 1
        return image, torch.stack([first_row, second_row])

    def pil_loader(self, path):
        """Open the file at ``path`` and return it as an RGB PIL image."""
        with open(path, 'rb') as handle:
            return Image.open(handle).convert('RGB')

    def get_classes(self):
        """Map each distinct class token to its rank in sorted order."""
        tokens = set()
        for name in self.file_names:
            parts = name.split("_")
            tokens.update((parts[0], parts[1]))
        return {token: rank for rank, token in enumerate(sorted(tokens))}

In [0]:
# Random augmentation (rotation, resized crop to 224, horizontal flip) followed by
# tensor conversion and per-channel normalization.
# NOTE(review): the mean/std values look like the usual ImageNet statistics -- confirm.
transform1 = transforms.Compose(
    [
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [0]:
# Labelled data: 90% is used for training, the remainder is held out for validation.
full_data = VowelConsonantDataset("../content/dataset/train", train=True, transform=transform1)
train_size = int(0.9 * len(full_data))
# Size of the held-out validation split (the name is kept for any later cells).
test_size = len(full_data) - train_size

# NOTE: both subsets wrap the same dataset object, so the validation samples
# also go through transform1's random augmentation.
train_data, validation_data = random_split(full_data, [train_size, test_size])

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
validation_loader = DataLoader(validation_data, batch_size=64, shuffle=True)

# The unlabelled test images need a transform as well: without one the dataset
# yields PIL images, which the DataLoader's default collate function cannot batch.
# Use a deterministic resize plus the same normalization as transform1.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])
test_data = VowelConsonantDataset("../content/dataset/test", train=False, transform=test_transform)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

In [0]:
# Print the size of the training split, the validation split and the full set,
# then display the class-token -> index mapping as the cell result.
for split in (train_data, validation_data, full_data):
    print(len(split))
full_data.get_classes()

In [0]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [0]:
# Preview up to 20 images from one training batch in a 2 x 10 grid.
data_iter = iter(train_loader)
images, labels = next(data_iter)
print(images[0].shape,images[0].size(0))
fig = plt.figure(figsize=(25, 4))
# Statistics passed to transforms.Normalize in transform1; used to undo the
# normalization so imshow receives values in [0, 1].
display_mean = np.array([0.485, 0.456, 0.406])
display_std = np.array([0.229, 0.224, 0.225])
n_preview = min(20, images.size(0))  # guard against batches smaller than 20
for idx in range(n_preview):
    # add_subplot needs integer grid dimensions (20/2 is a float in Python 3).
    ax = fig.add_subplot(2, 10, idx + 1, xticks=[], yticks=[])
    # (C, H, W) -> (H, W, C); a bare np.transpose would reverse all axes and
    # draw the image transposed.
    img = images[idx].permute(1, 2, 0).numpy()
    img = np.clip(img * display_std + display_mean, 0, 1)
    ax.imshow(img)
print("\n\n\n",torch.max(labels[:,0,:],1))
print(labels.shape)

In [0]:
class MyModel(nn.Module):
    """ResNet-50 backbone with two linear classification heads.

    The backbone's final ``fc`` layer is replaced by a 2048 -> 1280 linear
    projection. Dropout (p=0.2) is applied to the projected features, which
    then feed two independent linear heads.

    Args:
        num_classes1: number of outputs of the first head.
        num_classes2: number of outputs of the second head.
    """

    def __init__(self, num_classes1, num_classes2):
        super(MyModel, self).__init__()
        self.model_resnet = torchvision.models.resnet50(pretrained=True)
        self.model_resnet.fc = nn.Linear(2048, 1280, bias=True)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = self._make_head(num_classes1)
        self.fc2 = self._make_head(num_classes2)

    @staticmethod
    def _make_head(num_classes):
        """Build one 1280 -> num_classes head with Xavier weights and zero bias."""
        head = nn.Linear(1280, num_classes, bias=True)
        torch.nn.init.xavier_uniform_(head.weight)
        torch.nn.init.zeros_(head.bias)
        return head

    def forward(self, x):
        """Return ``(out1, out2)``, the raw scores of the two heads for batch ``x``."""
        features = self.model_resnet(x)
        # Both heads must consume the dropped-out features; feeding them the
        # raw features would make the dropout layer a no-op.
        features = self.dropout(features)
        out1 = self.fc1(features)
        out2 = self.fc2(features)
        return out1, out2

In [0]:
# Two-headed network with 10 outputs per head, a cross-entropy criterion and
# Nesterov-momentum SGD; the model is finally moved to the selected device.
my_model = MyModel(num_classes1=10, num_classes2=10)
loss_fn = nn.CrossEntropyLoss()
opt = optim.SGD(
    my_model.parameters(),
    lr=0.01,
    momentum=0.9,
    nesterov=True,
)
my_model.to(device)

In [0]:
def evaluation(dataloader,model):
    """Return the percentage of samples for which BOTH heads predict correctly.

    Args:
        dataloader: iterable of ``(inputs, labels)`` batches where ``labels``
            has shape ``(batch, 2, num_classes)`` holding one one-hot row per head.
        model: module returning ``(out1, out2)`` score tensors for a batch.

    Returns:
        Accuracy in percent as a float; ``0.0`` when the loader yields no samples.
    """
    # Run on whatever device the model lives on instead of relying on a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total, correct = 0, 0
    was_training = model.training
    model.eval()  # disable dropout / use running batch-norm statistics
    try:
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
                out1, out2 = model(inputs)
                pred1 = out1.argmax(dim=1)
                pred2 = out2.argmax(dim=1)
                labels1 = labels[:, 0, :].argmax(dim=1)
                labels2 = labels[:, 1, :].argmax(dim=1)
                # A sample counts only when both heads are right; comparing the
                # two hit masks for equality would also count double misses.
                both_right = (pred1 == labels1) & (pred2 == labels2)
                correct += both_right.sum().item()
                total += labels.size(0)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * correct / total

In [0]:
# Jointly train both heads: the optimised loss is the sum of the two
# cross-entropy terms. A copy of the weights is kept whenever a batch reaches
# a new minimum loss.
loss_epoch_arr = []
loss_arr = []
min_loss = 1000
batch_size = 64
# Number of batches per epoch, taken from the loader rather than a hard-coded
# sample count so the progress message stays right if the split changes.
n_iters = len(train_loader)
epochs = 45
for epoch in tqdm_notebook(range(epochs)):
    # Make sure training mode is active at the start of every epoch.
    my_model.train()
    for i, data in enumerate(tqdm_notebook(train_loader), 0):
        image, label = data
        image, label = image.to(device), label.to(device)
        opt.zero_grad()
        out = my_model(image)
        # Labels are stacked one-hot rows; recover the class indices that
        # CrossEntropyLoss expects.
        _, ind_V = torch.max(label[:, 0, :], 1)
        _, ind_C = torch.max(label[:, 1, :], 1)
        loss1 = loss_fn(out[0], ind_V)
        loss2 = loss_fn(out[1], ind_C)
        loss = torch.add(loss1, loss2)
        loss.backward()
        opt.step()
        # Read the scalar once; every .item() call forces a device sync.
        loss_value = loss.item()
        if min_loss > loss_value:
            min_loss = loss_value
            # NOTE: this snapshot is taken after opt.step(), i.e. it holds the
            # weights that follow the update for this batch.
            best_model = copy.deepcopy(my_model.state_dict())
            print('Min loss %0.2f' % min_loss)
        if i % 100 == 0:
            print('Iteration: %d/%d, Loss: %0.2f' % (i, n_iters, loss_value))
        del image, label, out
        torch.cuda.empty_cache()
        loss_arr.append(loss_value)
    print("Epoch number :",epoch)
    print("Train Accuracy :",evaluation(train_loader,my_model))
    # NOTE: this figure is computed on the held-out validation split.
    print("Test Accuracy :"  ,evaluation(validation_loader,my_model))
    # Records the loss of the last batch of the epoch (not an epoch average).
    loss_epoch_arr.append(loss_value)
    if epoch%10==0:
        plt.plot(loss_arr)
        plt.show()