<a href="https://colab.research.google.com/github/arohanajit/hindi-alphabets-classification/blob/master/project/HindiVowelClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
import torchvision.transforms as transforms
from tqdm import tqdm_notebook
# Flag recording whether PyTorch reports an available CUDA device.
train_on_gpu = torch.cuda.is_available()

In [0]:
%%capture
# Export the Kaggle credentials as environment variables, then download the
# competition files with the kaggle command-line tool.
# NOTE(review): the values below are placeholders; avoid committing real
# credentials to the notebook.
os.environ['KAGGLE_USERNAME'
] = "xxxxxxxx" # replace with the username from the Kaggle json file
os.environ['KAGGLE_KEY'
] = "xxxxxxxxxxxxx" # replace with the key from the Kaggle json file
!kaggle competitions download -c padhai-hindi-vowel-consonant-classification # api copied from kaggle

In [0]:
%%capture
!unzip test.zip
!unzip train.zip
!mkdir dataset
!!mv train dataset/
!mv test dataset/
!rm train.zip
!rm test.zip

In [0]:
# List the contents of the dataset directory.
!ls dataset/

In [0]:
# Adapts the image folders to the torch.utils.data.Dataset interface.
class VowelConsonantDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Every file found under ``file_path`` (sub-directories included) is one
    sample.  In training mode the first two ``_``-separated fields of a file
    name are the sample's two class tokens, and the label is a ``(2, 10)``
    tensor holding one one-hot row per token.  Outside training mode the file
    name is returned in place of the label.

    Args:
        file_path: Root directory that is walked for image files.
        train: When true, build ``classes_mapping`` and return labels.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, file_path, train=True, transform=None):
        self.transform = transform
        self.file_path = file_path
        self.train = train
        # os.walk descends into sub-directories, so remember the directory of
        # every file; joining each name onto the root alone would point at a
        # non-existent path for nested files.
        self.file_names = []
        self._file_dirs = []
        for directory, _, files in os.walk(self.file_path):
            for file in files:
                self.file_names.append(file)
                self._file_dirs.append(directory)
        self.len = len(self.file_names)
        if self.train:
            self.classes_mapping = self.get_classes()

    def __len__(self):
        """Return the number of files discovered under ``file_path``."""
        return len(self.file_names)

    def __getitem__(self, index):
        """Return ``(image, label)`` in training mode, else ``(image, file_name)``."""
        file_name = self.file_names[index]
        image_data = self.pil_loader(os.path.join(self._file_dirs[index], file_name))
        if self.transform:
            image_data = self.transform(image_data)
        if not self.train:
            return image_data, file_name

        file_name_splitted = file_name.split("_")
        Y1 = self.classes_mapping[file_name_splitted[0]]
        Y2 = self.classes_mapping[file_name_splitted[1]]
        z1, z2 = torch.zeros(10), torch.zeros(10)
        # NOTE(review): the "- 10" offset assumes get_classes() numbers the
        # first-field tokens 10..19, i.e. that exactly ten second-field tokens
        # sort before them - confirm against the dataset's naming scheme.
        z1[Y1 - 10], z2[Y2] = 1, 1
        label = torch.stack([z1, z2])
        return image_data, label

    def pil_loader(self, path):
        """Open the file at ``path`` and return it as an RGB PIL image."""
        with open(path, 'rb') as f:
            img = Image.open(f)
            # convert() is called while the file handle is still open.
            return img.convert('RGB')

    def get_classes(self):
        """Map every class token found in the file names to an integer index.

        The tokens are the first two ``_``-separated fields of each file name;
        indices follow the sorted order of the distinct tokens.
        """
        tokens = set()
        for name in self.file_names:
            name_splitted = name.split("_")
            tokens.update((name_splitted[0], name_splitted[1]))
        return {token: i for i, token in enumerate(sorted(tokens))}

In [0]:
# Pipeline applied to images loaded with transform1: random rotation, random
# resized crop to 224 pixels and random horizontal flip, followed by tensor
# conversion and per-channel normalisation.
transform1 = transforms.Compose(
    [
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [0]:
# Labelled images; a random 90/10 split yields the training and validation subsets.
full_data = VowelConsonantDataset("../content/dataset/train", train=True, transform=transform1)
train_size = int(0.9 * len(full_data))
test_size = len(full_data) - train_size  # size of the validation split

train_data, validation_data = random_split(full_data, [train_size, test_size])
# NOTE(review): both subsets wrap full_data, so the validation images also go
# through the random augmentations of transform1.

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=64, shuffle=True)

# The unlabelled test images need a tensor conversion too: without a transform
# the dataset yields PIL images, which the DataLoader's default collate
# function cannot batch. No random augmentation is applied here.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
test_data = VowelConsonantDataset("../content/dataset/test", train=False, transform=test_transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)

In [0]:
# Print the size of the training split, the validation split and the full
# dataset, then display the class-token -> index mapping.
for split in (train_data, validation_data, full_data):
    print(len(split))
full_data.get_classes()

In [0]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [0]:
# Preview up to 20 images of one training batch on a 2 x 10 grid.
data_iter = iter(train_loader)
images, labels = next(data_iter)
print(images[0].shape,images[0].size(0))

# Same constants as the Normalize step of transform1; used to undo the
# normalisation so that the pixel values are displayable.
display_mean = np.array([0.485, 0.456, 0.406])
display_std = np.array([0.229, 0.224, 0.225])

fig = plt.figure(figsize=(25, 4))
for idx in range(min(20, images.size(0))):
    # The grid dimensions must be integers; 20/2 would be the float 10.0.
    ax = fig.add_subplot(2, 20 // 2, idx + 1, xticks=[], yticks=[])
    # Move the channel axis last. A bare transpose would reverse all axes and
    # thereby swap height and width.
    img = images[idx].permute(1, 2, 0).numpy()
    img = np.clip(img * display_std + display_mean, 0, 1)
    ax.imshow(img)
print("\n\n\n",torch.max(labels[:,0,:],1))

In [0]:
# Build two separate pretrained ResNet-50 networks (vowel first, then
# consonant) and display the architecture of the first one.
vowel_model, cons_model = (
    torchvision.models.resnet50(pretrained=True) for _ in range(2)
)
vowel_model

In [0]:
# Turn off gradient tracking for every existing parameter of both networks.
for network in (vowel_model, cons_model):
    for weight in network.parameters():
        weight.requires_grad = False

In [0]:
# Give each network a fresh 2048 -> 10 linear layer as its ``fc`` head, then
# print both new layers.
vowel_model.fc = nn.Linear(in_features=2048, out_features=10, bias=True)
cons_model.fc = nn.Linear(in_features=2048, out_features=10, bias=True)
for head in (vowel_model.fc, cons_model.fc):
    print(head)

In [0]:
# Scratch cell: draw the same two curves, each with a legend, on three
# side-by-side axes.
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 5))
for axis in (ax1, ax2, ax3):
    axis.plot(x)
    axis.plot(y)
ax1.legend(['abc','def'])
ax2.legend(['abc','def'])
ax3.legend(['abc','def'])

In [0]:
# Move both networks to the selected device, then create one cross-entropy
# loss and one Adam optimizer (default settings) per network.
vowel_model.to(device)
cons_model.to(device)
loss_fn_v, loss_fn_c = nn.CrossEntropyLoss(), nn.CrossEntropyLoss()
opt_V = optim.Adam(params=vowel_model.parameters())
opt_C = optim.Adam(params=cons_model.parameters())

In [0]:
# Train both classifiers together for a fixed number of epochs, recording the
# per-epoch loss and accuracies. The history is created before the ``try`` so
# that the plots in ``finally`` can always be drawn, even when training is
# interrupted.
epochs = 45
total_trainloss = []
total_valloss = []
total_voweltrainacc = []
total_vowelvalacc = []
total_constrainacc = []
total_consvalacc = []
try:
    for i in tqdm_notebook(range(epochs)):
        train_loss = 0
        valid_loss = 0
        totalval_train = 0  # number of training samples seen this epoch
        totalval_val = 0    # number of validation samples seen this epoch
        acc_Vtrain = 0
        acc_Ctrain = 0
        acc_Vvalid = 0
        acc_Cvalid = 0

        vowel_model.train()
        cons_model.train()
        for image,label in tqdm_notebook(train_loader):
            image, label = image.to(device), label.to(device)
            # Count the real batch size: the last batch may hold fewer than
            # 64 samples, and a fixed 64 would understate the accuracy.
            totalval_train += image.size(0)
            opt_V.zero_grad()
            opt_C.zero_grad()
            out_V = vowel_model(image)
            out_C = cons_model(image)
            # The labels are one-hot rows; the position of the maximum is the
            # class index expected by CrossEntropyLoss.
            _,ind_V = torch.max(label[:,0,:],1)
            _,ind_C = torch.max(label[:,1,:],1)
            _,pred_Vtrain = torch.max(out_V,1)
            _,pred_Ctrain = torch.max(out_C,1)
            acc_Vtrain += (pred_Vtrain==ind_V).sum().item()
            acc_Ctrain += (pred_Ctrain==ind_C).sum().item()
            loss = loss_fn_v(out_V,ind_V)+loss_fn_c(out_C,ind_C)
            # Weight the batch-mean loss by the batch size; the sum is turned
            # into a per-sample average after the loop.
            train_loss += loss.item()*image.size(0)
            loss.backward()
            opt_V.step()
            opt_C.step()
            del image, label

        vowel_model.eval()
        cons_model.eval()
        # No gradients are needed for validation; skipping them saves memory.
        with torch.no_grad():
            for image,label in tqdm_notebook(validation_loader):
                totalval_val += image.size(0)
                image, label = image.to(device), label.to(device)
                V_out_V = vowel_model(image)
                V_out_C = cons_model(image)
                _,V_ind_V = torch.max(label[:,0,:],1)
                _,V_ind_C = torch.max(label[:,1,:],1)
                vloss = loss_fn_v(V_out_V,V_ind_V)+loss_fn_c(V_out_C,V_ind_C)
                valid_loss += vloss.item()*image.size(0)
                _,pred_Vvalid = torch.max(V_out_V,1)
                _,pred_Cvalid = torch.max(V_out_C,1)
                acc_Vvalid += (pred_Vvalid==V_ind_V).sum().item()
                acc_Cvalid += (pred_Cvalid==V_ind_C).sum().item()

        # The loss sums are sample-weighted, so divide by the number of
        # samples (not batches). max(..., 1) guards against an empty loader.
        seen_train = max(totalval_train, 1)
        seen_val = max(totalval_val, 1)
        train_loss /= seen_train
        valid_loss /= seen_val
        V_acc_train = acc_Vtrain/seen_train * 100
        V_acc_valid = acc_Vvalid/seen_val * 100
        C_acc_train = acc_Ctrain/seen_train * 100
        C_acc_valid = acc_Cvalid/seen_val * 100
        total_trainloss.append(train_loss)
        total_valloss.append(valid_loss)
        total_voweltrainacc.append(V_acc_train)
        total_vowelvalacc.append(V_acc_valid)
        total_constrainacc.append(C_acc_train)
        total_consvalacc.append(C_acc_valid)

        print('Epoch: {} Train loss: {:.2f} Validation loss: {:.2f}'.format(i,train_loss,valid_loss))
        print('Epoch: {} For vowels: Train accuracy: {:.2f} Validation accuracy: {:.2f}'.format(i,V_acc_train,V_acc_valid))
        print('Epoch: {} For consonants: Train accuracy: {:.2f} Validation accuracy: {:.2f}'.format(i,C_acc_train,C_acc_valid))

    # Save the weights reached after the final epoch.
    torch.save(vowel_model.state_dict(), "best_model_V.pth")
    torch.save(cons_model.state_dict(), "best_model_C.pth")
    del out_V, out_C
    torch.cuda.empty_cache()
finally:
    # Plot whatever history was collected: loss, training accuracy and
    # validation accuracy.
    fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(20,5))
    ax1.plot(total_trainloss)
    ax1.plot(total_valloss)
    ax1.legend(['Train Loss','Validation Loss'])
    ax1.set_title('Loss')
    ax2.plot(total_voweltrainacc)
    ax2.plot(total_constrainacc)
    ax2.legend(['Vowels','Consonants'])
    ax2.set_title('Training Accuracy')
    ax3.plot(total_vowelvalacc)
    ax3.plot(total_consvalacc)
    ax3.legend(['Vowels','Consonants'])
    ax3.set_title('Validation Accuracy')

In [0]:
# Restore the saved weights and switch both networks to evaluation mode.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU also loads on a CPU-only runtime.
vowel_model.load_state_dict(torch.load("best_model_V.pth", map_location=device))
vowel_model.to(device)
vowel_model.eval()

cons_model.load_state_dict(torch.load("best_model_C.pth", map_location=device))
cons_model.to(device)
cons_model.eval()

In [0]:
# Measure the accuracy of both networks on the validation split.
total=0  # number of validation images evaluated
v=0      # correct predictions of vowel_model
c=0      # correct predictions of cons_model
# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for data in tqdm_notebook(validation_loader,total=len(validation_loader),unit='batch'):
        images,labels = data
        images,labels = images.to(device),labels.to(device)
        _,out_v = torch.max(vowel_model(images),1)
        _,out_c = torch.max(cons_model(images),1)
        # The labels are one-hot rows; take the position of the maximum.
        _,lab1 = torch.max(labels[:,0,:],1)
        _,lab2 = torch.max(labels[:,1,:],1)
        # Count the real batch size: the last batch may hold fewer than 64
        # images, and a fixed 64 would understate both accuracies.
        total += images.size(0)
        v += (out_v==lab1).sum().item()
        c += (out_c==lab2).sum().item()
print('total images:',total)
print('correct vowels predictions:',v)
print('correct consonants predictions:',c)
print('Vowel Accuracy: ',(v/total)*100, '%')
print('Consonants Accuracy: ',(c/total)*100,'%')
    