In [0]:
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
import torchvision.transforms as transforms
from tqdm import tqdm_notebook
# Records whether a CUDA device is usable. The later cells shown in this notebook
# call torch.cuda.is_available() again instead of reading this flag.
train_on_gpu = torch.cuda.is_available()

In [0]:
%%capture
os.environ['KAGGLE_USERNAME'
] = "arohanajit232" # username from the json file
os.environ['KAGGLE_KEY'
] = "5289e13af33762d697c1d3c18c444f52" # key from the json file
!kaggle competitions download -c padhai-hindi-vowel-consonant-classification # api copied from kaggle

In [0]:
%%capture
!unzip test.zip
!unzip train.zip
!mkdir dataset
!!mv train dataset/
!mv test dataset/
!rm train.zip
!rm test.zip

In [0]:
# Sanity check: list the contents of dataset/ to confirm the extracted folders are in place.
!ls dataset/

In [0]:
#For converting the dataset to torchvision dataset format
class VowelConsonantDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Every file found under ``file_path`` (walked recursively) is one sample.
    In training mode a file name is split on ``"_"`` and its first two tokens
    are looked up in ``classes_mapping`` to build a ``(2, 10)`` one-hot label.
    In test mode the file name itself is returned in place of a label.

    Args:
        file_path: directory holding the image files.
        train: when true, build ``classes_mapping`` and return labels.
        transform: optional callable applied to the loaded RGB image.
    """

    def __init__(self, file_path,train=True,transform=None):
        self.transform = transform
        self.file_path = file_path
        self.train = train

        # Gather bare file names (no directory part) from the whole tree.
        collected = []
        for _root, _dirs, names in os.walk(self.file_path):
            collected.extend(names)
        self.file_names = collected
        self.len = len(collected)

        if self.train:
            self.classes_mapping = self.get_classes()

    def __len__(self):
        """Number of files discovered under ``file_path``."""
        return len(self.file_names)

    def __getitem__(self, index):
        """Return ``(image, label)`` in training mode, else ``(image, file_name)``."""
        name = self.file_names[index]
        image = self.pil_loader(self.file_path+"/"+name)
        if self.transform:
            image = self.transform(image)

        if not self.train:
            return image, name

        tokens = name.split("_")
        first_idx = self.classes_mapping[tokens[0]]
        second_idx = self.classes_mapping[tokens[1]]

        first_onehot = torch.zeros(10)
        second_onehot = torch.zeros(10)
        # The first token's index is shifted down by 10 before being used.
        # NOTE(review): presumably this relies on the first-token classes
        # sorting after ten second-token classes in classes_mapping - confirm.
        first_onehot[first_idx - 10] = 1
        second_onehot[second_idx] = 1
        return image, torch.stack([first_onehot, second_onehot])

    def pil_loader(self,path):
        """Open ``path`` and return its contents as an RGB PIL image."""
        with open(path, 'rb') as handle:
            return Image.open(handle).convert('RGB')

    def get_classes(self):
        """Map each distinct first/second file-name token to its rank in sorted order."""
        tokens = set()
        for name in self.file_names:
            parts = name.split("_")
            tokens.update((parts[0], parts[1]))
        return {token: position for position, token in enumerate(sorted(tokens))}

In [0]:
# Training-time preprocessing: random rotation, random resized crop to 224,
# random horizontal flip, tensor conversion, then per-channel normalisation.
transform1 = transforms.Compose(
    [
        transforms.RandomRotation(degrees=30),
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [0]:
# Deterministic preprocessing for evaluation: no random augmentation, but the
# same 224x224 size and normalisation constants as the training transform.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

train_dir = "../content/dataset/train"
full_data = VowelConsonantDataset(train_dir, train=True, transform=transform1)
train_size = int(0.9 * len(full_data))
test_size = len(full_data) - train_size  # size of the validation split

train_data, validation_data = random_split(full_data, [train_size, test_size])

# Validate on the same held-out indices but through the deterministic
# transform, so validation metrics are not perturbed by random augmentation.
eval_full_data = VowelConsonantDataset(train_dir, train=True, transform=eval_transform)
assert eval_full_data.file_names == full_data.file_names, \
    "file ordering changed between dataset instances; split indices would not match"
validation_data = torch.utils.data.Subset(eval_full_data, validation_data.indices)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
# Shuffling has no effect on evaluation metrics, so keep a stable order.
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=64, shuffle=False)

# Without a transform the test dataset returns PIL images, which the default
# DataLoader collate function cannot batch; convert them to tensors here.
test_data = VowelConsonantDataset("../content/dataset/test", train=False, transform=eval_transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)

In [0]:
# Sizes of the training split, the validation split and the full dataset (one per line),
# followed by the token -> index mapping as the cell's displayed value.
print(len(train_data), len(validation_data), len(full_data), sep="\n")
full_data.get_classes()

In [0]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [0]:
# Preview up to 20 images from one training batch.
data_iter = iter(train_loader)
images, labels = next(data_iter)
print(images[0].shape,images[0].size(0))

# Constants for undoing the Normalize step before display; they must match
# the mean/std passed to transforms.Normalize in the training transform.
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])

n_show = min(20, len(images))   # a batch may hold fewer than 20 samples
n_cols = (n_show + 1) // 2      # subplot grid dimensions must be integers

fig = plt.figure(figsize=(25, 4))
for idx in range(n_show):
    ax = fig.add_subplot(2, n_cols, idx+1, xticks=[], yticks=[])
    # Channels-first tensor -> channels-last array, as imshow expects (H, W, C).
    img = images[idx].numpy().transpose(1, 2, 0)
    # Un-normalise and clip so imshow receives values in [0, 1].
    ax.imshow(np.clip(img * norm_std + norm_mean, 0, 1))
print("\n\n\n",torch.max(labels[:,0,:],1))

In [0]:
# Start from torchvision's ResNet-50 with pretrained weights; its `fc` layer is replaced in a later cell.
model = torchvision.models.resnet50(pretrained=True)

In [0]:
# Turn off gradient tracking for every parameter currently in the model.
for param in model.parameters():
    param.requires_grad_(False)

In [0]:
# Replacement classifier head: 2048 -> 1024 -> 512 -> 64 -> 10, with in-place
# ReLU between layers and dropout (p=0.2) after the first two activations.
model.fc = nn.Sequential(
    nn.Linear(in_features=2048, out_features=1024),
    nn.ReLU(inplace=True),
    nn.Dropout(0.2),
    nn.Linear(in_features=1024, out_features=512),
    nn.ReLU(inplace=True),
    nn.Dropout(0.2),
    nn.Linear(in_features=512, out_features=64),
    nn.ReLU(inplace=True),
    nn.Linear(in_features=64, out_features=10),
)

In [0]:
# Place the network on the selected device, then create the cross-entropy
# criterion and an Adam optimizer (default hyper-parameters) over its parameters.
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(params=model.parameters())

In [0]:
# Train on the first label row (labels[:, 0, :]) and validate after every epoch.
# History lists are created before the `try` so the plotting code in `finally`
# always has something to draw, even if training stops on its first step.
epochs = 45
total_trainloss = []
total_valloss = []
total_trainacc = []
total_valacc = []
best_valid_loss = float("inf")

try:
    for i in tqdm_notebook(range(epochs)):
        train_loss = 0.0
        valid_loss = 0.0
        totalval_train = 0   # number of training samples seen this epoch
        totalval_val = 0     # number of validation samples seen this epoch
        acc_train = 0
        acc_valid = 0

        model.train()
        for image,label in tqdm_notebook(train_loader):
            image, label = image.to(device), label.to(device)
            batch_size = image.size(0)
            # Count the real batch size: the last batch is usually smaller than 64.
            totalval_train += batch_size
            opt.zero_grad()
            out = model(image)
            # One-hot target -> class index, as CrossEntropyLoss expects.
            _,ind = torch.max(label[:,0,:],1)
            _,pred_train = torch.max(out,1)
            acc_train += (pred_train==ind).sum().item()
            loss = loss_fn(out,ind)
            train_loss += loss.item()*batch_size
            loss.backward()
            opt.step()
            del image, label

        model.eval()
        # No gradients are needed for validation; skipping them saves memory and time.
        with torch.no_grad():
            for image,label in tqdm_notebook(validation_loader):
                image, label = image.to(device), label.to(device)
                batch_size = image.size(0)
                totalval_val += batch_size
                V_out = model(image)
                _,V_ind = torch.max(label[:,0,:],1)
                vloss = loss_fn(V_out,V_ind)
                valid_loss += vloss.item()*batch_size
                _,pred_valid = torch.max(V_out,1)
                acc_valid += (pred_valid==V_ind).sum().item()

        # The running losses are weighted by batch size, so divide by the
        # number of samples (not the number of batches) to get a per-sample mean.
        train_loss /= max(totalval_train, 1)
        valid_loss /= max(totalval_val, 1)
        acc_train = acc_train/max(totalval_train, 1) * 100
        acc_valid = acc_valid/max(totalval_val, 1) * 100
        total_trainloss.append(train_loss)
        total_valloss.append(valid_loss)
        total_trainacc.append(acc_train)
        total_valacc.append(acc_valid)

        print('Epoch: {} Train loss: {:.2f} Validation loss: {:.2f}'.format(i,train_loss,valid_loss))
        print('Epoch: {} Train accuracy: {:.2f} Validation accuracy: {:.2f}'.format(i,acc_train,acc_valid))

        # Keep the weights of the epoch with the lowest validation loss, so the
        # file really holds the best model and survives an interrupted run.
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), "best_model.pth")

    # Drop the last references to GPU tensors before releasing cached memory.
    out = None
    V_out = None
    torch.cuda.empty_cache()
finally:
    fig,(ax1,ax2) = plt.subplots(1,2,figsize=(20,5))
    ax1.plot(total_trainloss)
    ax1.plot(total_valloss)
    ax1.legend(['Train Loss','Validation Loss'])
    ax1.set_title('Loss')
    ax2.plot(total_trainacc)
    ax2.plot(total_valacc)
    ax2.legend(['Train accuracy','Validation Accuracy'])
    ax2.set_title('Accuracy')

In [0]:
# Restore the saved weights.  map_location remaps tensors saved from a GPU run
# onto the current device, so the checkpoint also loads on a CPU-only runtime.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.to(device)
model.eval()

In [0]:
# Count correct predictions for both label rows over the validation loader.
# NOTE(review): `vowel_model` and `cons_model` are not defined in any cell
# visible in this notebook (only `model` is trained above) - they must be
# created and put in eval mode before this cell can run; confirm their origin.
total=0
v=0
c=0
with torch.no_grad():  # inference only: no gradient bookkeeping needed
    for data in tqdm_notebook(validation_loader,total=len(validation_loader),unit='batch'):
        images,labels = data
        images,labels = images.to(device),labels.to(device)
        _,out_v = torch.max(vowel_model(images),1)
        _,out_c = torch.max(cons_model(images),1)
        _,lab1 = torch.max(labels[:,0,:],1)
        _,lab2 = torch.max(labels[:,1,:],1)
        # Use the real batch size; the final batch is usually smaller than 64.
        total += labels.size(0)
        v += (out_v==lab1).sum().item()
        c += (out_c==lab2).sum().item()
# Guard the percentages against an empty loader.
denominator = max(total, 1)
print('total images:',total)
print('correct vowels predictions:',v)
print('correct consonants predictions:',c)
print('Vowel Accuracy: ',(v/denominator)*100, '%')
print('Consonants Accuracy: ',(c/denominator)*100,'%')
    