In [None]:
import os
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms

#For converting the dataset to torchvision dataset format
class VowelConsonantDataset(Dataset):
    """Image dataset whose two labels are encoded in each file name.

    File names are split on ``"_"``; the first two tokens are the two class
    tokens. They are expected to look like ``V3`` / ``C7`` (one letter followed
    by the class number), matching the ``V<x>_C<y>`` strings written to the
    submission file.

    Args:
        file_path: Directory that is walked recursively for image files.
        train: When True, ``__getitem__`` returns ``(image, label)`` where
            ``label`` is a ``(2, num_classes)`` one-hot tensor (row 0 for the
            first token, row 1 for the second). When False it returns
            ``(image, file_name)``.
        transform: Optional callable applied to the loaded PIL image.

    Attributes:
        file_names: Base names of all discovered files.
        file_paths: Full path of each file, index-aligned with ``file_names``.
        classes_mapping: Sorted-token -> index mapping (train mode only).
    """

    # Width of each one-hot label row.
    num_classes = 10

    def __init__(self, file_path,train=True,transform=None):
        self.transform = transform
        self.file_path=file_path
        self.train=train
        self.file_names = []
        self.file_paths = []
        # Keep the real location of every file: os.walk descends into
        # sub-directories, so rebuilding the path as file_path + "/" + name
        # would point at a non-existent file for anything that is nested.
        for dirpath, _, files in os.walk(self.file_path):
            for file in files:
                self.file_names.append(file)
                self.file_paths.append(os.path.join(dirpath, file))
        self.len = len(self.file_names)
        if self.train:
            self.classes_mapping=self.get_classes()

    def __len__(self):
        """Return the number of discovered files."""
        return len(self.file_names)

    def __getitem__(self, index):
        """Load one sample; see the class docstring for the returned tuple."""
        file_name=self.file_names[index]
        image_data=self.pil_loader(self.file_paths[index])
        if self.transform:
            image_data = self.transform(image_data)
        if not self.train:
            return image_data, file_name

        first_token, second_token = file_name.split("_")[:2]
        # Index by the number embedded in the token instead of by the token's
        # rank among the classes that happen to be present; the rank-based
        # "mapping - 10" trick silently mislabels samples whenever a class is
        # missing from the directory.
        label = torch.zeros(2, self.num_classes)
        label[0, self._class_index(first_token)] = 1
        label[1, self._class_index(second_token)] = 1
        return image_data, label

    def _class_index(self, token):
        """Return the class number carried by a token such as ``V3`` or ``C7``.

        Raises:
            ValueError: If the token has no integer after its first character
                or the number is outside ``[0, num_classes)``.
        """
        try:
            index = int(token[1:])
        except ValueError:
            raise ValueError("Cannot read a class number from token %r" % (token,))
        if not 0 <= index < self.num_classes:
            raise ValueError("Class number %d from token %r is out of range" % (index, token))
        return index

    def pil_loader(self,path):
        """Open the image at ``path`` and return it converted to RGB."""
        with open(path, 'rb') as f:
            img = Image.open(f)
            return img.convert('RGB')

    def get_classes(self):
        """Map every distinct class token (first two ``_`` fields) to its sorted rank."""
        classes = set()
        for name in self.file_names:
            name_splitted=name.split("_")
            classes.update([name_splitted[0],name_splitted[1]])
        return {cl: i for i, cl in enumerate(sorted(classes))}
    

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import matplotlib.pyplot as plt
from torchvision import datasets

import torchvision.transforms as transforms

import numpy as np
import pandas as pd

# True when PyTorch reports an available CUDA device.
# NOTE(review): not referenced by the other visible cells, which use `device` instead.
train_on_gpu = torch.cuda.is_available()

In [None]:
# Preprocessing shared by the training and test datasets, applied in order:
# resize, convert to a tensor, then normalize each of the three channels.
# NOTE(review): the mean/std values look like the usual ImageNet statistics
# expected by torchvision's pretrained models, and Resize(224) with a single
# int is documented to scale the shorter side only -- confirm the inputs are
# square if fixed 224x224 batches are required.
_preprocessing_steps = [
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(_preprocessing_steps)

In [None]:
# Load the labelled images and hold out 10% of them for validation.
full_data = VowelConsonantDataset("../input/padhai-hindi-vow-cons-classification/train/train",train=True,transform=transform)
train_size = int(0.9 * len(full_data))
# Number of held-out validation samples (name kept for compatibility).
test_size = len(full_data) - train_size

# Seed the global RNG so the train/validation partition is the same on every
# Restart & Run All; without it the reported validation score is not comparable
# between runs.
torch.manual_seed(42)
train_data, validation_data = random_split(full_data, [train_size, test_size])

train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
# Evaluation does not depend on sample order, so the validation loader is not shuffled.
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=32, shuffle=False)

In [None]:
# Unlabelled test images: with train=False the dataset yields (image, file_name)
# pairs, and the loader keeps them in dataset order.
test_data = VowelConsonantDataset(
    file_path="../input/padhai-hindi-vow-cons-classification/test/test",
    train=False,
    transform=transform,
)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
from torchvision import models

In [None]:
class Kaggle(nn.Module):
    """ResNet-50 backbone followed by two independent 10-way classifier heads.

    The backbone is created with ``pretrained=True`` and its final ``fc`` layer
    is replaced by a linear layer producing 2048 features. Both heads consume
    that same feature vector.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet50(pretrained=True, progress=True)
        backbone.fc = nn.Linear(backbone.fc.in_features, 2048)
        self.model_resnet = backbone
        self.fc_model = self._build_head()
        self.fc_model2 = self._build_head()

    @staticmethod
    def _build_head():
        """Create one classifier head: BatchNorm -> Dropout -> 2048-256-10 MLP."""
        return nn.Sequential(
            nn.BatchNorm1d(2048),
            nn.Dropout(0.3),
            nn.Linear(2048, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
        )

    def forward(self, x):
        """Return the pair ``(fc_model(features), fc_model2(features))``."""
        features = self.model_resnet(x)
        # Flatten everything after the batch dimension.
        features = features.view(features.size(0), -1)
        return self.fc_model(features), self.fc_model2(features)


In [None]:
# Builds a separate, throwaway Kaggle instance only to print its layer structure.
print(Kaggle())

In [None]:
# Instantiate the model used by every later cell and place it on the selected device.
kag = Kaggle().to(device)

In [None]:
def evaluation(dataloader, model):
    """Return the percentage of samples for which BOTH heads predict correctly.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches, where ``labels``
            is indexed as ``labels[:, 0, :]`` / ``labels[:, 1, :]`` and each
            slice is a one-hot row per sample.
        model: Module returning a pair ``(output1, output2)`` of per-class
            scores for a batch of inputs.

    Returns:
        Accuracy in percent as a float; ``0.0`` when the dataloader is empty.
    """
    # Run on whatever device the model lives on rather than relying on a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total, correct = 0, 0
    was_training = model.training
    # eval() freezes dropout and batch-norm statistics; no_grad() avoids
    # building autograd graphs that would only waste memory here.
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
                output1, output2 = model(inputs)
                pred1 = output1.argmax(dim=1)
                pred2 = output2.argmax(dim=1)
                labels1 = labels[:, 0, :].argmax(dim=1)
                labels2 = labels[:, 1, :].argmax(dim=1)
                total += labels.size(0)
                # A sample counts only when both heads are right. Comparing the
                # two correctness masks with == would also count samples where
                # both heads are wrong.
                correct += ((pred1 == labels1) & (pred2 == labels2)).sum().item()
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * correct / total

In [None]:
import torch.optim as optim

# Cross-entropy criterion; the training cell applies it to each head's output separately.
loss_fn = nn.CrossEntropyLoss()
# Adam over every parameter of `kag` (backbone and both heads).
# NOTE(review): lr=0.05 is far above Adam's documented default of 1e-3 -- confirm
# this is intended when fine-tuning a pretrained backbone.
opt = optim.Adam(kag.parameters(), lr = 0.05)

In [None]:
%%time
loss_arr = []        # summed two-head loss of every training batch, in order
loss_epoch_arr = []  # mean batch loss of each completed epoch
max_epochs = 1

for epoch in range(max_epochs):
    # Training mode (dropout / batch-norm updates) only needs to be set once per epoch.
    kag.train()
    epoch_losses = []
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # labels[:, k, :] is a one-hot row per sample; CrossEntropyLoss wants
        # class indices. Staying in torch avoids the CPU/numpy round-trip.
        labels1 = labels[:, 0, :].argmax(dim=1)
        labels2 = labels[:, 1, :].argmax(dim=1)

        opt.zero_grad()
        output1, output2 = kag(images)

        # Both heads are optimised jointly through the sum of their losses.
        loss = loss_fn(output1, labels1) + loss_fn(output2, labels2)
        loss.backward()
        opt.step()

        epoch_losses.append(loss.item())

    loss_arr.extend(epoch_losses)
    # Record the epoch average rather than whatever the last batch happened to
    # be; the guard also avoids a NameError/ZeroDivisionError on an empty loader.
    if epoch_losses:
        loss_epoch_arr.append(sum(epoch_losses) / len(epoch_losses))
    print("Train Accuracy :",evaluation(train_loader,kag))

plt.plot(loss_arr)
plt.xlabel("Training batch")
plt.ylabel("Loss (head 1 + head 2)")
plt.title("Training loss per batch")
plt.show()

In [None]:
# Percentage score returned by evaluation() on the held-out validation loader.
print(evaluation(validation_loader,kag))

In [None]:
#for images, labels in test_loader:
#    print(labels)

In [None]:
# Predict both heads for every test image and collect the submission rows.
kag.eval()
prediction = []    # "V<x>_C<y>" string per test image
actual_label = []  # file name of the corresponding test image

# no_grad(): inference only, so skip autograd bookkeeping and its memory cost.
with torch.no_grad():
    for inputs, file_names in test_loader:
        # Go through forward() so inference uses exactly the training-time path.
        out1, out2 = kag(inputs.to(device))
        pred1 = out1.argmax(dim=1).tolist()
        pred2 = out2.argmax(dim=1).tolist()
        for x, y, z in zip(pred1, pred2, file_names):
            prediction.append("V" + str(x) + "_" + "C" + str(y))
            actual_label.append(z)

In [None]:
# One row per test image: its file name and the predicted "V<x>_C<y>" class string.
submission_columns = {"ImageId": actual_label, "Class": prediction}
submission = pd.DataFrame(submission_columns)
submission.to_csv('submission.csv', index=False)