In [112]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms, datasets
import os
import numpy as np
from statistics import mode

In [101]:
# Pick the compute device: the first CUDA GPU when one is available, otherwise the CPU.
# This only selects the device; nothing is moved onto it here.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [215]:
#CNN Network
class ConvNet(nn.Module):
    """Five-convolution image classifier for 3-channel inputs.

    Layout: conv1 -> bn1 -> ReLU -> 2x2 max-pool, then conv2..conv5
    (12 -> 20 -> 32 -> 46 -> 60 channels, each followed by ReLU; conv3 has no
    batch norm), an adaptive average pool to 7x7, and one linear layer.

    The adaptive pool makes the flattened feature size independent of the
    input resolution, so the network always returns exactly one row of logits
    per input image.

    Args:
        num_classes: number of logits produced per image.
    """

    def __init__(self,num_classes=25):
        super(ConvNet,self).__init__()
        # Output size after a convolution filter: ((w - f + 2P) / s) + 1.
        # Every convolution below uses f=3, s=1, P=1, so height/width are preserved.

        # Input shape = (N, 3, H, W)
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        self.bn1=nn.BatchNorm2d(num_features=12)
        self.relu1=nn.ReLU()
        # Shape = (N, 12, H, W)

        # Halve the spatial size
        self.pool=nn.MaxPool2d(kernel_size=2)
        # Shape = (N, 12, H/2, W/2)

        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        self.bn2=nn.BatchNorm2d(num_features=20)
        self.relu2=nn.ReLU()
        # Shape = (N, 20, H/2, W/2)

        # conv3 has no batch-norm layer; kept that way so existing checkpoints still load
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        self.relu3=nn.ReLU()
        # Shape = (N, 32, H/2, W/2)

        self.conv4=nn.Conv2d(in_channels=32,out_channels=46,kernel_size=3,stride=1,padding=1)
        self.bn4=nn.BatchNorm2d(num_features=46)
        self.relu4=nn.ReLU()
        # Shape = (N, 46, H/2, W/2)

        self.conv5=nn.Conv2d(in_channels=46,out_channels=60,kernel_size=3,stride=1,padding=1)
        self.bn5=nn.BatchNorm2d(num_features=60)
        self.relu5=nn.ReLU()
        # Shape = (N, 60, H/2, W/2)

        # Reduce any spatial size to the 7x7 grid the linear layer expects.
        # This layer has no parameters, so state_dict keys are unchanged.
        self.adaptive_pool=nn.AdaptiveAvgPool2d((7, 7))
        # Shape = (N, 60, 7, 7)

        self.fc=nn.Linear(in_features=7 * 7 * 60,out_features=num_classes)

    #Feed forward function
    def forward(self,input):
        """Compute class logits.

        Args:
            input: float tensor of shape (N, 3, H, W).

        Returns:
            Tensor of shape (N, num_classes) with unnormalised class scores.
        """
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
        output=self.pool(output)

        output=self.conv2(output)
        output=self.bn2(output)
        output=self.relu2(output)

        output=self.conv3(output)
        output=self.relu3(output)

        output=self.conv4(output)
        output=self.bn4(output)
        output=self.relu4(output)

        output=self.conv5(output)
        output=self.bn5(output)
        output=self.relu5(output)

        # (N, 60, H/2, W/2) -> (N, 60, 7, 7)
        output=self.adaptive_pool(output)

        # Flatten everything except the batch axis so rows stay aligned with images
        output=output.flatten(start_dim=1)

        output=self.fc(output)

        return output

In [207]:
# Define transforms to be applied to the image data
# Side length (pixels) of the square image handed to the network.
IMAGE_SIZE = 224

transform = transforms.Compose([
    # Resize the shorter edge to the crop size first. CenterCrop zero-pads any
    # image smaller than the crop, so resizing to less than IMAGE_SIZE would
    # leave a small picture inside a black frame.
    transforms.Resize(IMAGE_SIZE),
    transforms.CenterCrop(IMAGE_SIZE),
    # Random augmentations
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(30),
    transforms.ToTensor(),
    # Map each channel from [0, 1] to [-1, 1]
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


In [239]:
# Build the training set from an image directory, applying `transform` to every image
train_dataset = datasets.ImageFolder(
    root="D:/tay/Data/Data_crop/Data_split/classes_image/image_train/",
    transform=transform,
)

# Serve one image per step, reshuffled every epoch
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=1,
)

# Class names as reported by the dataset
label_names = train_dataset.classes

In [80]:
# Show the class-name list currently bound to `label_names`
# (it is assigned from an ImageFolder's `.classes` in the dataset cells).
print(label_names)

['hand_A', 'hand_A2', 'hand_B', 'hand_C', 'hand_D', 'hand_D2', 'hand_E', 'hand_G', 'hand_H', 'hand_I', 'hand_K', 'hand_L', 'hand_M', 'hand_N', 'hand_O', 'hand_O3', 'hand_P', 'hand_Q', 'hand_R', 'hand_S', 'hand_T', 'hand_U', 'hand_V', 'hand_X', 'hand_Y']


In [254]:
# Initialize the model and optimizer
# Size the output layer from the training set instead of a hard-coded count,
# so the network always has one logit per class folder.
num_classes = len(train_dataset.classes)
model = ConvNet(num_classes=num_classes)

# Adam with a small learning rate and L2 weight decay
optimizer=optim.Adam(model.parameters(),lr=0.0001,weight_decay=0.0001)

# Takes raw logits and integer class indices
loss_function=nn.CrossEntropyLoss()

In [None]:
# Not updated by this loop; retained because other cells may read it.
best_accuracy=0.0
num_epochs = 10

# Run every batch on whichever device the model currently lives on.
model_device = next(model.parameters()).device

# Create the checkpoint directory up front so a missing folder fails before
# any training time is spent, not after the first epoch.
checkpoint_template = 'E:/saved_model/epoch {}_checkpoint.model'
os.makedirs(os.path.dirname(checkpoint_template), exist_ok=True)

for epoch in range(num_epochs):

    #Evaluation and training on training dataset
    model.train()
    train_correct = 0
    train_seen = 0
    train_loss = 0.0
    for i, (images,labels) in enumerate(train_loader):
        images = images.to(model_device)
        labels = labels.to(model_device)

        optimizer.zero_grad()

        outputs = model(images)

        # If the model returns more logit rows than there are labels (a multiple
        # of the batch size), repeat each label so rows and targets line up.
        # This is a no-op when there is exactly one row per image.
        if outputs.size(0) != labels.size(0):
            labels = labels.repeat_interleave(outputs.size(0) // labels.size(0))

        # Backpropagate the loss; without this the optimizer step has no
        # gradients to apply and the weights never change.
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        preds = outputs.argmax(dim=1)
        train_loss += loss.item() * labels.size(0)
        train_correct += int((preds == labels).sum())
        train_seen += labels.size(0)

    # Normalise by the number of scored rows so accuracy is a fraction in [0, 1];
    # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
    train_accuracy = train_correct / max(train_seen, 1)
    train_loss = train_loss / max(train_seen, 1)
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy))
    torch.save(model.state_dict(),checkpoint_template.format(epoch+1))

In [209]:
# Load the dataset
# Evaluation must be repeatable, so reuse the training pipeline with its
# random augmentation steps removed (resize/crop/tensor/normalise are kept).
_random_steps = (
    transforms.RandomHorizontalFlip,
    transforms.RandomVerticalFlip,
    transforms.RandomRotation,
)
val_transform = transforms.Compose(
    [step for step in transform.transforms if not isinstance(step, _random_steps)]
)
val_dataset = datasets.ImageFolder("D:/tay/Data/Data_crop/Data_split/classes_image/image_test/", transform=val_transform)

# Label indices come from each folder's class list; if the two lists differ,
# predictions and targets would refer to different classes.
if val_dataset.classes != train_dataset.classes:
    raise ValueError(
        "Validation classes do not match training classes: "
        f"{val_dataset.classes} != {train_dataset.classes}"
    )

# Define the data loader (order does not affect accuracy, so no shuffling)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

# Define the label names
label_names = val_dataset.classes

In [None]:
    # Evaluation on testing dataset
    model.eval()

    # Run every batch on whichever device the model currently lives on.
    eval_device = next(model.parameters()).device

    test_correct = 0
    test_count = 0
    # Gradients are not needed for evaluation
    with torch.no_grad():
        for i, (images,labels) in enumerate(val_loader):
            images = images.to(eval_device)
            labels = labels.to(eval_device)
            outputs = model(images)
            prediction = outputs.argmax(dim=1)
            # If the model returns more rows than there are labels (a multiple of
            # the batch size), repeat each label so the comparison is row-for-row
            # rather than a silent broadcast. No-op with one row per image.
            if prediction.size(0) != labels.size(0):
                labels = labels.repeat_interleave(prediction.size(0) // labels.size(0))
            test_correct += int((prediction == labels).sum())
            test_count += labels.size(0)

    # Fraction of scored rows predicted correctly; 0.0 when the loader is empty
    test_accuracy = test_correct / test_count if test_count else 0.0

In [None]:
# Evaluate the model on the validation set
model.eval()

# Run every batch on whichever device the model currently lives on.
eval_device = next(model.parameters()).device

with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in val_loader:
        inputs = inputs.to(eval_device)
        labels = labels.to(eval_device)
        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)
        # If the model returns more rows than there are labels (a multiple of
        # the batch size), repeat each label so `correct` can never exceed
        # `total` through broadcasting. No-op with one row per image.
        if predicted.size(0) != labels.size(0):
            labels = labels.repeat_interleave(predicted.size(0) // labels.size(0))
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Report 0% instead of dividing by zero when the loader yields nothing
    accuracy_pct = 100 * correct / total if total else 0.0
    print(f'Validation Accuracy: {accuracy_pct:.2f}%')