In [1]:
import numpy as np
import os
import time
import datetime as dt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms, datasets
from torch.autograd import Variable
import torchvision

In [2]:
# Select the compute device: CUDA when PyTorch reports it as available,
# otherwise the CPU. Nothing is moved here; later cells call .to(device).
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [3]:
# Preprocessing pipeline applied to every image before it reaches the network:
# Resize(128) -> CenterCrop(100) -> ToTensor -> per-channel Normalize with
# mean 0.5 and std 0.5 on each of the three channels.
# Random augmentation (RandomHorizontalFlip, RandomVerticalFlip,
# RandomRotation(30)) is currently disabled.
transform = transforms.Compose([
    transforms.Resize(size=128),
    transforms.CenterCrop(size=100),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [83]:
# Training split: one sub-folder per class, preprocessed with `transform`.
train_dataset = datasets.ImageFolder(
    root="D:/tay/Data/Data_crop/Data_split/classes_image/image_train/",
    transform=transform,
)

# Serve the training images one at a time, in shuffled order.
train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)

# Class names as exposed by the dataset's `classes` attribute.
label_names = train_dataset.classes

In [84]:
# Validation split, preprocessed with the same `transform` as the training data.
val_dataset = datasets.ImageFolder("D:/tay/Data/Data_crop/Data_split/classes_image/image_test/", transform=transform)

# ImageFolder assigns integer labels from the class folders it finds, so both
# splits must expose the same class list; otherwise a label index would mean a
# different class in each split and the reported accuracy would be meaningless.
if val_dataset.classes != train_dataset.classes:
    raise ValueError(
        "Validation classes do not match training classes: "
        "{} vs {}".format(val_dataset.classes, train_dataset.classes)
    )

# Accuracy does not depend on sample order, so evaluation is kept
# deterministic instead of reshuffling on every pass.
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

# Define the label names (identical to the training class list after the check above)
label_names = val_dataset.classes

In [22]:
# Show the class names read from the dataset's `classes` attribute.
print(label_names)

['hand_A', 'hand_A2', 'hand_B', 'hand_C', 'hand_D', 'hand_D2', 'hand_E', 'hand_G', 'hand_H', 'hand_I', 'hand_K', 'hand_L', 'hand_M', 'hand_N', 'hand_O', 'hand_O3', 'hand_P', 'hand_Q', 'hand_R', 'hand_S', 'hand_T', 'hand_U', 'hand_V', 'hand_X', 'hand_Y']


In [85]:
class ConvNet(nn.Module):
    """Small three-stage CNN classifier for 3-channel 100x100 images.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and a 2x2
    max-pool with stride 2, taking the channels 3 -> 16 -> 32 -> 64 and the
    spatial size 100 -> 50 -> 25 -> 12. The resulting 64 * 12 * 12 = 9216
    features feed a 128-unit hidden layer and a final linear layer that
    produces one logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 25, the number of
            classes used by this notebook.
    """

    # Flattened feature count after the third pooling stage for 100x100 input.
    _FLAT_FEATURES = 64 * 12 * 12  # == 2304 * 4 == 9216

    def __init__(self, num_classes=25):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, 3, 100, 100).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield 9216
                features per sample (raised by the first linear layer).
        """
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = self.pool3(torch.relu(self.conv3(x)))
        # Flatten everything except the batch axis. Reshaping with
        # view(-1, 9216) would silently merge samples into the feature axis
        # when the input size is wrong instead of failing loudly.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
# Build the network on the selected device and show its layer layout.
model = ConvNet().to(device)
print(model)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
num_epochs = 1
start_time = time.time()
# Loop over the dataset and train the model
for epoch in range(num_epochs):
    # Switch back to training mode at the start of every epoch, because the
    # validation pass below puts the model in eval mode.
    model.train()
    for i, (inputs, labels) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Move the batch to the same device as the model
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print training statistics (elapsed time is reported in minutes)
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Train time: {:.2f}'
                  .format(epoch+1, num_epochs, i+1, len(train_loader), loss.item(), (time.time()-start_time)/60))

    # Evaluate the model on the validation dataset
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            # The model lives on `device`; feeding it CPU tensors raises a
            # device-mismatch error when running on CUDA, so move both the
            # images and the labels before the forward pass and comparison.
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            # Index of the largest logit is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy on validation set: {:.2f}%'.format(100 * correct / total))


ConvNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=25, bias=True)
)
Epoch [1/1], Step [100/274360], Loss: 3.1079, Train time: 0.08
Epoch [1/1], Step [200/274360], Loss: 3.2567, Train time: 0.12
Epoch [1/1], Step [300/274360], Loss: 3.2329, Train time: 0.16
Epoch [1/1], Step [400/274360], Loss: 3.2315, Train time: 0.20
Epoch [1/1], Step [500/274360], Loss: 3.1722, Train time: 0.25
Epoch [1/1], Step [600/274360], Loss: 3.1081, Train time: 0.33