In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from skimage import io, transform
from sklearn.preprocessing import LabelEncoder
import numbers

import time

In [2]:
# Use the first CUDA GPU when PyTorch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


In [12]:
# Hyperparameters shared by the cells below.
num_epochs = 5        # number of passes the training loop makes over train_loader
num_classes = 17      # class count; ConvNet's final layer has 17 outputs
batch_size = 100      # samples per batch for both DataLoaders
learning_rate = 1e-2  # step size passed to the Adam optimizer

In [13]:
class Center_crop(object):
    """Crop a 3-D image to a centred window along axis 1.

    Only axis 1 is cropped; axes 0 and 2 are returned in full.

    Args:
        output_size: Length of the window to keep along axis 1 (coerced to int).
    """

    def __init__(self, output_size):
        self.output_size = int(output_size)

    def __call__(self, img):
        """Return the centred slice of ``img`` along axis 1.

        Args:
            img: 3-D array-like supporting ``.shape`` and multi-axis slicing
                (e.g. a NumPy array or a torch tensor).

        Returns:
            ``img[:, start:start + output_size, :]``. When ``img`` is narrower
            than ``output_size`` along axis 1, the full extent is returned.
        """
        # Read the extent from .shape rather than len(img[1]): the latter
        # raises IndexError whenever axis 0 has length 1.
        extent = img.shape[1]
        window = self.output_size
        # Clamp at 0 so an oversized window does not produce a negative start
        # (which would silently return only the trailing columns).
        start = max(0, int(round((extent - window) / 2.)))
        return img[:, start:start + window, :]

In [14]:
# Preprocessing pipeline handed to the datasets: a single ToTensor step,
# with no cropping or augmentation enabled.
trans = transforms.Compose([transforms.ToTensor()])

In [15]:
class MyDataset(Dataset):
    """Image-classification dataset described by a CSV listing.

    Column 0 of the CSV holds the image file name (joined onto ``root_dir``)
    and column 1 holds the class label. Labels are integer-encoded with
    ``LabelEncoder`` once, at construction time, and cached in ``self.targets``.

    NOTE(review): every instance fits its own encoder, so two datasets agree
    on the label-to-integer mapping only if their CSVs contain the same set
    of labels -- confirm this holds for the train/test listings.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                to each image.
        """
        self.label_data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        # Encode the whole label column once. Doing this inside __getitem__
        # would re-fit the encoder over every row for every sample fetched.
        self.targets = LabelEncoder().fit_transform(self.label_data.values[:, 1])

    def __len__(self):
        """Return the number of rows in the CSV listing."""
        return len(self.label_data)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Row position in the listing (int or 0-d tensor).

        Returns:
            tuple: ``(image, target)`` where ``image`` is the result of
            ``io.imread`` (after ``transform``, if one was given) and
            ``target`` is the encoded label for that row.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.label_data.iloc[idx, 0])
        image = io.imread(img_name)
        target = self.targets[idx]

        if self.transform:
            image = self.transform(image)

        return image, target

In [16]:
# Build the train/test datasets from their CSV listings.
# The image directory is given without a trailing separator: MyDataset joins it
# to each file name with os.path.join, and a hard-coded Windows '\\' would
# produce broken paths on POSIX systems.
images_dir = 'images'
train_dataset = MyDataset(csv_file='listing_train.csv', root_dir=images_dir, transform=trans)
test_dataset = MyDataset(csv_file='listing_test.csv', root_dir=images_dir, transform=trans)

In [17]:
# Batch both splits with the shared batch size; only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [18]:
class ConvNet(nn.Module):
    """Four-stage convolutional classifier followed by two linear layers.

    Each stage is Conv2d(kernel 5, stride 1, padding 2) -> ReLU -> MaxPool2d
    (stride 2), with channel widths 3 -> 32 -> 64 -> 128 -> 256. The flattened
    features pass through dropout and then ``fc1`` and ``fc2``.

    Args:
        num_classes (int): Number of output logits. Defaults to 17, the value
            previously hard-coded in the final layer.
    """

    def __init__(self, num_classes=17):
        super().__init__()
        self.layer1 = self._conv_block(3, 32, pool_kernel=(1, 3))
        self.layer2 = self._conv_block(32, 64, pool_kernel=(2, 3))
        self.layer3 = self._conv_block(64, 128, pool_kernel=(2, 3))
        self.layer4 = self._conv_block(128, 256, pool_kernel=(1, 2))
        self.drop_out = nn.Dropout()
        # fc1 expects exactly 9 * 2 * 256 flattened features, which ties the
        # network to the input image size that yields that shape after layer4.
        self.fc1 = nn.Linear(9 * 2 * 256, 1000)
        self.fc2 = nn.Linear(1000, num_classes)

    @staticmethod
    def _conv_block(in_channels, out_channels, pool_kernel):
        """Build one Conv2d -> ReLU -> MaxPool2d stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_kernel, stride=2))

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Flatten everything except the batch dimension.
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        # NOTE(review): there is no activation between fc1 and fc2, so the two
        # layers compose to a single linear map -- confirm this is intended.
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [19]:
# Instantiate the network on the selected device, then create the
# cross-entropy loss and the Adam optimizer over all model parameters.
model = ConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [20]:
# Train the model
total_step = len(train_loader)
loss_list = []  # per-batch training loss
acc_list = []   # per-batch training accuracy (fraction correct)

for epoch in range(num_epochs):
    # Put the model in training mode explicitly: the evaluation cell calls
    # model.eval(), so re-running this cell afterwards would otherwise train
    # with dropout disabled.
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        # Run the forward pass
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss_value = loss.item()  # read once; reused for logging below
        loss_list.append(loss_value)

        # Backprop and perform Adam optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the accuracy (detached so it stays out of the autograd graph)
        total = labels.size(0)
        _, predicted = torch.max(outputs.detach(), 1)
        correct = (predicted == labels).sum().item()
        acc_list.append(correct / total)

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss_value,
                          (correct / total) * 100))

Epoch [1/5], Step [100/883], Loss: 2.7478, Accuracy: 14.00%
Epoch [1/5], Step [200/883], Loss: 2.5722, Accuracy: 21.00%
Epoch [1/5], Step [300/883], Loss: 2.6722, Accuracy: 18.00%
Epoch [1/5], Step [400/883], Loss: 2.6292, Accuracy: 17.00%
Epoch [1/5], Step [500/883], Loss: 2.6307, Accuracy: 16.00%
Epoch [1/5], Step [600/883], Loss: 2.5320, Accuracy: 18.00%
Epoch [1/5], Step [700/883], Loss: 2.5891, Accuracy: 16.00%
Epoch [1/5], Step [800/883], Loss: 2.5922, Accuracy: 21.00%
Epoch [2/5], Step [100/883], Loss: 2.6493, Accuracy: 19.00%
Epoch [2/5], Step [200/883], Loss: 2.6416, Accuracy: 17.00%
Epoch [2/5], Step [300/883], Loss: 2.5292, Accuracy: 23.00%
Epoch [2/5], Step [400/883], Loss: 2.5855, Accuracy: 14.00%
Epoch [2/5], Step [500/883], Loss: 2.5909, Accuracy: 14.00%
Epoch [2/5], Step [600/883], Loss: 2.6368, Accuracy: 19.00%
Epoch [2/5], Step [700/883], Loss: 2.5860, Accuracy: 19.00%
Epoch [2/5], Step [800/883], Loss: 2.6096, Accuracy: 6.00%


KeyboardInterrupt: 

In [142]:
# Test the model
# Evaluation mode disables dropout; no_grad skips gradient bookkeeping.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest logit per row is the predicted class.
        _, predicted = torch.max(outputs, dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on test images: {} %'.format((correct / total) * 100))

# Save the model and plot (disabled; MODEL_STORE_PATH is not defined in the visible cells)
#torch.save(model.state_dict(), MODEL_STORE_PATH + 'conv_net_model.ckpt')

Test Accuracy of the model on test images: 75.20342612419701 %
