In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import datasets, transforms, models
from torchvision.transforms import ToTensor, Lambda
from torchvision.io import read_image

from sklearn.model_selection import StratifiedShuffleSplit
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [3]:
train_data = 'train.csv'
test_data = 'test.csv'

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [5]:
class HandDataset(Dataset):
    def __init__(self, csv_file, transform=None, target_transform=None):
        self.kind = csv_file.split('.')[0]
        self.img_labels = pd.read_csv(csv_file)
        if self.kind == 'train':
            self.img_labels['label'].loc[self.img_labels['label'] == '10-1'] = '0'
            self.img_labels['label'].loc[self.img_labels['label'] == '10-2'] = '10'
            self.img_labels['label'] = self.img_labels['label'].astype(int)
        self.transform = transform
        self.target_transform = target_transform
        
    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.kind,self.img_labels.iloc[idx,0])
        img = Image.open(img_path)
        # img = np.array(img)
        if self.transform:
            img = self.transform(img)
        if self.kind == 'test':
            return img
        label = self.img_labels.iloc[idx, 1].item()
        label = np.array(label)
        return img, label

In [6]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.conv_1 = self.Conv(3, 8)
        self.conv_2 = self.Conv(8, 16)
        self.conv_3 = self.Conv(16, 32)
        self.conv_4 = self.Conv(32, 64)
        self.conv_5 = self.Conv(64, 128)
        self.conv_6 = self.Conv(128, 256)
        self.gap = self.Gap()
        self.dense_1 = self.Dense(256, 128)
        self.dense_2 = self.Dense(128, 64)
        self.dense_3 = self.Dense(64, 32)
        self.dense_4 = self.Dense(32, 16)
        # self.dense_5 = self.Dense(16, 8)
        self.head = nn.Linear(16, 11)
        
    def forward(self, x):
        x = self.conv_1(x)
        x = self.conv_2(x)
        x = self.conv_3(x)
        x = self.conv_4(x)
        # x = self.conv_5(x)
        # x = self.conv_6(x)
        x = self.gap(x)
        x = x.view(x.size(0),x.size(1))
        # x = self.dense_1(x)
        # x = self.dense_2(x)
        x = self.dense_3(x)
        x = self.dense_4(x)
        # x = self.dense_5(x)
        x = self.head(x)
        return x
        
    def Conv(self, i_ch, o_ch):
        return nn.Sequential(
            nn.Conv2d(i_ch, o_ch, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # nn.Conv2d(o_ch, o_ch, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            # nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))
    
    def Gap(self):
        return nn.AdaptiveAvgPool2d((1,1))
    
    def Dense(self, i_ch, o_ch):
        return nn.Sequential(
            nn.Linear(in_features=i_ch, out_features=o_ch),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5, inplace=False))

In [7]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 8, kernel_size = 3, padding = 1)
        self.conv2 = nn.Conv2d(in_channels = 8, out_channels = 16, kernel_size = 3, padding = 1)
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.fc1 = nn.Linear(56 * 56 * 16, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 11)
        
    def forward(self, x):
        x = self.conv1(x) # 224 * 224 * 8
        x = F.relu(x)     # 224 * 224 * 8
        x = self.pool(x)  # 112 * 112 * 8
        x = self.conv2(x) # 112 * 112 * 16
        x = F.relu(x)     # 112 * 112 * 16
        x = self.pool(x)  # 56 * 56 * 16
        
        x = x.view(-1, 56 * 56 * 16)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.log_softmax(x)
        return x

In [8]:
def train_loop(dataloader, model, loss_fn, optimizer):
    total_loss = 0

    model.train()
    for batch, data in enumerate(dataloader):
        X, y = data
        X = X.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()
        
        current_loss = loss.item()
        total_loss += current_loss
        
        print('\r train_loss: %.4f' % (total_loss/(batch+1)), end='')
            
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    
    model.eval()
    with torch.no_grad():
        for data in dataloader:
            X, y = data
            X = X.to(device)
            y = y.to(device)
            output = model(X)
            
            test_loss += loss_fn(output, y).item()
            _, pred = torch.max(output, 1)

            correct += (pred == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f'\r Valid Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}',end='')

In [9]:
train_dataset = HandDataset(train_data, transform=ToTensor())
test_dataset = HandDataset(test_data, transform=ToTensor())

sss = StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=0)
indices = list(range(len(train_dataset)))
y_train = [y for _, y in train_dataset]

for train_index, val_index in sss.split(indices, y_train):
    print(len(train_index), len(val_index))
    
train_ds = Subset(train_dataset, train_index)
val_ds = Subset(train_dataset, val_index)

729 129


In [10]:
meanRGB = [np.mean(x.numpy(), axis=(1,2)) for x,_ in train_dataset]
stdRGB = [np.std(x.numpy(), axis=(1,2)) for x,_ in train_dataset]

meanR = np.mean([m[0] for m in meanRGB])
meanG = np.mean([m[1] for m in meanRGB])
meanB = np.mean([m[2] for m in meanRGB])

stdR = np.mean([s[0] for s in stdRGB])
stdG = np.mean([s[1] for s in stdRGB])
stdB = np.mean([s[2] for s in stdRGB])

print(meanR, meanG, meanB)
print(stdR, stdG, stdB)

hand_transform = transforms.Compose([
    transforms.Resize((300, 300)), 
    transforms.RandomCrop(224),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomHorizontalFlip(p = 1),
    transforms.ToTensor(),
    transforms.Normalize([meanR, meanG, meanB], [stdR, stdG, stdB]),
])

train_dataset.transform = hand_transform

0.5878615 0.53980184 0.4853427
0.15058757 0.15921523 0.17031455


In [11]:
learning_rate = 5e-3
batch_size = 32
epochs = 200

model = Model()
# model = CNN()
model.to(device)

loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [12]:
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=16)
valid_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=16)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=16)

In [None]:
for t in range(epochs):
    print(f'Epoch {t+1}\n-------------------')
    train_loop(train_dataloader, model, loss_fn, optimizer)
    print('')
    test_loop(valid_dataloader, model, loss_fn)
    print('')
print('Done!')

Epoch 1
-------------------
 train_loss: 2.4059
 Valid Accuracy: 9.2%, Avg loss: 2.398553
Epoch 2
-------------------
 train_loss: 2.4023
 Valid Accuracy: 9.2%, Avg loss: 2.395711
Epoch 3
-------------------
 train_loss: 2.3962
 Valid Accuracy: 8.1%, Avg loss: 2.394872
Epoch 4
-------------------
 train_loss: 2.3960
 Valid Accuracy: 9.2%, Avg loss: 2.394422
Epoch 5
-------------------
 train_loss: 2.3937
 Valid Accuracy: 9.3%, Avg loss: 2.393680
Epoch 6
-------------------
 train_loss: 2.3979
 Valid Accuracy: 9.6%, Avg loss: 2.393390
Epoch 7
-------------------
 train_loss: 2.3944
 Valid Accuracy: 9.6%, Avg loss: 2.393400
Epoch 8
-------------------
 train_loss: 2.3964
 Valid Accuracy: 9.6%, Avg loss: 2.393573
Epoch 9
-------------------
 train_loss: 2.3951
 Valid Accuracy: 9.6%, Avg loss: 2.393566
Epoch 10
-------------------
 train_loss: 2.3946
 Valid Accuracy: 9.6%, Avg loss: 2.393581
Epoch 11
-------------------
 train_loss: 2.3945
 Valid Accuracy: 9.7%, Avg loss: 2.393218
Epoch 12

In [None]:
correct = 0
total = 0
summit = []
with torch.no_grad():
    for data in test_dataloader:
        images = data.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        v = str(int(predicted.cpu()))
        if v == '0':
            v = '10-1'
        elif v== '10':
            v = '10-2'
        summit.append(v)

In [None]:
submission = pd.read_csv('sample_submission.csv')

In [None]:
submission['label'] = summit

In [None]:
submission['label'].unique()

In [None]:
submission.to_csv('commit_1.csv', index=0)

In [None]:
# x,y = next(iter(train_dataloader))

In [None]:
# l = k.cpu().numpy().astype(int)

In [None]:
# plt.imshow(np.transpose(l, (1, 2, 0)))