In [14]:
# for google colab
from google.colab import drive
# mount your Google Drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [15]:
# for google colab
# copy all files from "HW5" directory in Google drive to current directory
!cp -r ./gdrive/MyDrive/HW5/* .

In [16]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import csv
import cv2
import random
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_tensor, to_pil_image
from tqdm import tqdm

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [17]:
colabPath = '/content/gdrive/MyDrive/HW5'
# unzip capcha-hacker.zip
zipPath = os.path.join(colabPath, 'captcha-hacker.zip')
!unzip $zipPath 

Archive:  /content/gdrive/MyDrive/HW5/captcha-hacker.zip
replace sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [18]:
characters1 = '-0123456789'
characters2 = '-0123456789abcdefghijklmnopqrstuvwxyz' 
n_classes_1, n_classes_2 = 11, 37

# Load Data

In [19]:
TRAIN_PATH = "train"
TEST_PATH = "test"

In [20]:
class TaskDataset(Dataset):
  def __init__(self, data, root, characters, input_length, label_length, return_filename=False):
    self.return_filename = return_filename
    self.root = root
    self.data = data
    self.input_length = input_length
    self.label_length = label_length
    self.characters = characters

  def __getitem__(self, index):
    filename, label = self.data[index]
    img = cv2.imread(f"{self.root}/{filename}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = to_tensor(img)
    label = torch.tensor([self.characters.find(x) for x in label], dtype=torch.long)
    input_length = torch.full(size=(1, ), fill_value=self.input_length, dtype=torch.long)
    label_length = torch.full(size=(1, ), fill_value=self.label_length, dtype=torch.long)
    if self.return_filename:
      return img, filename
    else:
      return img, label, input_length, label_length 

  def __len__(self):
    return len(self.data)

In [21]:
train_data1 = []
train_data2 = []
train_data3 = []
# read all file name of each image
with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
  for row in csv.reader(csvfile, delimiter=','):
    if row[0].startswith("task1"):
      train_data1.append(row)
    elif row[0].startswith("task2"):
      train_data2.append(row)
    elif row[0].startswith("task3"):
      train_data3.append(row)

train_ds1 = TaskDataset(train_data1, root=TRAIN_PATH, characters=characters1, input_length=4, label_length=1)
train_ds2 = TaskDataset(train_data2, root=TRAIN_PATH, characters=characters2, input_length=4, label_length=2)
train_ds3 = TaskDataset(train_data3, root=TRAIN_PATH, characters=characters2, input_length=6, label_length=4)

# split train and val -> [8:2]
train_ds1, val_ds1 = torch.utils.data.random_split(train_ds1, [1600, 400]) 
train_ds2, val_ds2 = torch.utils.data.random_split(train_ds2, [2000, 500])
train_ds3, val_ds3 = torch.utils.data.random_split(train_ds3, [2400, 600])

train_dl1 = DataLoader(train_ds1, batch_size=500, num_workers=2, drop_last=True, shuffle=True)
val_dl1 = DataLoader(val_ds1, batch_size=400, num_workers=2, drop_last=True, shuffle=True)
train_dl2 = DataLoader(train_ds2, batch_size=500, num_workers=2, drop_last=True, shuffle=True)
val_dl2 = DataLoader(val_ds2, batch_size=500, num_workers=2, drop_last=True, shuffle=True)
train_dl3 = DataLoader(train_ds3, batch_size=500, num_workers=2, drop_last=True, shuffle=True)
val_dl3 = DataLoader(val_ds3, batch_size=600, num_workers=2, drop_last=True, shuffle=True)

# Buld Model

In [22]:
class Model(nn.Module):
  def __init__(self, n_classes, input_shape=(1, 72, 72)):
    super(Model, self).__init__()
    self.input_shape = input_shape
    # VGG16
    self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(256, 256, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 1)),
            nn.Dropout(0.25, inplace=True)
            )
    
    self.lstm = nn.LSTM(input_size=self.infer_features(), hidden_size=128, num_layers=2, bidirectional=True)
    self.fc = nn.Linear(in_features=256, out_features=n_classes)

  def infer_features(self):
    x = torch.zeros((1,)+self.input_shape)
    x = self.cnn(x)
    x = x.reshape(x.shape[0], -1, x.shape[-1])
    return x.shape[1]

  def forward(self, x):
    x = self.cnn(x)
    x = x.reshape(x.shape[0], -1, x.shape[-1])
    x = x.permute(2, 0, 1)
    x, _ = self.lstm(x)
    x = self.fc(x)
    return x

# Auxiliary Function

In [23]:
def decode(sequence, characters):
  # map index to real characters and do the ctc
  tmp = ''
  for index in sequence:
    tmp += characters[int(index)]

  output = ''
  next_idx = 1
  flag = False

  for c in tmp[:len(tmp) - 1]:
    if c != '-' and c != tmp[next_idx]:
      output += c
      flag = True
    next_idx += 1
  
  if flag is False:
    return output

  if tmp[-1] != '-' and output[-1] != tmp[-1]:
      output += tmp[-1]

  return output

def decode_label(sequence, characters):
  # map index to real character
  output = ''
  for index in sequence:
    output += characters[index]

  return output.replace(' ', '')

'\ndef calc_acc(label, output, characters):\n  # decode the label and prediction, calculating the accuracy\n  output_argmax = output.detach().permute(1, 0, 2).argmax(dim=-1)\n  label, output_argmax = label.cpu().numpy(), output_argmax.cpu().numpy()\n  acc, total = 0, 0\n  for true, pred in zip(label, output_argmax):\n    if decode_label(true, characters) == decode(pred, characters):\n      acc += 1\n    total += 1  \n  acc /= total\n\n  return acc\n  '

# Training 

In [24]:
def train(model, optimizer, epoch, dataloader):
  model.train()
  for image, label, input_lengths, label_lengths in dataloader:
    image = image.to(device)
    label = label.to(device)

    pred = model(image)
    pred_log_probs = F.log_softmax(pred, dim=-1)
    loss = F.ctc_loss(pred_log_probs, label, input_lengths, label_lengths)

    optimizer.zero_grad() # zero the parameter gradient
    loss.backward() # calulate loss and go reverse the direction
    optimizer.step() # update weight

def valid(model, optimizer, epoch, dataloader, characters):
  model.eval()
  with tqdm(dataloader) as pbar, torch.no_grad():
    loss_total = 0
    acc_total = 0
    for batch_index, (image, label, input_lengths, label_lengths) in enumerate(pbar):
      image = image.to(device)
      label = label.to(device)
      
      pred = model(image)
      pred_log_probs = F.log_softmax(pred, dim=-1)
      loss = F.ctc_loss(pred_log_probs, label, input_lengths, label_lengths)
      
      loss_total += loss.item()
      # decode the label and prediction, calculating the accuracy
      pred_argmax = pred.detach().permute(1, 0, 2).argmax(dim=-1)
      label, pred_argmax = label.cpu().numpy(), pred_argmax.cpu().numpy()
      acc, total = 0, 0
      for true, pred in zip(label, pred_argmax):
        if decode_label(true, characters) == decode(pred, characters):
          acc += 1
        total += 1  
      acc /= total
      acc_total += acc
      
      loss_mean = loss_total / (batch_index + 1)
      acc_mean = acc_total / (batch_index + 1)
      
      pbar.set_description(f'Test : {epoch} Loss: {loss_mean:.4f} Acc: {acc_mean:.4f} ')

In [25]:
# initialize model1
model1 = Model(n_classes_1, input_shape=(1, 72, 72)).to(device)
model1 = torch.load('model1.pth')

In [26]:
optimizer = torch.optim.Adam(model1.parameters(), 1e-3, amsgrad=True)
epochs = 5
for epoch in range(1, epochs + 1):
  train(model1, optimizer, epoch, train_dl1)
  valid(model1, optimizer, epoch, val_dl1, characters1)

Test : 1 Loss: 0.0000 Acc: 1.0000 : 100%|██████████| 1/1 [00:00<00:00,  2.16it/s]
Test : 2 Loss: 0.0000 Acc: 1.0000 : 100%|██████████| 1/1 [00:00<00:00,  2.21it/s]
Test : 3 Loss: 0.0000 Acc: 1.0000 : 100%|██████████| 1/1 [00:00<00:00,  2.25it/s]
Test : 4 Loss: 0.0000 Acc: 1.0000 : 100%|██████████| 1/1 [00:00<00:00,  2.22it/s]
Test : 5 Loss: 0.0000 Acc: 1.0000 : 100%|██████████| 1/1 [00:00<00:00,  1.68it/s]


In [27]:
optimizer = torch.optim.Adam(model1.parameters(), 1e-4, amsgrad=True)
epochs = 2
for epoch in range(1, epochs + 1):
  train(model1, optimizer, epoch, train_dl1)
  valid(model1, optimizer, epoch, val_dl1, characters1)

Test : 1 Loss: 0.0000 Acc: 1.0000 : 100%|██████████| 1/1 [00:00<00:00,  2.25it/s]
Test : 2 Loss: 0.0000 Acc: 1.0000 : 100%|██████████| 1/1 [00:00<00:00,  2.22it/s]


In [28]:
torch.save(model1, 'model1.pth')
!cp model1.pth ./gdrive/MyDrive/HW5/model1.pth 

In [29]:
# initialize model2
model2 = Model(n_classes_2, input_shape=(1, 72, 72)).to(device)
model2 = torch.load('model2.pth')

In [30]:
optimizer = torch.optim.Adam(model2.parameters(), 1e-3, amsgrad=True)
epochs = 30
for epoch in range(1, epochs + 1):
  train(model2, optimizer, epoch, train_dl2)
  valid(model2, optimizer, epoch, val_dl2, characters2)

Test : 1 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.46it/s]
Test : 2 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
Test : 3 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.54it/s]
Test : 4 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.51it/s]
Test : 5 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.48it/s]
Test : 6 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.51it/s]
Test : 7 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.55it/s]
Test : 8 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.46it/s]
Test : 9 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.45it/s]
Test : 10 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.52it/s]
Test : 11 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
Test : 12 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.51it/s]
Test : 13 Los

In [31]:
optimizer = torch.optim.Adam(model2.parameters(), 1e-4, amsgrad=True)
epochs = 20
for epoch in range(1, epochs + 1):
  train(model2, optimizer, epoch, train_dl2)
  valid(model2, optimizer, epoch, val_dl2, characters2)

Test : 1 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.36it/s]
Test : 2 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.36it/s]
Test : 3 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.48it/s]
Test : 4 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.42it/s]
Test : 5 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
Test : 6 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.46it/s]
Test : 7 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.48it/s]
Test : 8 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.43it/s]
Test : 9 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
Test : 10 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.36it/s]
Test : 11 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.46it/s]
Test : 12 Loss: 0.0000 Acc: 0.9820 : 100%|██████████| 1/1 [00:00<00:00,  2.43it/s]
Test : 13 Los

In [32]:
torch.save(model2, 'model2.pth')
!cp model2.pth ./gdrive/MyDrive/HW5/model2.pth 

In [33]:
# initialize model3
model3 = Model(n_classes_2, input_shape=(1, 72, 96)).to(device)
model3 = torch.load('model3.pth')

In [34]:
optimizer = torch.optim.Adam(model3.parameters(), 1e-3, amsgrad=True)
epochs = 100
for epoch in range(1, epochs + 1):
  train(model3, optimizer, epoch, train_dl3)
  valid(model3, optimizer, epoch, val_dl3, characters2)

Test : 1 Loss: 0.0934 Acc: 0.8683 : 100%|██████████| 1/1 [00:00<00:00,  1.93it/s]
Test : 2 Loss: 0.3035 Acc: 0.7567 : 100%|██████████| 1/1 [00:00<00:00,  1.87it/s]
Test : 3 Loss: 0.0452 Acc: 0.9033 : 100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
Test : 4 Loss: 0.1047 Acc: 0.8583 : 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]
Test : 5 Loss: 0.0656 Acc: 0.8767 : 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]
Test : 6 Loss: 0.0496 Acc: 0.8950 : 100%|██████████| 1/1 [00:00<00:00,  1.89it/s]
Test : 7 Loss: 0.0745 Acc: 0.8650 : 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]
Test : 8 Loss: 0.0174 Acc: 0.9283 : 100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
Test : 9 Loss: 0.0458 Acc: 0.9017 : 100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
Test : 10 Loss: 0.0133 Acc: 0.9283 : 100%|██████████| 1/1 [00:00<00:00,  1.88it/s]
Test : 11 Loss: 0.0174 Acc: 0.9250 : 100%|██████████| 1/1 [00:00<00:00,  1.89it/s]
Test : 12 Loss: 0.0098 Acc: 0.9333 : 100%|██████████| 1/1 [00:00<00:00,  1.89it/s]
Test : 13 Los

In [35]:
optimizer = torch.optim.Adam(model3.parameters(), 1e-4, amsgrad=True)
epochs = 100
for epoch in range(1, epochs + 1):
  train(model3, optimizer, epoch, train_dl3)
  valid(model3, optimizer, epoch, val_dl3, characters2)

Test : 1 Loss: 0.0305 Acc: 0.9250 : 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]
Test : 2 Loss: 0.0302 Acc: 0.9267 : 100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
Test : 3 Loss: 0.0317 Acc: 0.9250 : 100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
Test : 4 Loss: 0.0324 Acc: 0.9233 : 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]
Test : 5 Loss: 0.0331 Acc: 0.9250 : 100%|██████████| 1/1 [00:00<00:00,  1.94it/s]
Test : 6 Loss: 0.0337 Acc: 0.9250 : 100%|██████████| 1/1 [00:00<00:00,  1.88it/s]
Test : 7 Loss: 0.0322 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
Test : 8 Loss: 0.0300 Acc: 0.9317 : 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]
Test : 9 Loss: 0.0287 Acc: 0.9317 : 100%|██████████| 1/1 [00:00<00:00,  1.94it/s]
Test : 10 Loss: 0.0284 Acc: 0.9283 : 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]
Test : 11 Loss: 0.0283 Acc: 0.9283 : 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]
Test : 12 Loss: 0.0288 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.87it/s]
Test : 13 Los

In [36]:
optimizer = torch.optim.Adam(model3.parameters(), 1e-7, amsgrad=True)
epochs = 50
for epoch in range(1, epochs + 1):
  train(model3, optimizer, epoch, train_dl3)
  valid(model3, optimizer, epoch, val_dl3, characters2)

Test : 1 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]
Test : 2 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]
Test : 3 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.88it/s]
Test : 4 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]
Test : 5 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.86it/s]
Test : 6 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.87it/s]
Test : 7 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
Test : 8 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.86it/s]
Test : 9 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.93it/s]
Test : 10 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.88it/s]
Test : 11 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.88it/s]
Test : 12 Loss: 0.0367 Acc: 0.9300 : 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]
Test : 13 Los

In [37]:
torch.save(model3, 'model3.pth')
!cp model3.pth ./gdrive/MyDrive/HW5/model3.pth 