In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames[:3]:
#         print(os.path.join(dirname, filename))
#     if len(filenames) > 3:
#         print("...")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import csv
import numpy as np
import random
import time

from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18
from torchvision.io import read_image
import torchvision.transforms as transforms

In [3]:
TRAIN_PATH = "/kaggle/input/captcha-hacker/train"
#TEST_PATH = "/kaggle/input/captcha-hacker/test"
#TRAIN_PATH = "./train"
#TEST_PATH = "./test"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# try device = "cuda" 
# and change your settings/accelerator to GPU if you want it to run faster

In [4]:
code = {}
rev_code = {}

num = 0
for i in range(10):
    code[str(i)] = num
    rev_code[num] = str(i)
    num += 1

for i in range(ord('a'), ord('z') + 1):
    code[chr(i)]  = num
    rev_code[num] = chr(i)
    num += 1

#print(code)   



In [5]:
def calc_acc(output, label):  
    digits = int(output.shape[1] / 36)
    output, label = output.view((-1, digits , 36)), label.view((-1, digits, 36))
    output = nn.functional.softmax(output, dim=2)
    
    output, label = torch.argmax(output, dim=2), torch.argmax(label, dim=2)
    
    correct = torch.sum(output == label, dim=1)
    correct = torch.sum(correct == digits, dim=0)

    return correct

In [6]:
train_data = []
val_data = []

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if random.random() < 0.8:
            train_data.append(row)
        else:
            val_data.append(row)


## TASK1

In [7]:
class Task1Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task1")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = read_image(f"{self.root}/{filename}")
        img = torch.as_tensor(img, dtype=torch.float32)
        
        transform = transforms.Compose([
            transforms.Resize(size=224),
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        img = transform(img)
        if self.return_filename:
            return img, filename
        else:
            return img, int(label)

    def __len__(self):
        return len(self.data)

In [8]:
train_ds = Task1Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=2, drop_last=True, shuffle=True)

val_ds = Task1Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=2, drop_last=False, shuffle=False)

In [9]:
task1_model = resnet18(pretrained=True)
task1_model.fc = nn.Linear(in_features=512, out_features=10, bias=True)
task1_model = task1_model.to(device)
#print(model)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [10]:

optimizer = torch.optim.Adam(task1_model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()


for epoch in range(15):
    print(f"Epoch [{epoch}]")
    task1_model.train()
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)
        
        pred = task1_model(image)
        loss = loss_fn(pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        del image, label, pred
        #torch.cuda.empty_cache()
        
    sample_count = 0
    correct_count = 0
    task1_model.eval()
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)

            pred = task1_model(image)
            loss = loss_fn(pred, label)

            pred = torch.argmax(pred, dim=1)

            sample_count += len(image)
            correct_count += (label == pred).sum()

            del image, label, pred
            #torch.cuda.empty_cache()
        
    print("accuracy (validation):", correct_count / sample_count)

torch.save(task1_model.state_dict(), "task1_model.pt")

Epoch [0]
accuracy (validation): tensor(0.6263, device='cuda:0')
Epoch [1]
accuracy (validation): tensor(0.9899, device='cuda:0')
Epoch [2]
accuracy (validation): tensor(0.9924, device='cuda:0')
Epoch [3]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [4]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [5]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [6]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [7]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [8]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [9]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [10]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [11]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [12]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [13]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [14]
accuracy (validation): tensor(1., device='cuda:0')


## TASK2

In [11]:
class Task2Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task2")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = read_image(f"{self.root}/{filename}")
        img = torch.as_tensor(img, dtype=torch.float32)
        
        transform = transforms.Compose([
            transforms.Resize(size=224),
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        img = transform(img)
        if self.return_filename:
            return img, filename
        else:
            #new_label = np.array([code[label[0]], code[label[1]]])
            #new_label = [0] * 72
            new_label = np.zeros(shape=72)
            new_label[code[label[0]]] = 1
            new_label[code[label[1]]+36] = 1
            #new_label += np.array([0, 36])
            new_label = torch.LongTensor(new_label)
            return img, new_label

    def __len__(self):
        return len(self.data)

In [12]:
train_ds = Task2Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=2, drop_last=True, shuffle=True)

val_ds = Task2Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=2, drop_last=False, shuffle=False)

In [13]:
task2_model = resnet18(pretrained=True)
task2_model.fc = nn.Linear(in_features=512, out_features=72, bias=True)
task2_model = task2_model.to(device)

In [14]:
optimizer = torch.optim.Adam(task2_model.parameters(), lr=1e-3)
loss_fn = nn.MultiLabelSoftMarginLoss()


for epoch in range(15):
    print(f"Epoch [{epoch}]")
    task2_model.train()

    train_loss = 0.0
    for image, label in train_dl:

        image = image.to(device)
        label = label.to(device)

        pred = task2_model(image)
        loss = loss_fn(pred, label)
        
        train_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        del image, label, pred
        #torch.cuda.empty_cache()
    
    train_loss /= len(train_dl)
    print("train loss: {}".format(train_loss))

    sample_count = 0
    correct_count = 0
    task2_model.eval()
    val_loss = 0
    
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)

            pred = task2_model(image)
            loss = loss_fn(pred, label)
            val_loss += loss

            sample_count += len(image)
            correct_count += calc_acc(pred, label)

            del image, label, pred
            #torch.cuda.empty_cache()

        val_loss /= len(val_dl)
        print("val loss: {}".format(val_loss))
        
    print("accuracy (validation):", correct_count / sample_count)

torch.save(task2_model.state_dict(), "task2_model.pt")

Epoch [0]
train loss: 0.18872442841529846
val loss: 0.11333496868610382
accuracy (validation): tensor(0.0307, device='cuda:0')
Epoch [1]
train loss: 0.057765573263168335
val loss: 0.03991683945059776
accuracy (validation): tensor(0.8115, device='cuda:0')
Epoch [2]
train loss: 0.02292434498667717
val loss: 0.019220542162656784
accuracy (validation): tensor(0.9672, device='cuda:0')
Epoch [3]
train loss: 0.010974769480526447
val loss: 0.010702262632548809
accuracy (validation): tensor(0.9959, device='cuda:0')
Epoch [4]
train loss: 0.0065612331964075565
val loss: 0.00845502968877554
accuracy (validation): tensor(0.9980, device='cuda:0')
Epoch [5]
train loss: 0.004564496222883463
val loss: 0.006629194598644972
accuracy (validation): tensor(0.9939, device='cuda:0')
Epoch [6]
train loss: 0.0035351316910237074
val loss: 0.00470530753955245
accuracy (validation): tensor(0.9959, device='cuda:0')
Epoch [7]
train loss: 0.0029073588084429502
val loss: 0.0038334287237375975
accuracy (validation): te

## TASK3

In [15]:
class Task3Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task3")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = read_image(f"{self.root}/{filename}")
        img = torch.as_tensor(img, dtype=torch.float32)
        
        transform = transforms.Compose([
            transforms.Resize(size=(384, 288)),
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        img = transform(img)
        if self.return_filename:
            return img, filename
        else:
            new_label = np.zeros(shape=144)
            for i in range(4):
                new_label[code[label[i]]+i*36] = 1
            new_label = torch.LongTensor(new_label)
            return img, new_label

    def __len__(self):
        return len(self.data)

In [16]:
train_ds = Task3Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=2, drop_last=True, shuffle=True)

val_ds = Task3Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=2, drop_last=False, shuffle=False)

In [17]:
task3_model = resnet18(pretrained=True)
task3_model.fc = nn.Linear(in_features=512, out_features=144, bias=True)
task3_model = task3_model.to(device)

In [18]:
optimizer = torch.optim.Adam(task3_model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.8)
loss_fn = nn.MultiLabelSoftMarginLoss()


for epoch in range(100):
    print(f"Epoch [{epoch}]")
    task3_model.train()

    train_loss = 0.0
    for image, label in train_dl:

        image = image.to(device)
        label = label.to(device)

        pred = task3_model(image)
        loss = loss_fn(pred, label)
        
        train_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        del image, label, pred
        #torch.cuda.empty_cache()
    
    scheduler.step()
    train_loss /= len(train_dl)
    print("train loss: {}".format(train_loss))

    sample_count = 0
    correct_count = 0
    task3_model.eval()
    val_loss = 0
    
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)

            pred = task3_model(image)
            loss = loss_fn(pred, label)
            val_loss += loss

            sample_count += len(image)
            correct_count += calc_acc(pred, label)

            del image, label, pred
            #torch.cuda.empty_cache()

        val_loss /= len(val_dl)
        print("val loss: {}".format(val_loss))
        
    print("accuracy (validation):", correct_count / sample_count)

torch.save(task3_model.state_dict(), "task3_model.pt")

Epoch [0]
train loss: 0.19055402278900146
val loss: 0.2002299427986145
accuracy (validation): tensor(0., device='cuda:0')
Epoch [1]
train loss: 0.12125591933727264
val loss: 0.12125589698553085
accuracy (validation): tensor(0., device='cuda:0')
Epoch [2]
train loss: 0.1183542013168335
val loss: 0.11833962798118591
accuracy (validation): tensor(0., device='cuda:0')
Epoch [3]
train loss: 0.11407344788312912
val loss: 0.11697398126125336
accuracy (validation): tensor(0., device='cuda:0')
Epoch [4]
train loss: 0.10399691760540009
val loss: 0.10403509438037872
accuracy (validation): tensor(0., device='cuda:0')
Epoch [5]
train loss: 0.08697345852851868
val loss: 0.08629818260669708
accuracy (validation): tensor(0.0207, device='cuda:0')
Epoch [6]
train loss: 0.06647175550460815
val loss: 0.06558462977409363
accuracy (validation): tensor(0.1945, device='cuda:0')
Epoch [7]
train loss: 0.04788639396429062
val loss: 0.04890618845820427
accuracy (validation): tensor(0.4733, device='cuda:0')
Epoch 