In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames[:3]:
#         print(os.path.join(dirname, filename))
#     if len(filenames) > 3:
#         print("...")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import csv
import cv2
import numpy as np
import random
import os
import time

from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18
from torchvision.io import read_image
import torchvision.transforms as transforms

In [3]:
TRAIN_PATH = "/kaggle/input/captcha-hacker/train"
TEST_PATH = "/kaggle/input/captcha-hacker/test"
#TRAIN_PATH = "./train"
#TEST_PATH = "./test"
device = torch.device("cuda")
# try device = "cuda" 
# and change your settings/accelerator to GPU if you want it to run faster

In [4]:
code = {}
rev_code = {}

num = 0
for i in range(10):
    code[str(i)] = num
    rev_code[num] = str(i)
    num += 1

for i in range(ord('a'), ord('z') + 1):
    code[chr(i)]  = num
    rev_code[num] = chr(i)
    num += 1

#print(code)   



In [5]:
def calc_acc(output, label):  
    digits = int(output.shape[1] / 36)
    output, label = output.view((-1, digits , 36)), label.view((-1, digits, 36))
    output = nn.functional.softmax(output, dim=2)
    #print(output.shape)
    #print(label.shape)
    output, label = torch.argmax(output, dim=2), torch.argmax(label, dim=2)
    #print(label)
    #print(output)
    correct = torch.sum(output == label, dim=1)
    #print(correct)
    correct = torch.sum(correct == digits, dim=0)

    return correct

In [6]:
train_data = []
val_data = []

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if random.random() < 0.8:
            train_data.append(row)
        else:
            val_data.append(row)

test_data = []
with open(f'{TEST_PATH}/../sample_submission.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        test_data.append(row)


## TASK1

In [7]:
class Task1Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task1")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = read_image(f"{self.root}/{filename}")
        img = torch.as_tensor(img, dtype=torch.float32)
        transform = transforms.Compose([
            transforms.Resize(size=224),
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        #img = cv2.resize(img, (32, 32))
        #img = np.mean(img, axis=2)
        img = transform(img)
        if self.return_filename:
            return img, filename
        else:
            return img, int(label)

    def __len__(self):
        return len(self.data)

In [8]:
train_ds = Task1Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=2, drop_last=True, shuffle=True)

val_ds = Task1Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=2, drop_last=False, shuffle=False)

In [9]:
task1_model = resnet18(pretrained=True)
task1_model.fc = nn.Linear(in_features=512, out_features=10, bias=True)
task1_mode = task1_model.to(device)
#print(model)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [10]:
#model = Model().to(device)
optimizer = torch.optim.Adam(task1_model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()


for epoch in range(15):
    print(f"Epoch [{epoch}]")
    task1_model.train()
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)
        
        pred = task1_model(image)
        loss = loss_fn(pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        del image, label, pred
        torch.cuda.empty_cache()
        
    sample_count = 0
    correct_count = 0
    task1_model.eval()
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)

            pred = task1_model(image)
            loss = loss_fn(pred, label)

            pred = torch.argmax(pred, dim=1)

            sample_count += len(image)
            correct_count += (label == pred).sum()

            del image, label, pred
            torch.cuda.empty_cache()
        
    print("accuracy (validation):", correct_count / sample_count)


Epoch [0]
accuracy (validation): tensor(0.9483, device='cuda:0')
Epoch [1]
accuracy (validation): tensor(0.9767, device='cuda:0')
Epoch [2]
accuracy (validation): tensor(1., device='cuda:0')
Epoch [3]
accuracy (validation): tensor(0.9922, device='cuda:0')
Epoch [4]
accuracy (validation): tensor(0.9948, device='cuda:0')
Epoch [5]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [6]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [7]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [8]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [9]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [10]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [11]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [12]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [13]
accuracy (validation): tensor(0.9974, device='cuda:0')
Epoch [14]
accuracy (validation): tensor(1., device='cuda:0')


In [11]:
test_ds = Task1Dataset(test_data, root=TEST_PATH, return_filename=True)
test_dl = DataLoader(test_ds, batch_size=100, drop_last=False, shuffle=False)


""" if os.path.exists('submission.csv'):
    csv_writer = csv.writer(open('submission.csv', 'a', newline=''))
else:
"""
if os.path.exists('submission.csv'):
    os.remove('submission.csv')
    
file = open('submission.csv', 'w', newline='')
csv_writer = csv.writer(file)
csv_writer.writerow(["filename", "label"])


task1_model.eval()
with torch.no_grad():
    for image, filenames in test_dl:
        image = image.to(device)

        pred = task1_model(image)
        pred = torch.argmax(pred, dim=1)

        for i in range(len(filenames)):
            csv_writer.writerow([filenames[i], str(pred[i].item())])

        del image, pred
        torch.cuda.empty_cache()
    
file.close()

del task1_model, train_ds, train_dl, val_ds, val_dl, test_ds, test_dl
torch.cuda.empty_cache()
time.sleep(10)

## TASK2

In [12]:
class Task2Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task2")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = read_image(f"{self.root}/{filename}")
        img = torch.as_tensor(img, dtype=torch.float32)
        transform = transforms.Compose([
            transforms.Resize(size=224),
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        #img = cv2.resize(img, (32, 32))
        #img = np.mean(img, axis=2)
        img = transform(img)
        if self.return_filename:
            return img, filename
        else:
            #new_label = np.array([code[label[0]], code[label[1]]])
            #new_label = [0] * 72
            new_label = np.zeros(shape=72)
            new_label[code[label[0]]] = 1
            new_label[code[label[1]]+36] = 1
            #new_label += np.array([0, 36])
            new_label = torch.LongTensor(new_label)
            return img, new_label

    def __len__(self):
        return len(self.data)

In [13]:
train_ds = Task2Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=2, drop_last=True, shuffle=True)

val_ds = Task2Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=2, drop_last=False, shuffle=False)

In [14]:
class Task2Model(nn.Module):
    def __init__(self):
        super(Task2Model, self).__init__()

        self.resnet = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
        self.fc1 = nn.Linear(in_features=1000, out_features=36, bias=True)
        self.fc2 = nn.Linear(in_features=1000, out_features=36, bias=True)

    def forward(self, x):
        x = self.resnet(x)
        output_1 = self.fc1(x)
        output_2 = self.fc2(x)

        return output_1, output_2



In [15]:
task2_model = resnet18(pretrained=True)
task2_model.fc = nn.Linear(in_features=512, out_features=72, bias=True)
task2_model = task2_model.to(device)

In [16]:
#task2_model = Task2Model().to(device)
#weights = ResNet18_Weights.IMAGENET1K_V1
#preprocess = weights.transforms()

#task2_model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
#task2_model = densenet201(weights=DenseNet201_Weights.IMAGENET1K_V1)
#task2_model.fc = nn.Linear(in_features=512, out_features=72, bias=True)

optimizer = torch.optim.Adam(task2_model.parameters(), lr=1e-3)
#loss_fn = nn.CrossEntropyLoss()
loss_fn = nn.MultiLabelSoftMarginLoss()


for epoch in range(15):
    print(f"Epoch [{epoch}]")
    task2_model.train()

    train_loss = 0.0
    for image, label in train_dl:

        image = image.to(device)
        #label_1, label_2 = label[:, 0], label[:, 1]
        label = label.to(device)
        #label_1, label_2 = label_1.to(device, dtype=torch.long), label_2.to(device, dtype=torch.long)
        
        #pred_1, pred_2 = task2_model(image)
        #image_transformed = preprocess(image)

        pred = task2_model(image)
        loss = loss_fn(pred, label)
        #loss_1, loss_2 = loss_fn(pred_1, label_1), loss_fn(pred_2, label_2)
        
        #loss = loss_1 + loss_2
        train_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        del image, label, pred
        torch.cuda.empty_cache()
    
    #print(len(train_dl))
    train_loss /= len(train_dl)
    print("train loss: {}".format(train_loss))

    sample_count = 0
    correct_count = 0
    task2_model.eval()
    val_loss = 0
    
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)
            #label_1, label_2 = label[:, 0], label[:, 1]
            #label_1, label_2 = label_1.to(device, dtype=torch.long), label_2.to(device, dtype=torch.long)

            #pred_1, pred_2 = task2_model(image)
            #loss_1, loss_2 = loss_fn(pred_1, label_1), loss_fn(pred_2, label_2)

            #loss = loss_1 + loss_2
            #val_loss += loss
            #print("loss: {}".format(loss))

            #image_transformed = preprocess(image)

            pred = task2_model(image)
            loss = loss_fn(pred, label)
            #print("pred: ", pred)
            #print("label: ", label)
            val_loss += loss
            #pred_1, pred_2 = torch.argmax(pred_1, dim=1), torch.argmax(pred_2, dim=1)
            #pred = torch.stack([pred_1, pred_2], dim=1)

            sample_count += len(image)
            #correct = torch.sum(pred == label, dim=1)
            #print(correct)
            correct_count += calc_acc(pred, label)
            #correct_count += torch.sum(correct == 2)

            del image, label, pred
            torch.cuda.empty_cache()

        val_loss /= len(val_dl)
        print("val loss: {}".format(val_loss))
        
    print("accuracy (validation):", correct_count / sample_count)

Epoch [0]
train loss: 0.1962503045797348
val loss: 0.17250965535640717
accuracy (validation): tensor(0.0119, device='cuda:0')
Epoch [1]
train loss: 0.06740159541368484
val loss: 0.04786376655101776
accuracy (validation): tensor(0.6950, device='cuda:0')
Epoch [2]
train loss: 0.028290068730711937
val loss: 0.02163233608007431
accuracy (validation): tensor(0.9446, device='cuda:0')
Epoch [3]
train loss: 0.013269573450088501
val loss: 0.013592088595032692
accuracy (validation): tensor(0.9564, device='cuda:0')
Epoch [4]
train loss: 0.007797346916049719
val loss: 0.010911944322288036
accuracy (validation): tensor(0.9743, device='cuda:0')
Epoch [5]
train loss: 0.005378466099500656
val loss: 0.007089290302246809
accuracy (validation): tensor(0.9921, device='cuda:0')
Epoch [6]
train loss: 0.004010448697954416
val loss: 0.007270704489201307
accuracy (validation): tensor(0.9782, device='cuda:0')
Epoch [7]
train loss: 0.0032453290186822414
val loss: 0.004959181882441044
accuracy (validation): tenso

In [17]:
test_ds = Task2Dataset(test_data, root=TEST_PATH, return_filename=True)
test_dl = DataLoader(test_ds, batch_size=100, drop_last=False, shuffle=False)

file = open('submission.csv', 'a', newline='')
csv_writer = csv.writer(file)

task2_model.eval()
with torch.no_grad():
    for image, filenames in test_dl:
        image = image.to(device)

        pred = task2_model(image)
        pred = pred.view(-1, 2, 36)
        pred = torch.argmax(pred, dim=2)

        for i in range(len(filenames)):
            csv_writer.writerow([filenames[i], rev_code[pred[i][0].item()] + rev_code[pred[i][1].item()]])

        del image, pred
        torch.cuda.empty_cache()
    
file.close()
del task2_model, train_ds, train_dl, val_ds, val_dl, test_ds, test_dl
torch.cuda.empty_cache()
time.sleep(10)

## TASK3

In [18]:
class Task3Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task3")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = read_image(f"{self.root}/{filename}")
        img = torch.as_tensor(img, dtype=torch.float32)
        transform = transforms.Compose([
            transforms.Resize(size=224),
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        #img = cv2.resize(img, (32, 32))
        #img = np.mean(img, axis=2)
        img = transform(img)
        if self.return_filename:
            return img, filename
        else:
            new_label = np.zeros(shape=144)
            for i in range(4):
                new_label[code[label[i]]+i*36] = 1
            new_label = torch.LongTensor(new_label)
            return img, new_label

    def __len__(self):
        return len(self.data)

In [19]:
train_ds = Task3Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=2, drop_last=True, shuffle=True)

val_ds = Task3Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=2, drop_last=False, shuffle=False)

In [20]:
class Task3Model(nn.Module):
    def __init__(self):
        super(Task3Model, self).__init__()

        self.resnet = resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)
        self.fc1 = nn.Linear(in_features=1000, out_features=36, bias=True)
        self.fc2 = nn.Linear(in_features=1000, out_features=36, bias=True)
        self.fc3 = nn.Linear(in_features=1000, out_features=36, bias=True)
        self.fc4 = nn.Linear(in_features=1000, out_features=36, bias=True)

    def forward(self, x):
        x = self.resnet(x)
        output_1 = self.fc1(x)
        output_2 = self.fc2(x)
        output_3 = self.fc3(x)
        output_4 = self.fc4(x)

        return output_1, output_2, output_3, output_4

In [21]:
task3_model = resnet18(pretrained=True)
task3_model.fc = nn.Linear(in_features=512, out_features=144, bias=True)
task3_model = task3_model.to(device)

In [22]:
#task3_model = Task3Model().to(device)

optimizer = torch.optim.Adam(task3_model.parameters(), lr=1e-3)
loss_fn = nn.MultiLabelSoftMarginLoss()
#loss_fn = nn.CrossEntropyLoss()


for epoch in range(30):
    print(f"Epoch [{epoch}]")
    task3_model.train()

    train_loss = 0.0
    for image, label in train_dl:

        image = image.to(device)
        #label_1, label_2 = label[:, 0], label[:, 1]
        label = label.to(device)
        #label_1, label_2 = label_1.to(device, dtype=torch.long), label_2.to(device, dtype=torch.long)
        
        #pred_1, pred_2 = task2_model(image)
        #image_transformed = preprocess(image)

        pred = task3_model(image)
        loss = loss_fn(pred, label)
        #loss_1, loss_2 = loss_fn(pred_1, label_1), loss_fn(pred_2, label_2)
        
        #loss = loss_1 + loss_2
        train_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        del image, label, pred
        torch.cuda.empty_cache()
    
    #print(len(train_dl))
    train_loss /= len(train_dl)
    print("train loss: {}".format(train_loss))

    sample_count = 0
    correct_count = 0
    task3_model.eval()
    val_loss = 0
    
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)
            #label_1, label_2 = label[:, 0], label[:, 1]
            #label_1, label_2 = label_1.to(device, dtype=torch.long), label_2.to(device, dtype=torch.long)

            #pred_1, pred_2 = task2_model(image)
            #loss_1, loss_2 = loss_fn(pred_1, label_1), loss_fn(pred_2, label_2)

            #loss = loss_1 + loss_2
            #val_loss += loss
            #print("loss: {}".format(loss))

            #image_transformed = preprocess(image)

            pred = task3_model(image)
            loss = loss_fn(pred, label)
            #print("pred: ", pred)
            #print("label: ", label)
            val_loss += loss
            #pred_1, pred_2 = torch.argmax(pred_1, dim=1), torch.argmax(pred_2, dim=1)
            #pred = torch.stack([pred_1, pred_2], dim=1)

            sample_count += len(image)
            #correct = torch.sum(pred == label, dim=1)
            #print(correct)
            correct_count += calc_acc(pred, label)
            #correct_count += torch.sum(correct == 2)

            del image, label, pred
            torch.cuda.empty_cache()

        val_loss /= len(val_dl)
        print("val loss: {}".format(val_loss))
        
    print("accuracy (validation):", correct_count / sample_count)

Epoch [0]
train loss: 0.1917160600423813
val loss: 0.14850692451000214
accuracy (validation): tensor(0., device='cuda:0')
Epoch [1]
train loss: 0.1174943670630455
val loss: 0.115991972386837
accuracy (validation): tensor(0., device='cuda:0')
Epoch [2]
train loss: 0.09696988761425018
val loss: 0.08866866677999496
accuracy (validation): tensor(0.0053, device='cuda:0')
Epoch [3]
train loss: 0.06995102763175964
val loss: 0.061455752700567245
accuracy (validation): tensor(0.1373, device='cuda:0')
Epoch [4]
train loss: 0.0475832037627697
val loss: 0.048766396939754486
accuracy (validation): tensor(0.3468, device='cuda:0')
Epoch [5]
train loss: 0.031772732734680176
val loss: 0.033385686576366425
accuracy (validation): tensor(0.8011, device='cuda:0')
Epoch [6]
train loss: 0.021780159324407578
val loss: 0.025534093379974365
accuracy (validation): tensor(0.8697, device='cuda:0')
Epoch [7]
train loss: 0.015332452952861786
val loss: 0.01945440284907818
accuracy (validation): tensor(0.9419, device=

In [23]:
test_ds = Task3Dataset(test_data, root=TEST_PATH, return_filename=True)
test_dl = DataLoader(test_ds, batch_size=100, drop_last=False, shuffle=False)

file = open('submission.csv', 'a', newline='')
csv_writer = csv.writer(file)

task3_model.eval()

with torch.no_grad():
    for image, filenames in test_dl:
        image = image.to(device)

        pred = task3_model(image)
        pred = pred.view(-1, 4, 36)
        pred = torch.argmax(pred, dim=2)

        for i in range(len(filenames)):
            csv_writer.writerow([filenames[i], rev_code[pred[i][0].item()] + rev_code[pred[i][1].item()] + \
                                                rev_code[pred[i][2].item()] + rev_code[pred[i][3].item()]])    

        del image, pred
        torch.cuda.empty_cache()
    
file.close()    
del task3_model, train_ds, train_dl, val_ds, val_dl, test_ds, test_dl
torch.cuda.empty_cache()