In [1]:
from google.colab import drive

# Mount Google Drive into the Colab runtime; the test images and the model
# checkpoints used further down are read from paths under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install pandas==1.3.5
!pip install tqdm==4.64.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import numpy as np
import pandas as pd

import csv
import numpy as np
import random
import os
import time

from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18
from torchvision.io import read_image
import torchvision.transforms as transforms

In [4]:
'''
Self-defined encoding of characters: digits 0-9 map to 0-9 and lowercase a-z map to 10-35.
'''

# Ordered alphabet of recognised symbols: the ten digits followed by the
# twenty-six lowercase ASCII letters. A symbol's position in this list is
# its integer class id.
alphabet = [str(digit) for digit in range(10)]
alphabet += [chr(codepoint) for codepoint in range(ord('a'), ord('z') + 1)]

# Forward (symbol -> id) and reverse (id -> symbol) lookup tables.
code = {symbol: index for index, symbol in enumerate(alphabet)}
rev_code = {index: symbol for index, symbol in enumerate(alphabet)}

# Total number of symbols assigned (36).
num = len(alphabet)

In [5]:
class Task1Dataset(Dataset):
    """Dataset over the rows whose filename starts with ``task1``.

    Args:
        data: iterable of ``(filename, label)`` rows; rows whose first field
            does not start with ``"task1"`` are dropped.
        root: directory that the filenames are relative to.
        return_filename: if true, items are ``(image, filename)``; otherwise
            they are ``(image, int(label))``.
    """

    def __init__(self, data, root, return_filename=False):
        self.data = [row for row in data if row[0].startswith("task1")]
        self.root = root
        self.return_filename = return_filename

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        filename, label = self.data[index]

        pixels = read_image(f"{self.root}/{filename}")
        pixels = torch.as_tensor(pixels, dtype=torch.float32)

        preprocess = transforms.Compose([
            transforms.Resize(size=288),
            # First normalisation divides every channel by 255
            # (presumably mapping raw 0-255 pixel values to 0-1).
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            # Second normalisation applies per-channel mean/std standardisation.
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        pixels = preprocess(pixels)

        if self.return_filename:
            return pixels, filename
        return pixels, int(label)

In [6]:
class Task2Dataset(Dataset):
    """Dataset over the rows whose filename starts with ``task2``.

    Args:
        data: iterable of ``(filename, label)`` rows; rows whose first field
            does not start with ``"task2"`` are dropped.
        root: directory that the filenames are relative to.
        return_filename: if true, items are ``(image, filename)``; otherwise
            they are ``(image, target)`` where ``target`` is a 72-element
            ``LongTensor`` holding two concatenated 36-way one-hot vectors,
            one per label character.
    """

    def __init__(self, data, root, return_filename=False):
        self.data = [row for row in data if row[0].startswith("task2")]
        self.root = root
        self.return_filename = return_filename

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        filename, label = self.data[index]

        pixels = read_image(f"{self.root}/{filename}")
        pixels = torch.as_tensor(pixels, dtype=torch.float32)

        preprocess = transforms.Compose([
            transforms.Resize(size=288),
            # First normalisation divides every channel by 255
            # (presumably mapping raw 0-255 pixel values to 0-1).
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            # Second normalisation applies per-channel mean/std standardisation.
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        pixels = preprocess(pixels)

        if self.return_filename:
            return pixels, filename

        # Slots 0-35 encode the first character, slots 36-71 the second.
        first_slot = code[label[0]]
        second_slot = code[label[1]] + 36
        target = np.zeros(shape=72)
        target[first_slot] = 1
        target[second_slot] = 1
        return pixels, torch.LongTensor(target)

In [7]:
class Task3Dataset(Dataset):
    """Dataset over the rows whose filename starts with ``task3``.

    Args:
        data: iterable of ``(filename, label)`` rows; rows whose first field
            does not start with ``"task3"`` are dropped.
        root: directory that the filenames are relative to.
        return_filename: if true, items are ``(image, filename)``; otherwise
            they are ``(image, target)`` where ``target`` is a 144-element
            ``LongTensor`` holding four concatenated 36-way one-hot vectors,
            one per label character.
    """

    def __init__(self, data, root, return_filename=False):
        self.data = [row for row in data if row[0].startswith("task3")]
        self.root = root
        self.return_filename = return_filename

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        filename, label = self.data[index]

        pixels = read_image(f"{self.root}/{filename}")
        pixels = torch.as_tensor(pixels, dtype=torch.float32)

        preprocess = transforms.Compose([
            # Unlike the other two tasks, images are resized to a fixed 384x288.
            transforms.Resize(size=(384, 288)),
            # First normalisation divides every channel by 255
            # (presumably mapping raw 0-255 pixel values to 0-1).
            transforms.Normalize(mean=[0, 0, 0], std=[255, 255, 255]),
            # Second normalisation applies per-channel mean/std standardisation.
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        pixels = preprocess(pixels)

        if self.return_filename:
            return pixels, filename

        # Character k of the label lights up one slot inside [36*k, 36*k + 36).
        hot_slots = [code[label[position]] + position * 36 for position in range(4)]
        target = np.zeros(shape=144)
        target[hot_slots] = 1
        return pixels, torch.LongTensor(target)

In [8]:
# Directory on the mounted Drive that holds the test images.
TEST_PATH = "/content/drive/MyDrive/kaggle/input/captcha-hacker/test"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [9]:
# Directory holding the trained checkpoints, and the inference batch size.
WEIGHTS_DIR = "/content/drive/MyDrive/kaggle"
BATCH_SIZE = 100


def load_resnet18(weights_path, out_features):
    """Build a ResNet-18 with a replaced final layer and load trained weights.

    Args:
        weights_path: path to a state dict saved for this architecture.
        out_features: width of the replacement fully connected head.

    Returns:
        The model, moved to ``device`` as float32 and switched to eval mode.
    """
    model = resnet18()
    model.fc = nn.Linear(in_features=512, out_features=out_features, bias=True)
    # map_location makes the checkpoint loadable on whichever device was
    # selected, e.g. a GPU-saved checkpoint on a CPU-only runtime.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.to(device, dtype=torch.float32)
    model.eval()
    return model


def write_predictions(model, dataset, csv_writer, num_chars, num_classes):
    """Run ``model`` over ``dataset`` and write one CSV row per sample.

    Args:
        model: network whose output has ``num_chars * num_classes`` values per
            sample.
        dataset: dataset yielding ``(image, filename)`` pairs.
        csv_writer: ``csv.writer`` that receives ``[filename, label]`` rows.
        num_chars: number of characters predicted per image.
        num_classes: number of classes per character position.
    """
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, drop_last=False, shuffle=False)
    with torch.no_grad():
        for images, filenames in tqdm(loader):
            logits = model(images.to(device))
            # Split the flat output into one group of scores per character,
            # pick the best class in each group, and copy the whole batch of
            # indices off the device in a single transfer.
            indices = logits.view(-1, num_chars, num_classes).argmax(dim=2).tolist()
            for filename, char_ids in zip(filenames, indices):
                label = "".join(rev_code[char_id] for char_id in char_ids)
                csv_writer.writerow([filename, label])


# The sample submission lists every test filename; blank lines are skipped.
with open(f'{TEST_PATH}/../sample_submission.csv', newline='') as csvfile:
    test_data = [row for row in csv.reader(csvfile, delimiter=',') if row]

# Load every checkpoint before touching submission.csv, so that a missing or
# incompatible weights file cannot leave a truncated submission behind.
task1_model = load_resnet18(f"{WEIGHTS_DIR}/task1_model.pt", out_features=10)
task2_model = load_resnet18(f"{WEIGHTS_DIR}/task2_model.pt", out_features=72)
task3_model = load_resnet18(f"{WEIGHTS_DIR}/task3_model.pt", out_features=144)

# The context manager guarantees the file is flushed and closed even if
# inference raises part-way through.
with open('submission.csv', 'w', newline='') as submission_file:
    csv_writer = csv.writer(submission_file)
    csv_writer.writerow(["filename", "label"])

    # Task 1: a single digit. rev_code maps 0-9 to '0'-'9', so the digit task
    # can share the same decoder as the character tasks.
    write_predictions(
        task1_model,
        Task1Dataset(test_data, root=TEST_PATH, return_filename=True),
        csv_writer, num_chars=1, num_classes=10,
    )
    # Task 2: two characters, 36 classes each.
    write_predictions(
        task2_model,
        Task2Dataset(test_data, root=TEST_PATH, return_filename=True),
        csv_writer, num_chars=2, num_classes=36,
    )
    # Task 3: four characters, 36 classes each.
    write_predictions(
        task3_model,
        Task3Dataset(test_data, root=TEST_PATH, return_filename=True),
        csv_writer, num_chars=4, num_classes=36,
    )

16