In [2]:
# Dataset locations and the device every model / batch is moved to.
import torch  # imported here because this cell is listed before the notebook's import cell

TRAIN_PATH = "/content/train"
TEST_PATH = "/content/test"
# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook can still be executed on a runtime without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import csv
import cv2
import random

from tqdm import tqdm

import torch
import torch.nn as nn
import torchvision.models as models
from collections import OrderedDict
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold

# Show the current working directory twice: once through the shell escape,
# once through Python.
!pwd
print(os.getcwd())
# NOTE(review): presumably tells the Kaggle CLI where to find its credentials
# file -- confirm against the Kaggle API documentation.
os.environ['KAGGLE_CONFIG_DIR'] = "/content"
# Dataset download / extraction commands, currently disabled.
# !kaggle competitions download -c captcha-hacker
# !unzip captcha-hacker.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: train/task2/CiRRerGOUkeBAQCZ.png  
  inflating: train/task2/CisfFJiwBkCP7ZJc.png  
  inflating: train/task2/Ciu3hIBZEkMS72b1.png  
  inflating: train/task2/CjLeyGNQ1F5EFJno.png  
  inflating: train/task2/CkpjVyGfldkertEP.png  
  inflating: train/task2/ClmmT4me3oVjU2uo.png  
  inflating: train/task2/CnWC54NdezW58Wiu.png  
  inflating: train/task2/CnuOrI1foWT8AIMh.png  
  inflating: train/task2/Cp6qg9AtP1oWIyxO.png  
  inflating: train/task2/Ct9G7oWGZs8EkjHN.png  
  inflating: train/task2/CzFBYB8r8vaCXGWy.png  
  inflating: train/task2/D0qdbF25TSdSjl8Q.png  
  inflating: train/task2/D1J6xGwPyit2XH7h.png  
  inflating: train/task2/D1LY7MfO4QnFQPKY.png  
  inflating: train/task2/D1W5bh1Wk3l5PLC4.png  
  inflating: train/task2/D1uDpwsCi15rDIrR.png  
  inflating: train/task2/D2EPn2DdXmdH3iUO.png  
  inflating: train/task2/D5Ece6l257nF0KKt.png  
  inflating: train/task2/D8RyGM7pKWCQuhO6.png  
  inflating: train/task

In [3]:
class Task1Dataset(Dataset):
    """Task-1 captcha images paired with their integer label.

    Args:
        data: Iterable of ``(filename, label)`` rows. Only rows whose filename
            starts with ``"task1"`` are kept.
        root: Directory the filenames are relative to.
        return_filename: When true, ``__getitem__`` yields ``(image, filename)``
            instead of ``(image, int(label))``.
    """

    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task1")]
        self.return_filename = return_filename
        self.root = root

    def __getitem__(self, index):
        """Load one sample as a ``(1, H, W)`` float tensor plus label or filename.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        filename, label = self.data[index]
        path = f"{self.root}/{filename}"
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None;
            # fail here with the path instead of deep inside medianBlur.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.medianBlur(img, 3)
        # Average the colour channels into a single grey plane.
        img = np.mean(img, axis=2)
        # Centre on 128 and scale by 128 before converting to a float tensor.
        img = torch.FloatTensor((img - 128) / 128)
        # Add the leading channel dimension: (H, W) -> (1, H, W).
        img = img.unsqueeze(0)
        if self.return_filename:
            return img, filename
        return img, int(label)

    def __len__(self):
        return len(self.data)

In [4]:
class Model(nn.Module):
    """Small CNN classifier: four conv stages followed by a three-layer MLP.

    The first linear layer takes ``128*9*9`` features, i.e. the network is
    sized for a single-channel 72x72 input (three 2x2 max-pools: 72 -> 9).
    The output is a vector of 10 logits per sample.
    """

    @staticmethod
    def _stage(in_ch, out_ch, pool=True):
        """Build Conv3x3(pad=1) -> BatchNorm -> ReLU, optionally + MaxPool(2)."""
        layers = [
            # padding = (kernel_size - 1) / 2 keeps the spatial size unchanged
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2))
        return nn.Sequential(*layers)

    def __init__(self):
        super().__init__()
        # Shapes in the comments are (channels, height, width) for a 72x72 input.
        self.conv1 = self._stage(1, 16)                 # -> (16, 36, 36)
        self.conv2 = self._stage(16, 32)                # -> (32, 18, 18)
        self.conv3 = self._stage(32, 64)                # -> (64, 9, 9)
        self.conv4 = self._stage(64, 128, pool=False)   # -> (128, 9, 9)
        self.out = nn.Sequential(
            nn.Linear(128 * 9 * 9, 1024),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(256, 10),
        )

    def forward(self, x):
        """Run the conv stages, flatten per sample, and return the logits."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = stage(x)
        flat = x.view(x.size(0), -1)
        return self.out(flat)

In [5]:

# Random ~80/20 split of the annotation rows into training and validation
# lists, followed by the task-1 datasets / loaders built from them.
SPLIT_SEED = 0  # fixed so a kernel restart reproduces the same split

all_data = []    # every row in file order (previously appended to without being created)
train_data = []
val_data = []

# A dedicated generator keeps the split reproducible without touching the
# global `random` state.
_split_rng = random.Random(SPLIT_SEED)

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        all_data.append(row)
        if _split_rng.random() < 0.8:
            train_data.append(row)
        else:
            val_data.append(row)

# The dataset only keeps rows whose filename starts with "task1", so any
# other row that landed in these lists is ignored here.
train_ds = Task1Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=64, num_workers=2, drop_last=True, shuffle=True)

val_ds = Task1Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=500, num_workers=2, drop_last=False, shuffle=False)

torch.Size([1, 72, 72])


In [5]:

# train_data = []
# val_data = []
# test_data = []

# with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
#     for row in csv.reader(csvfile, delimiter=','):
#         r = random.random()
#         if r < 0.4:
#             train_data.append(row)
#         elif r < 0.5:
#             val_data.append(row)
#         else:
#             test_data.append(row)

# train_ds = Task1Dataset(train_data, root=TRAIN_PATH)
# train_dl = DataLoader(train_ds, batch_size=64, num_workers=2, drop_last=True, shuffle=True)

# val_ds = Task1Dataset(val_data, root=TRAIN_PATH)
# val_dl = DataLoader(val_ds, batch_size=500, num_workers=2, drop_last=False, shuffle=False)
# test_ds = Task1Dataset(test_data, root=TRAIN_PATH)
# test_dl = DataLoader(test_ds, batch_size=500, num_workers=2, drop_last=False, shuffle=False)
# print(len(train_ds))
# print(len(val_ds))
# print(len(test_ds))

823
197
980


In [6]:
# Scan every label, print the digits / lowercase letters that never occur,
# then define the alphabet used for one-hot encoding.
seen_chars = set()
with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if row[0] == "filename":  # skip the header row
            continue
        # A set accepts any character; the earlier 123-slot lookup array
        # raised IndexError for code points above ord('z').
        seen_chars.update(row[1])

# Same reporting order as before: '0'-'9' first, then 'a'-'z'.
for candidate in "0123456789abcdefghijklmnopqrstuvwxyz":
    if candidate not in seen_chars:
        print(candidate)

# Order matters: encoding() / decodingP() map a character to its class index
# by position in this list.
CAPTCHA_DICT = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                'a', 'c', 'd', 'e', 'f', 'h','j', 'k', 'm', 'n', 'p', 'r', 's', 't','v','w','x','y' ]
print(len(CAPTCHA_DICT))

b
g
i
l
o
q
u
z
28


In [18]:
# Train the task-1 CNN and checkpoint whenever validation accuracy improves.
model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
maxAcc = 0
flag = 1  # stays 1 until the one-off learning-rate drop has been applied
train_dl = DataLoader(train_ds, batch_size=30, num_workers=2, drop_last=True, shuffle=True)
for epoch in range(150):
    print(f"Epoch [{epoch}]")
    model.train()
    total_loss = 0.0
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)

        pred = model(image)
        loss = loss_fn(pred, label)
        # Accumulate a plain float: adding the loss tensor itself would keep
        # every batch's autograd history reachable for the whole epoch.
        total_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Lower the learning rate once, the first time the summed epoch loss
    # falls below 0.5.
    if flag and total_loss < 0.5:
        for g in optimizer.param_groups:
            g['lr'] = 1e-4
        flag = 0
    sample_count = 0
    correct_count = 0
    model.eval()
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)

            pred = torch.argmax(model(image), dim=1)
            sample_count += len(image)
            correct_count += (label == pred).sum().item()
    acc = correct_count / sample_count
    if acc > maxAcc:
        print(f"Loss: {total_loss}")
        print(f"accuracy (validation): {acc} {correct_count}/{sample_count}")
        print("Saved!")
        maxAcc = acc
        torch.save(model.state_dict(), './model1.pth')
        torch.save(optimizer.state_dict(), './optimizer1.pth')


Epoch [0]
Loss: 123.19532012939453
accuracy (validation): 0.39408865571022034 160/406
Saved!
Epoch [1]
Loss: 62.27345275878906
accuracy (validation): 0.6527093648910522 265/406
Saved!
Epoch [2]
Loss: 30.365144729614258
accuracy (validation): 0.9162561297416687 372/406
Saved!
Epoch [3]
Loss: 12.609612464904785
accuracy (validation): 0.9556649923324585 388/406
Saved!
Epoch [4]
Loss: 8.564452171325684
accuracy (validation): 0.9753694534301758 396/406
Saved!
Epoch [5]
Epoch [6]
Loss: 7.300620079040527
accuracy (validation): 0.9827585816383362 399/406
Saved!
Epoch [7]
Loss: 4.525292873382568
accuracy (validation): 0.9852216839790344 400/406
Saved!
Epoch [8]
Epoch [9]
Epoch [10]
Epoch [11]
Epoch [12]
Epoch [13]
Epoch [14]
Epoch [15]
Epoch [16]
Epoch [17]
Epoch [18]
Loss: 1.5096087455749512
accuracy (validation): 0.9876847267150879 401/406
Saved!
Epoch [19]
Epoch [20]
Epoch [21]
Epoch [22]
Loss: 0.6452528238296509
accuracy (validation): 0.9926108121871948 403/406
Saved!
Epoch [23]
Epoch [24]


In [88]:
# model = Model().to(device)
# model.load_state_dict(torch.load(f'/content/model1.pth'))
# loss_fn = nn.CrossEntropyLoss()
# minLoss = 1
# maxAcc = 0

# for epoch in range(3):
#     print(f"Epoch [{epoch}]")
#     sample_count = 0
#     correct_count = 0
#     model.eval()
#     for image, label in test_dl:
#         image = image.to(device)
#         label = label.to(device)
        
#         pred = model(image)
#         loss = loss_fn(pred, label)
        
#         pred = torch.argmax(pred, dim=1)
#         sample_count += len(image)
#         correct_count += (label == pred).sum()
#     acc = correct_count / sample_count
#     print(f"Loss: {total_loss}")    
#     print(f"accuracy (validation): {acc} {correct_count}/{sample_count}")

Epoch [0]
Loss: 0.01842528209090233
accuracy (validation): 0.9916753768920898 953/961
Epoch [1]
Loss: 0.01842528209090233
accuracy (validation): 0.9916753768920898 953/961
Epoch [2]
Loss: 0.01842528209090233
accuracy (validation): 0.9916753768920898 953/961


In [7]:
def encoding(label):
    """One-hot encode a label string, one CAPTCHA_DICT-sized block per character.

    Returns a flat float array of length ``len(CAPTCHA_DICT) * len(label)``
    with a single 1.0 inside each character's block. A character that is not
    in CAPTCHA_DICT raises ValueError (from ``list.index``).
    """
    n_classes = len(CAPTCHA_DICT)
    hot_positions = [
        slot * n_classes + CAPTCHA_DICT.index(ch)
        for slot, ch in enumerate(label)
    ]
    code = np.zeros(n_classes * len(label), dtype=float)
    code[np.asarray(hot_positions, dtype=np.intp)] = 1.0
    return code
def decoding(onehot):
    """Convert a concatenated one-hot vector back into its label string.

    The k-th position equal to 1 (in ascending order) is treated as character
    slot k, so its class index is that position minus ``k * len(CAPTCHA_DICT)``.
    """
    hot_positions = np.where(onehot == 1)[0]
    chars = [
        CAPTCHA_DICT[pos - slot * len(CAPTCHA_DICT)]
        for slot, pos in enumerate(hot_positions)
    ]
    return ''.join(chars)
def decodingP(idx):
    """Map a sequence of class indices to the string of matching CAPTCHA_DICT characters."""
    return ''.join(CAPTCHA_DICT[class_idx] for class_idx in idx)
# decoding(encoding("ac"))
# decodingP([10,20])

In [28]:
class Task2Dataset(Dataset):
    """Task-2 captcha images with a selectable second item per sample.

    Args:
        data: Iterable of ``(filename, label)`` rows. Only rows whose filename
            starts with ``"task2"`` are kept.
        root: Directory the filenames are relative to.
        return_filename: Selects what accompanies the image:
            ``2`` -> the filename, ``1`` -> the raw label string,
            anything else -> ``encoding(label)`` (one-hot vector).
    """

    def __init__(self, data, root, return_filename=0):
        self.data = [sample for sample in data if sample[0].startswith("task2")]
        self.return_filename = return_filename
        self.root = root

    def __getitem__(self, index):
        """Load one sample as a ``(1, 108, 108)`` float tensor plus its companion.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        filename, label = self.data[index]
        path = f"{self.root}/{filename}"
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None;
            # fail here with the path instead of inside cv2.resize.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.resize(img, (108, 108), interpolation=cv2.INTER_LANCZOS4)
        img = cv2.medianBlur(img, 3)
        # Average the colour channels into a single grey plane.
        img = np.mean(img, axis=2)
        # Centre on 128 and scale by 128 before converting to a float tensor.
        img = torch.FloatTensor((img - 128) / 128)
        # Add the leading channel dimension: (H, W) -> (1, H, W).
        img = img.unsqueeze(0)
        if self.return_filename == 2:
            return img, filename
        if self.return_filename == 1:
            return img, label
        return img, encoding(label)

    def __len__(self):
        return len(self.data)

In [51]:
# Task-2 datasets and loaders. This rebinds train_ds / train_dl / val_ds /
# val_dl, the same names the task-1 cells use.
train_ds = Task2Dataset(train_data, root=TRAIN_PATH)
val_ds = Task2Dataset(val_data, root=TRAIN_PATH)

train_dl = DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
    drop_last=True,
    num_workers=2,
)
val_dl = DataLoader(
    val_ds,
    batch_size=500,
    shuffle=False,
    drop_last=False,
    num_workers=2,
)

In [52]:

# Task 2: fine-tune a pretrained ResNet-18 to read two-character captchas.
# The head emits one block of len(CAPTCHA_DICT) logits per character slot.
model2 = models.resnet18(pretrained=True)
fc_features = model2.fc.in_features
model2.fc = nn.Linear(fc_features, len(CAPTCHA_DICT)*2)
# The datasets yield single-channel images, so the 3-channel stem is replaced
# by a newly created 1-channel convolution.
model2.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model_state = model2.state_dict()  # NOTE(review): not read by any cell visible here
model2 = model2.to(device)
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-3)
loss_fn = nn.MultiLabelSoftMarginLoss()
maxAcc = 0
flag = 1  # stays 1 until the one-off learning-rate drop has been applied

for epoch in range(75):
    print(f"Epoch [{epoch}]")
    model2.train()
    total_loss = 0.0
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)

        pred = model2(image)
        loss = loss_fn(pred, label)
        # Accumulate a plain float: adding the loss tensor itself would keep
        # every batch's autograd history reachable for the whole epoch.
        total_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Lower the learning rate once, the first time the summed epoch loss
    # falls below 0.05.
    if flag and total_loss < 0.05:
        for g in optimizer.param_groups:
            g['lr'] = 1e-4
        flag = 0

    sample_count = 0
    correct_count = 0
    model2.eval()
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)

            pred = model2(image)
            sample_count += len(image)
            # (batch, 2 * classes) -> (batch, 2, classes) -> best class per slot
            pred = pred.view((pred.shape[0], 2, len(CAPTCHA_DICT)))
            pred = torch.argmax(pred, dim=2).cpu().numpy()
            # Move the whole batch to host memory once instead of per sample.
            target = label.cpu().numpy()

            for i in range(target.shape[0]):
                if decoding(target[i]) == decodingP(pred[i]):
                    correct_count += 1
    acc = correct_count / sample_count
    print(f"Loss: {total_loss}")
    if acc > maxAcc:
        print(f"accuracy (validation): {acc} correct: {correct_count} sample: {sample_count}")
        maxAcc = acc
        torch.save(model2.state_dict(), './model2.pth')
        torch.save(optimizer.state_dict(), './optimizer2.pth')


Epoch [0]


Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 245, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
OSError: [Errno 9] Bad file descriptor
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 235, in _feed
    close()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor


Loss: 10.212674320933685
accuracy (validation): 0.1663286004056795 correct: 82 sample: 493
Epoch [1]
Loss: 3.1984688531742913
accuracy (validation): 0.8498985801217038 correct: 419 sample: 493
Epoch [2]
Loss: 1.0852719645272118
accuracy (validation): 0.8782961460446247 correct: 433 sample: 493
Epoch [3]
Loss: 0.5836932983103478
accuracy (validation): 0.9087221095334685 correct: 448 sample: 493
Epoch [4]
Loss: 0.34820665411822493
accuracy (validation): 0.9452332657200812 correct: 466 sample: 493
Epoch [5]
Loss: 0.325994463153741
Epoch [6]
Loss: 0.24344892094455498
Epoch [7]
Loss: 0.30193826160034853
Epoch [8]
Loss: 0.30266551763178323
accuracy (validation): 0.9614604462474645 correct: 474 sample: 493
Epoch [9]
Loss: 0.14788501158381348
Epoch [10]
Loss: 0.2210010751827093
Epoch [11]
Loss: 0.1961508259786058
Epoch [12]
Loss: 0.10610701167043612
Epoch [13]
Loss: 0.07764693877314274
accuracy (validation): 0.9675456389452333 correct: 477 sample: 493
Epoch [14]
Loss: 0.03001233700317382
accur

KeyboardInterrupt: ignored

In [48]:
# test_ds = Task2Dataset(test_data, root=TRAIN_PATH)
# test_dl = DataLoader(test_ds, batch_size=500, num_workers=2, drop_last=False, shuffle=False)
# model2.load_state_dict(torch.load(f'/content/model2.pth'))
# loss_fn = nn.MultiLabelSoftMarginLoss()
# print(optimizer.param_groups[0]['lr'])
# minLoss = 1
# maxAcc = 0


# for epoch in range(3):
#     print(f"Epoch [{epoch}]")
#     sample_count = 0
#     correct_count = 0
#     model2.eval()
#     for image, label in test_dl:
#         image = image.to(device)
#         label = label.to(device)
        
#         pred = model2(image)
#         sample_count += len(image)
#         pred = pred.view((pred.shape[0], 2, 28))
#         pred = torch.argmax(pred, axis=2)
            
#         for i in range(label.shape[0]):
#               if decoding(label[i].cpu().numpy()) == decodingP(pred[i].cpu().numpy()):
#                   correct_count += 1
#     acc = correct_count / sample_count
#     print(f"Loss: {total_loss}")    
#     print(f"accuracy (validation): {acc} {correct_count}/{sample_count}")

0.0001
Epoch [0]
Loss: 0.0030553878379283267
accuracy (validation): 0.9650582362728786 1160/1202
Epoch [1]
Loss: 0.0030553878379283267
accuracy (validation): 0.9650582362728786 1160/1202
Epoch [2]
Loss: 0.0030553878379283267
accuracy (validation): 0.9650582362728786 1160/1202


In [55]:
# Task 2, second stage: reload the saved checkpoint and keep training on the
# validation split. The accuracy printed below is therefore measured on the
# same samples the model has just been trained on.
val_dl = DataLoader(val_ds, batch_size=32, num_workers=2, drop_last=False, shuffle=False)
model2 = models.resnet18(pretrained=True)
fc_features = model2.fc.in_features
model2.fc = nn.Linear(fc_features, len(CAPTCHA_DICT)*2)
model2.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model2 = model2.to(device)
# map_location lets a checkpoint saved on one device load on another.
model2.load_state_dict(torch.load(f'/content/model2.pth', map_location=device))
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-3)
# Restores the saved optimizer state, including the saved param_groups.
optimizer.load_state_dict(torch.load(f"/content/optimizer2.pth", map_location=device))
loss_fn = nn.MultiLabelSoftMarginLoss()
minLoss = 1
maxAcc = 0

for epoch in range(35):
    print(f"Epoch [{epoch}]")
    model2.train()
    total_loss = 0.0
    for image, label in val_dl:
        image = image.to(device)
        label = label.to(device)

        pred = model2(image)
        loss = loss_fn(pred, label)
        # Accumulate a plain float: adding the loss tensor itself would keep
        # every batch's autograd history reachable for the whole epoch.
        total_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    sample_count = 0
    correct_count = 0
    model2.eval()
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)

            pred = model2(image)
            sample_count += len(image)
            # (batch, 2 * classes) -> (batch, 2, classes) -> best class per slot
            pred = pred.view((pred.shape[0], 2, len(CAPTCHA_DICT)))
            pred = torch.argmax(pred, dim=2).cpu().numpy()
            # Move the whole batch to host memory once instead of per sample.
            target = label.cpu().numpy()

            for i in range(target.shape[0]):
                if decoding(target[i]) == decodingP(pred[i]):
                    correct_count += 1
    acc = correct_count / sample_count
    print(f"Loss: {total_loss}")
    # The accuracy line used to be printed twice per epoch; once is enough.
    print(f"accuracy (validation): {acc} {correct_count}/{sample_count}")
    if total_loss < minLoss:
        print("Saved!")
        minLoss = total_loss
        maxAcc = acc
        torch.save(model2.state_dict(), './model2.pth')

Epoch [0]
accuracy (validation): 0.9979716024340771 492/493
Loss: 0.04914830916511694
accuracy (validation): 0.9979716024340771 492/493
Saved!
Epoch [1]
accuracy (validation): 1.0 493/493
Loss: 0.009610077407849991
accuracy (validation): 1.0 493/493
Saved!
Epoch [2]
accuracy (validation): 1.0 493/493
Loss: 0.006556283603045823
accuracy (validation): 1.0 493/493
Saved!
Epoch [3]
accuracy (validation): 1.0 493/493
Loss: 0.005561705838594461
accuracy (validation): 1.0 493/493
Saved!
Epoch [4]
accuracy (validation): 1.0 493/493
Loss: 0.0051565217997388505
accuracy (validation): 1.0 493/493
Saved!
Epoch [5]
accuracy (validation): 1.0 493/493
Loss: 0.004890442363399158
accuracy (validation): 1.0 493/493
Saved!
Epoch [6]
accuracy (validation): 1.0 493/493
Loss: 0.004695517789916659
accuracy (validation): 1.0 493/493
Saved!
Epoch [7]
accuracy (validation): 1.0 493/493
Loss: 0.004541167176602128
accuracy (validation): 1.0 493/493
Saved!
Epoch [8]
accuracy (validation): 1.0 493/493
Loss: 0.00441

In [19]:
class Task3Dataset(Dataset):
    """Task-3 captcha images with a selectable second item per sample.

    Args:
        data: Iterable of ``(filename, label)`` rows. Only rows whose filename
            starts with ``"task3"`` are kept.
        root: Directory the filenames are relative to.
        return_filename: Selects what accompanies the image:
            ``2`` -> the filename, ``1`` -> the raw label string,
            anything else -> ``encoding(label)`` (one-hot vector).
    """

    def __init__(self, data, root, return_filename=0):
        self.data = [sample for sample in data if sample[0].startswith("task3")]
        self.return_filename = return_filename
        self.root = root

    def __getitem__(self, index):
        """Load one sample as a single-channel float tensor plus its companion.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        filename, label = self.data[index]
        path = f"{self.root}/{filename}"
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None;
            # fail here with the path instead of inside cv2.resize.
            raise FileNotFoundError(f"Could not read image: {path}")
        # cv2.resize takes (width, height): the result is 144 wide, 108 high.
        img = cv2.resize(img, (144, 108), interpolation=cv2.INTER_LANCZOS4)
        # Average the colour channels into a single grey plane.
        img = np.mean(img, axis=2)
        # Centre on 128 and scale by 128 before converting to a float tensor.
        img = torch.FloatTensor((img - 128) / 128)
        # Add the leading channel dimension: (H, W) -> (1, H, W).
        img = img.unsqueeze(0)
        if self.return_filename == 2:
            return img, filename
        if self.return_filename == 1:
            return img, label
        return img, encoding(label)

    def __len__(self):
        return len(self.data)

In [20]:

# Task-3 datasets and loaders. This rebinds train_ds / train_dl / val_ds /
# val_dl, the same names the task-1 and task-2 cells use.
train_ds = Task3Dataset(train_data, root=TRAIN_PATH)
val_ds = Task3Dataset(val_data, root=TRAIN_PATH)

train_dl = DataLoader(
    train_ds,
    batch_size=72,
    shuffle=True,
    drop_last=True,
    num_workers=2,
)
val_dl = DataLoader(
    val_ds,
    batch_size=500,
    shuffle=False,
    drop_last=False,
    num_workers=2,
)

2383
617


In [22]:
# Task 3: fine-tune a pretrained ResNet-18 to read four-character captchas.
# The head emits one block of len(CAPTCHA_DICT) logits per character slot.
model3 = models.resnet18(pretrained=True)
fc_features = model3.fc.in_features
model3.fc = nn.Linear(fc_features, len(CAPTCHA_DICT)*4)
# The datasets yield single-channel images, so the 3-channel stem is replaced
# by a newly created 1-channel convolution.
model3.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model_state = model3.state_dict()  # NOTE(review): not read by any cell visible here
model3 = model3.to(device)
optimizer = torch.optim.Adam(model3.parameters(), lr=1e-3)
loss_fn = nn.MultiLabelSoftMarginLoss()
maxAcc = 0
flag = 1  # stays 1 until the one-off learning-rate drop has been applied

for epoch in range(125):
    print(f"Epoch [{epoch}]")
    model3.train()
    total_loss = 0.0
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)

        pred = model3(image)
        loss = loss_fn(pred, label)
        # Accumulate a plain float: adding the loss tensor itself would keep
        # every batch's autograd history reachable for the whole epoch.
        total_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Lower the learning rate once, the first time the summed epoch loss
    # falls below 0.002.
    if flag and total_loss < 0.002:
        for g in optimizer.param_groups:
            g['lr'] = 1e-4
        flag = 0
    sample_count = 0
    correct_count = 0
    model3.eval()
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)

            pred = model3(image)
            sample_count += len(image)
            # (batch, 4 * classes) -> (batch, 4, classes) -> best class per slot
            pred = pred.view((pred.shape[0], 4, len(CAPTCHA_DICT)))
            pred = torch.argmax(pred, dim=2).cpu().numpy()
            # Move the whole batch to host memory once instead of per sample.
            target = label.cpu().numpy()

            for i in range(target.shape[0]):
                if decoding(target[i]) == decodingP(pred[i]):
                    correct_count += 1
    acc = correct_count / sample_count
    if acc > maxAcc:
        print(f"Loss: {total_loss}")
        print(f"accuracy (validation): {acc} correct: {correct_count} sample: {sample_count}")
        maxAcc = acc
        torch.save(model3.state_dict(), './model3.pth')
        torch.save(optimizer.state_dict(), './optimizer3.pth')


Epoch [0]
Epoch [1]
Epoch [2]
Loss: 4.186692486642535
accuracy (validation): 0.0016207455429497568 correct: 1 sample: 617
Epoch [3]
Loss: 3.0085066149385673
accuracy (validation): 0.07293354943273905 correct: 45 sample: 617
Epoch [4]
Loss: 1.9791059163301596
accuracy (validation): 0.26580226904376014 correct: 164 sample: 617
Epoch [5]
Loss: 1.246597643241017
accuracy (validation): 0.5251215559157212 correct: 324 sample: 617
Epoch [6]
Loss: 0.7805168460837077
accuracy (validation): 0.640194489465154 correct: 395 sample: 617
Epoch [7]
Loss: 0.5104181932962909
accuracy (validation): 0.7179902755267423 correct: 443 sample: 617
Epoch [8]
Loss: 0.36197709905804376
accuracy (validation): 0.7601296596434359 correct: 469 sample: 617
Epoch [9]
Loss: 0.2592186874038042
accuracy (validation): 0.7925445705024311 correct: 489 sample: 617
Epoch [10]
Loss: 0.19070184918833208
accuracy (validation): 0.7974068071312804 correct: 492 sample: 617
Epoch [11]
Loss: 0.1447134319669683
accuracy (validation): 0

In [47]:
# test_ds = Task3Dataset(test_data, root=TRAIN_PATH)
# test_dl = DataLoader(test_ds, batch_size=500, num_workers=2, drop_last=False, shuffle=False)
# model3.load_state_dict(torch.load(f'/content/model3.pth'))
# loss_fn = nn.MultiLabelSoftMarginLoss()
# print(optimizer.param_groups[0]['lr'])
# minLoss = 1
# maxAcc = 0


# for epoch in range(3):
#     print(f"Epoch [{epoch}]")
#     sample_count = 0
#     correct_count = 0
#     model3.eval()
#     for image, label in test_dl:
#         image = image.to(device)
#         label = label.to(device)
        
#         pred = model3(image)
#         sample_count += len(image)
#         pred = pred.view((pred.shape[0], 4, 28))
#         pred = torch.argmax(pred, axis=2)
            
#         for i in range(label.shape[0]):
#               if decoding(label[i].cpu().numpy()) == decodingP(pred[i].cpu().numpy()):
#                   correct_count += 1
#     acc = correct_count / sample_count
#     print(f"Loss: {total_loss}")    
#     print(f"accuracy (validation): {acc} {correct_count}/{sample_count}")

0.0001
Epoch [0]
Loss: 0.011004831724412613
accuracy (validation): 0.9845741113346748 1468/1491
Epoch [1]
Loss: 0.011004831724412613
accuracy (validation): 0.9845741113346748 1468/1491
Epoch [2]
Loss: 0.011004831724412613
accuracy (validation): 0.9845741113346748 1468/1491


In [26]:
# Task 3, second stage: reload the saved checkpoint and keep training on the
# validation split. The accuracy printed below is therefore measured on the
# same samples the model has just been trained on.
val_dl = DataLoader(val_ds, batch_size=75, num_workers=2, drop_last=False, shuffle=False)
model3 = models.resnet18(pretrained=True)
fc_features = model3.fc.in_features
model3.fc = nn.Linear(fc_features, len(CAPTCHA_DICT)*4)
model3.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model3 = model3.to(device)
# map_location lets a checkpoint saved on one device load on another.
model3.load_state_dict(torch.load(f'/content/model3.pth', map_location=device))
optimizer = torch.optim.Adam(model3.parameters(), lr=1e-3)
# Restores the saved optimizer state, including the saved param_groups.
optimizer.load_state_dict(torch.load(f"/content/optimizer3.pth", map_location=device))
loss_fn = nn.MultiLabelSoftMarginLoss()
minLoss = 1
maxAcc = 0
# for g in optimizer.param_groups:
#       g['lr'] = 1e-5

for epoch in range(30):
    print(f"Epoch [{epoch}]")
    model3.train()
    total_loss = 0.0
    for image, label in val_dl:
        image = image.to(device)
        label = label.to(device)

        pred = model3(image)
        loss = loss_fn(pred, label)
        # Accumulate a plain float: adding the loss tensor itself would keep
        # every batch's autograd history reachable for the whole epoch.
        total_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    sample_count = 0
    correct_count = 0
    model3.eval()
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)

            pred = model3(image)
            sample_count += len(image)
            # (batch, 4 * classes) -> (batch, 4, classes) -> best class per slot
            pred = pred.view((pred.shape[0], 4, len(CAPTCHA_DICT)))
            pred = torch.argmax(pred, dim=2).cpu().numpy()
            # Move the whole batch to host memory once instead of per sample.
            target = label.cpu().numpy()

            for i in range(target.shape[0]):
                if decoding(target[i]) == decodingP(pred[i]):
                    correct_count += 1
    acc = correct_count / sample_count
    print(f"Loss: {total_loss}")
    print(f"accuracy (validation): {acc} {correct_count}/{sample_count}")
    # Written to model33.pth, so the model3.pth checkpoint loaded above is
    # left untouched.
    if total_loss < minLoss and acc >= maxAcc:
        print("Saved!")
        minLoss = total_loss
        maxAcc = acc
        torch.save(model3.state_dict(), './model33.pth')
        # torch.save(optimizer.state_dict(), './optimizer3.pth')

Epoch [0]
Loss: 0.07042730716792411
accuracy (validation): 0.9918962722852512 612/617
Saved!
Epoch [1]
Loss: 0.03139138744565602
accuracy (validation): 0.9967585089141004 615/617
Saved!
Epoch [2]
Loss: 0.010151321037262314
accuracy (validation): 1.0 617/617
Saved!
Epoch [3]
Loss: 0.004965153848029481
accuracy (validation): 1.0 617/617
Saved!
Epoch [4]
Loss: 0.00362048291164338
accuracy (validation): 1.0 617/617
Saved!
Epoch [5]
Loss: 0.00286167162963852
accuracy (validation): 1.0 617/617
Saved!
Epoch [6]
Loss: 0.00245306087447951
accuracy (validation): 1.0 617/617
Saved!
Epoch [7]
Loss: 0.0021985013175527986
accuracy (validation): 1.0 617/617
Saved!
Epoch [8]
Loss: 0.0020247029575538563
accuracy (validation): 1.0 617/617
Saved!
Epoch [9]
Loss: 0.0018968027660225355
accuracy (validation): 1.0 617/617
Saved!
Epoch [10]
Loss: 0.0017969302311404173
accuracy (validation): 1.0 617/617
Saved!
Epoch [11]
Loss: 0.0017153789636413337
accuracy (validation): 1.0 617/617
Saved!
Epoch [12]
Loss: 0.0

In [None]:
# torch.cuda.memory_summary(device=None, abbreviated=False)
# torch.cuda.empty_cache()

In [29]:
# test_data = []
# with open(f'/content/sample_submission.csv', newline='') as csvfile:
#     for row in csv.reader(csvfile, delimiter=','):
#         test_data.append(row)

# test_ds = Task1Dataset(test_data, root=TEST_PATH, return_filename=True)
# test_dl = DataLoader(test_ds, batch_size=500, num_workers=1, drop_last=False, shuffle=False)
# print(len(test_ds))

# if os.path.exists('submission.csv'):
#     csv_writer = csv.writer(open('submission.csv', 'a', newline=''))
# else:
#     csv_writer = csv.writer(open('submission.csv', 'w', newline=''))
#     csv_writer.writerow(["filename", "label"])
# cnt = 0
# model = Model().to(device)
# model.load_state_dict(torch.load(f'/content/model1.pth'))
# model.eval()
# for image, filenames in test_dl:
#     image = image.to(device)
    
#     pred = model(image)
#     pred = torch.argmax(pred, dim=1)
    
#     for i in range(len(filenames)):
#         csv_writer.writerow([filenames[i], str(pred[i].item())])
#         cnt += 1
# test_ds = Task2Dataset(test_data, root=TEST_PATH, return_filename=2)
# print(cnt)
# print(len(test_ds))
# model2 = models.resnet18(pretrained=True)
# fc_features = model2.fc.in_features
# model2.fc = nn.Linear(fc_features, len(CAPTCHA_DICT)*2)
# model2.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# model2 = model2.to(device)
# model2.load_state_dict(torch.load(f'/content/model2.pth'))
# test_dl = DataLoader(test_ds, batch_size=500, num_workers=1, drop_last=False, shuffle=False)
# model2.eval()
# for image, filenames in test_dl:
#     image = image.to(device)
#     pred = model2(image)
#     pred = pred.view((pred.shape[0], 2, 28))
#     pred = torch.argmax(pred, axis=2)
#     for i in range(len(filenames)):
#         csv_writer.writerow([filenames[i], decodingP(pred[i].cpu().numpy())])
#         # csv_writer.writerow([filenames[i], 0])
#         cnt += 1
# test_ds = Task3Dataset(test_data, root=TEST_PATH, return_filename=2)
# print(cnt)
# print(len(test_ds))
# test_dl = DataLoader(test_ds, batch_size=200, num_workers=0, drop_last=False, shuffle=False)
# model3 = models.resnet18(pretrained=True)
# fc_features = model3.fc.in_features
# model3.fc = nn.Linear(fc_features, len(CAPTCHA_DICT)*4)
# model3.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# model3 = model3.to(device)
# model3.load_state_dict(torch.load(f'/content/model33.pth'))
# model3.eval()
# for image, filenames in test_dl:
#     image = image.to(device)
#     pred = model3(image)
#     pred = pred.view((pred.shape[0], 4, 28))
#     pred = torch.argmax(pred, axis=2)
#     for i in range(len(filenames)):
#         # print(i, filenames[i])
#         # csv_writer.writerow([filenames[i], 0])
#         csv_writer.writerow([filenames[i], decodingP(pred[i].cpu().numpy())])
#         cnt += 1
# print(cnt)
# cnt = 0
# with open(f'/content/submission.csv', newline='') as csvfile:
#     for row in csv.reader(csvfile, delimiter=','):
#         cnt+=1
#     print(cnt)
# cnt = 10001 - cnt

# csv_writer = csv.writer(open('submission.csv', 'a', newline=''))
# for i, (image, filenames) in enumerate(test_dl):
#     if i == 1:
#         image = image.to(device)
#         pred = model3(image)
#         pred = pred.view((pred.shape[0], 4, 28))
#         pred = torch.argmax(pred, axis=2)
#         for i in range(500 - cnt, len(filenames)):
#             print(i, filenames[i])
#             # csv_writer.writerow([filenames[i], 0])
#             csv_writer.writerow([filenames[i], decodingP(pred[i].cpu().numpy())])
# cnt = 0
# with open(f'/content/submission.csv', newline='') as csvfile:
#     for row in csv.reader(csvfile, delimiter=','):
#         cnt+=1
#     print(cnt)



6500
6500
2500
9000
1000
10000
9922
10001
