In [1]:
import os
from torch.utils.data import DataLoader,Dataset
import torchvision
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import pandas as pd
from torchvision import models
import torch.nn as nn
from pathlib import Path
import torch
from torch.autograd import Variable

In [2]:
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
ALPHABET = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
ALL_CHAR_SET = NUMBER + ALPHABET
ALL_CHAR_SET_LEN = len(ALL_CHAR_SET)
MAX_CAPTCHA = 5

In [3]:
def encode(a):
    onehot = [0]*ALL_CHAR_SET_LEN
    idx = ALL_CHAR_SET.index(a)
    onehot[idx] += 1
    return onehot

In [4]:
class Mydataset(Dataset):
    def __init__(self, path, is_train=True, transform=None):
        self.path = path
        if is_train: self.img = os.listdir(self.path)[:1000]
        else: self.img = os.listdir(self.path)[1001:]
        try: self.img.remove('3bnfnd.png')
        except: pass
        self.transform = transform
        
    def __getitem__(self, idx):
        img_path = self.img[idx]
        img = Image.open(self.path/img_path)
        img = img.convert('L')
        label = Path(self.path/img_path).name[:-4]
        label_oh = []
        for i in label:
            label_oh += encode(i)
        if self.transform is not None:
            img = self.transform(img)
        return img, np.array(label_oh), label
    
    def __len__(self):
        return len(self.img)

In [5]:
transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
])

In [6]:
train_ds = Mydataset(Path('captcha-version-2-images/samples/samples'), transform=transform)
test_ds = Mydataset(Path('captcha-version-2-images/samples/samples'), False, transform)
train_dl = DataLoader(train_ds, batch_size=64, num_workers=0)
test_dl = DataLoader(test_ds, batch_size=1, num_workers=0)

In [7]:
model = models.resnet18(pretrained=False)

In [8]:
model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [9]:
model.fc = nn.Linear(in_features=512, out_features=ALL_CHAR_SET_LEN*MAX_CAPTCHA, bias=True)

In [10]:
model.cuda();

In [11]:
loss_func = nn.MultiLabelSoftMarginLoss()
optm = torch.optim.Adam(model.parameters(), lr=0.001)

In [12]:
for epoch in range(50):
    for step, i in enumerate(train_dl):
        img, label_oh, label = i
        img = Variable(img).cuda()
        label_oh = Variable(label_oh.float()).cuda()
        pred = model(img)
        loss = loss_func(pred, label_oh)
        optm.zero_grad()
        loss.backward()
        optm.step()
        print('eopch:', epoch+1, 'step:', step+1, 'loss:', loss.item())

eopch: 1 step: 1 loss: 0.699721097946167
eopch: 1 step: 2 loss: 0.4976096749305725
eopch: 1 step: 3 loss: 0.33146166801452637
eopch: 1 step: 4 loss: 0.22709867358207703
eopch: 1 step: 5 loss: 0.1722574532032013
eopch: 1 step: 6 loss: 0.14704519510269165
eopch: 1 step: 7 loss: 0.13508272171020508
eopch: 1 step: 8 loss: 0.1305321902036667
eopch: 1 step: 9 loss: 0.12763851881027222
eopch: 1 step: 10 loss: 0.1288197785615921
eopch: 1 step: 11 loss: 0.13337406516075134
eopch: 1 step: 12 loss: 0.13591766357421875
eopch: 1 step: 13 loss: 0.13892564177513123
eopch: 1 step: 14 loss: 0.13189545273780823
eopch: 1 step: 15 loss: 0.13050614297389984
eopch: 1 step: 16 loss: 0.13151024281978607
eopch: 2 step: 1 loss: 0.12486008554697037
eopch: 2 step: 2 loss: 0.12422467768192291
eopch: 2 step: 3 loss: 0.1222267746925354
eopch: 2 step: 4 loss: 0.11861170828342438
eopch: 2 step: 5 loss: 0.11507734656333923
eopch: 2 step: 6 loss: 0.1122274324297905
eopch: 2 step: 7 loss: 0.11126325279474258
eopch: 2 ste

eopch: 12 step: 13 loss: 0.07609691470861435
eopch: 12 step: 14 loss: 0.07244406640529633
eopch: 12 step: 15 loss: 0.08154202252626419
eopch: 12 step: 16 loss: 0.07330348342657089
eopch: 13 step: 1 loss: 0.07568284124135971
eopch: 13 step: 2 loss: 0.07610709965229034
eopch: 13 step: 3 loss: 0.07459302991628647
eopch: 13 step: 4 loss: 0.07285891473293304
eopch: 13 step: 5 loss: 0.0768188089132309
eopch: 13 step: 6 loss: 0.07337299734354019
eopch: 13 step: 7 loss: 0.07430561631917953
eopch: 13 step: 8 loss: 0.06991912424564362
eopch: 13 step: 9 loss: 0.0691470131278038
eopch: 13 step: 10 loss: 0.07179301232099533
eopch: 13 step: 11 loss: 0.07198887318372726
eopch: 13 step: 12 loss: 0.07961997389793396
eopch: 13 step: 13 loss: 0.07219628989696503
eopch: 13 step: 14 loss: 0.06755445152521133
eopch: 13 step: 15 loss: 0.07676813751459122
eopch: 13 step: 16 loss: 0.06878890842199326
eopch: 14 step: 1 loss: 0.0699026957154274
eopch: 14 step: 2 loss: 0.07080702483654022
eopch: 14 step: 3 loss: 

eopch: 24 step: 5 loss: 0.02523714303970337
eopch: 24 step: 6 loss: 0.028921063989400864
eopch: 24 step: 7 loss: 0.0289878249168396
eopch: 24 step: 8 loss: 0.023684242740273476
eopch: 24 step: 9 loss: 0.021259788423776627
eopch: 24 step: 10 loss: 0.020491696894168854
eopch: 24 step: 11 loss: 0.02666321024298668
eopch: 24 step: 12 loss: 0.0332554429769516
eopch: 24 step: 13 loss: 0.029199030250310898
eopch: 24 step: 14 loss: 0.019710037857294083
eopch: 24 step: 15 loss: 0.023194190114736557
eopch: 24 step: 16 loss: 0.027634531259536743
eopch: 25 step: 1 loss: 0.027202405035495758
eopch: 25 step: 2 loss: 0.023360878229141235
eopch: 25 step: 3 loss: 0.022329919040203094
eopch: 25 step: 4 loss: 0.021639620885252953
eopch: 25 step: 5 loss: 0.021667974069714546
eopch: 25 step: 6 loss: 0.027951551601290703
eopch: 25 step: 7 loss: 0.025422852486371994
eopch: 25 step: 8 loss: 0.019548222422599792
eopch: 25 step: 9 loss: 0.01938430219888687
eopch: 25 step: 10 loss: 0.019170571118593216
eopch: 25

eopch: 35 step: 11 loss: 0.008180780336260796
eopch: 35 step: 12 loss: 0.013734368607401848
eopch: 35 step: 13 loss: 0.012884421274065971
eopch: 35 step: 14 loss: 0.0053854756988584995
eopch: 35 step: 15 loss: 0.006206156220287085
eopch: 35 step: 16 loss: 0.010307284072041512
eopch: 36 step: 1 loss: 0.007795714307576418
eopch: 36 step: 2 loss: 0.009622598066926003
eopch: 36 step: 3 loss: 0.013240208849310875
eopch: 36 step: 4 loss: 0.007219728548079729
eopch: 36 step: 5 loss: 0.005797611083835363
eopch: 36 step: 6 loss: 0.00976509228348732
eopch: 36 step: 7 loss: 0.0073542119935154915
eopch: 36 step: 8 loss: 0.005884835496544838
eopch: 36 step: 9 loss: 0.005733435042202473
eopch: 36 step: 10 loss: 0.005198546685278416
eopch: 36 step: 11 loss: 0.007992325350642204
eopch: 36 step: 12 loss: 0.012474587187170982
eopch: 36 step: 13 loss: 0.01274903304874897
eopch: 36 step: 14 loss: 0.0052006058394908905
eopch: 36 step: 15 loss: 0.006471272557973862
eopch: 36 step: 16 loss: 0.009743391536176

eopch: 46 step: 15 loss: 0.001740145729854703
eopch: 46 step: 16 loss: 0.0026766450610011816
eopch: 47 step: 1 loss: 0.0019442790653556585
eopch: 47 step: 2 loss: 0.0017759962938725948
eopch: 47 step: 3 loss: 0.0016953349113464355
eopch: 47 step: 4 loss: 0.0018213969888165593
eopch: 47 step: 5 loss: 0.0017774637090042233
eopch: 47 step: 6 loss: 0.002698369324207306
eopch: 47 step: 7 loss: 0.00231458549387753
eopch: 47 step: 8 loss: 0.0015946337953209877
eopch: 47 step: 9 loss: 0.001580846612341702
eopch: 47 step: 10 loss: 0.0014905957505106926
eopch: 47 step: 11 loss: 0.00220320513471961
eopch: 47 step: 12 loss: 0.0033494934905320406
eopch: 47 step: 13 loss: 0.004002728499472141
eopch: 47 step: 14 loss: 0.001605217345058918
eopch: 47 step: 15 loss: 0.0016963155940175056
eopch: 47 step: 16 loss: 0.0025351555086672306
eopch: 48 step: 1 loss: 0.0019157490460202098
eopch: 48 step: 2 loss: 0.0016878676833584905
eopch: 48 step: 3 loss: 0.0016096231993287802
eopch: 48 step: 4 loss: 0.00169338

In [13]:
model.eval();

In [None]:
for step, (img, label_oh, label) in enumerate(test_dl):
    img = Variable(img).cuda()
    pred = model(img)

    c0 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[0:ALL_CHAR_SET_LEN])]
    c1 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[ALL_CHAR_SET_LEN:ALL_CHAR_SET_LEN*2])]
    c2 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[ALL_CHAR_SET_LEN*2:ALL_CHAR_SET_LEN*3])]
    c3 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[ALL_CHAR_SET_LEN*3:ALL_CHAR_SET_LEN*4])]
    c4 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[ALL_CHAR_SET_LEN*4:ALL_CHAR_SET_LEN*5])]
    c = '%s%s%s%s%s' % (c0, c1, c2, c3, c4)

    print('label:', label[0], 'pred:', c)

In [14]:
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
#     mean = np.array([0.485, 0.456, 0.406])
#     std = np.array([0.229, 0.224, 0.225])
#     inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # 갱신이 될 때까지 잠시 기다립니다.


In [15]:
test_correct = 0
test_total = len(test_dl.dataset)
num_images = 6
print("test_total=",test_total)

for step, (img, label_oh, label) in enumerate(test_dl):
    view_image = torchvision.utils.make_grid(img)
    img = Variable(img).cuda()
    pred = model(img)

    c0 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[0:ALL_CHAR_SET_LEN])]
    c1 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[ALL_CHAR_SET_LEN:ALL_CHAR_SET_LEN*2])]
    c2 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[ALL_CHAR_SET_LEN*2:ALL_CHAR_SET_LEN*3])]
    c3 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[ALL_CHAR_SET_LEN*3:ALL_CHAR_SET_LEN*4])]
    c4 = ALL_CHAR_SET[np.argmax(pred.squeeze().cpu().tolist()[ALL_CHAR_SET_LEN*4:ALL_CHAR_SET_LEN*5])]
    preds_label = '%s%s%s%s%s' % (c0, c1, c2, c3, c4)
    
    correct = ""
    if label[0] == preds_label:
        test_correct += 1
        correct = "True"
    else:
        correct = "False"
                
    print("Lable : {} Prediction Lable : {} 맟춤여부 : {}".format(label[0], preds_label, correct))

    # 맞추지 못한 이미지를 출력한다.
    if (correct == "False") and (num_images > step):
        imshow(view_image, title='{} predicted: {}  {}'.format(i+1, preds_label, correct))

#     print('label:', label[0], 'pred:', c)
print(f'Test Accuracy: {(test_correct/test_total):.5f} ' +  f'({test_correct}/{test_total})')

test_total= 69
Lable : xfgxb Prediction Lable : xfdxb 맟춤여부 : False


TypeError: can only concatenate list (not "int") to list