In [57]:
# Write the output of `pip freeze` to requirements.txt (shell command run through IPython's `!`).
! pip freeze > requirements.txt 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import copy
from PIL import Image

import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader

Initialize the device, the test-time transforms, and the class-name lists.

In [2]:
# Use the GPU when PyTorch can see one, otherwise run everything on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Evaluation-time preprocessing: resize to 224x224, convert to a tensor and
# normalize each channel (these values match the usual ImageNet statistics).
transforms_test = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Class labels; the position in the list is the class index used by the model head.
class_names = [
    'Airplane', 'Ambulance', 'Bicycle', 'Bus', 'Car', 'Fire hydrant', 'Helicopter',
    'Motorcycle', 'Parking meter', 'Stop sign', 'Taxi', 'Traffic sign', 'Truck',
]

# The same labels with 'Taxi' left out.
class_names_wt = [
    'Airplane', 'Ambulance', 'Bicycle', 'Bus', 'Car', 'Fire hydrant', 'Helicopter',
    'Motorcycle', 'Parking meter', 'Stop sign', 'Traffic sign', 'Truck',
]

Load the model and restore the trained weights.

In [3]:
# Build the ResNeXt-101 (64x4d) architecture WITHOUT downloading the ImageNet
# checkpoint: the strict load_state_dict() call below overwrites every
# parameter and buffer, so pre-trained weights would be fetched for nothing.
model = torchvision.models.resnext101_64x4d(weights=None)
# Replace the classification head with one output per CAPTCHA class.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_names))
model = model.to(DEVICE)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
# load_state_dict is strict by default, so a checkpoint that does not match the
# architecture raises instead of being loaded partially.
model.load_state_dict(torch.load("../weights/for report/resnext101_OI_100ep.pth", map_location=DEVICE))

<All keys matched successfully>

Load custom dataset and initialize data loader.

In [4]:
# custom dataset that inherits pytorch 'Dataset' class
class CaptchaDatatset(Dataset):
    """Dataset of grid CAPTCHA images, each one cut into ``size * size`` cell patches.

    The grid layout assumed by the slicing code is a 10 px outer margin and a
    4 px gap between neighbouring cells, i.e.
    ``captcha_size == size * patch_size + 4 * size + 16``.

    Each item is a tuple ``(labels_or_index, patches, image_info)``:

    * ``labels_or_index``: the comma-separated entries read from the answer
      file ``<label_path>/<image stem>.txt`` when ``label_path`` is set,
      otherwise the integer index of the item.
    * ``patches``: the transformed cells in row-major order.
    * ``image_info``: ``[numpy array of the resized RGB image, image path]``.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, transforms, class_names, img_path, size=4, captcha_size=928, label_path=None):
        """
        transforms: transform applied to every cell patch
        class_names: list of class names
        img_path: directory that contains the CAPTCHA images
        size: number of cells per grid row (the CAPTCHA is size x size)
        captcha_size: side length, in pixels, the whole CAPTCHA is resized to
        label_path: directory that contains the answer text files (optional)
        """
        # A missing image directory is reported and yields an empty dataset.
        if os.path.isdir(img_path):
            self.img_path = img_path
        else:
            print(f"Image direcotry not found: {img_path}")
            self.img_path = None

        # A missing label directory is reported and labels are disabled.
        if label_path is not None and not os.path.isdir(label_path):
            print(f"Label direcotry not found: {label_path}")
            label_path = None
        self.label_path = label_path

        # Collect the image files directly inside img_path. Sorting makes the
        # item order independent of the order os.listdir happens to return.
        images = []
        if self.img_path is not None:
            for entry in sorted(os.listdir(self.img_path)):
                path = os.path.join(self.img_path, entry)
                extension = os.path.splitext(entry)[1].lower()
                if os.path.isfile(path) and extension in self.IMAGE_EXTENSIONS:
                    images.append(path)
        self.files = images
        self.transforms = transforms
        self.class_names = class_names
        # Maps class index -> class name.
        self.labels = dict(enumerate(class_names))

        self.captcha_size = captcha_size
        self.size = size  # the CAPTCHA is a (size x size) grid
        # Remove the two 10 px margins and the (size - 1) gaps of 4 px, then
        # split what is left evenly between the cells of one row.
        self.patch_size = int((self.captcha_size-4*self.size-16)/self.size)

    def __len__(self):
        """Return the number of CAPTCHA images found in the image directory."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load image ``idx``, cut it into cell patches and read its answers if available."""
        path = self.files[idx]
        with Image.open(path) as handle:
            # resize() returns a new image, so its result must be kept.
            # Converting to RGB guarantees a (H, W, 3) array for the slicing below.
            image = handle.convert('RGB').resize((self.captcha_size, self.captcha_size))
        np_img = np.array(image)
        image_info = [np_img, path]

        # Cut the grid into cells, row by row.
        patches = []
        step = self.patch_size + 4  # distance between the origins of neighbouring cells
        for row in range(self.size):
            for col in range(self.size):
                top = 10 + row * step
                left = 10 + col * step
                cell = np_img[top:top + self.patch_size, left:left + self.patch_size]
                patch = Image.fromarray(np.ascontiguousarray(cell))
                patches.append(self.transforms(patch))  # typically converts to a tensor

        if self.label_path is None:
            return idx, patches, image_info

        # The answer file has the same stem as the image and a .txt extension.
        stem = os.path.splitext(os.path.basename(path))[0]
        label_info = []
        with open(os.path.join(self.label_path, f"{stem}.txt"), "r") as f:
            for line in f:
                label_info.extend(line.rstrip(',\n').split(","))
        return label_info, patches, image_info

In [5]:
# The "divide" dataset is currently disabled; its construction is kept for reference:
# test_set_divide = CaptchaDatatset(transforms=transforms_test, class_names=class_names,
#                            img_path="../datasets/reCAPTCHA/divide/")


def _build_captcha_set(grid, side, img_path, label_path):
    """Create a labelled CaptchaDatatset for a grid x grid CAPTCHA of side pixels."""
    return CaptchaDatatset(
        transforms=transforms_test,
        class_names=class_names,
        size=grid,
        captcha_size=side,
        img_path=img_path,
        label_path=label_path,
    )


def _build_eval_loader(dataset):
    """Wrap a dataset in a sequential, single-image, in-process DataLoader."""
    return DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)


# CAPTCHA side length per grid size:
#   2x2 : 472
#   3x3 : 700
#   4x4 : 928
test_set_4x4 = _build_captcha_set(
    4, 928,
    "../datasets/reCAPTCHA/4x4/merge/",
    "../datasets/reCAPTCHA/4x4/merge/ans",
)
test_set_3x3 = _build_captcha_set(
    3, 700,
    "../datasets/reCAPTCHA/3x3/merge/",
    "../datasets/reCAPTCHA/3x3/merge/ans",
)
test_set_2x2 = _build_captcha_set(
    2, 472,
    "../datasets/reCAPTCHA/2x2/merge/",
    "../datasets/reCAPTCHA/2x2/merge/ans",
)

# data_loader_divide = DataLoader(test_set_divide, batch_size=1, shuffle=False, num_workers=0)
data_loader_4x4 = _build_eval_loader(test_set_4x4)
data_loader_3x3 = _build_eval_loader(test_set_3x3)
data_loader_2x2 = _build_eval_loader(test_set_2x2)

In [27]:
def draw_line(img, target, pred, top3, size, captcha_size, patch_size, idx, img_path, save_dir, is_correct, show_result=True):
    """
    Mark the selected cells of a CAPTCHA image and save the annotated copy.

    img: PIL image of the whole CAPTCHA
    target: class that had to be found in the CAPTCHA
    pred: indices of the cells whose top-1 prediction is the target
    top3: indices of the cells whose top-3 predictions contain the target
    size: number of cells per grid row
    captcha_size: side length of the whole CAPTCHA image in pixels
    patch_size: side length of one cell in pixels
    idx: running number of the image, used in the output file name
    img_path: path of the source image, its stem is used in the output file name
    save_dir: directory the annotated image is written to (created if missing)
    is_correct: True/False paints a green/red 10 px border around the image,
                None leaves the border untouched
    show_result: whether to display the result with Image.show()

    Cell indices are row-major (row = index // size, column = index % size).
    Top-3 cells get a yellow frame and check mark; predicted cells are drawn
    afterwards in green, so green wins where both apply.

    An RGB image is annotated in place. Any other mode is converted to an RGB
    copy first (JPEG cannot store e.g. an alpha channel), in which case the
    caller's image object is left unchanged.
    """
    if img.mode != "RGB":
        img = img.convert("RGB")

    # Outer border that signals whether the CAPTCHA was solved.
    if is_correct is not None:
        verdict_color = (0, 255, 0) if is_correct else (255, 0, 0)
        far_edge = captcha_size - 10
        border_boxes = (
            (0, 0, captcha_size, 10),                    # top
            (0, far_edge, captcha_size, captcha_size),   # bottom
            (0, 0, 10, captcha_size),                    # left
            (far_edge, 0, captcha_size, captcha_size),   # right
        )
        for box in border_boxes:
            # paste() with a colour fills the box; far faster than per-pixel writes.
            img.paste(verdict_color, box)

    # Yellow (top-3) first, green (top-1) second so that green ends up on top.
    layers = (
        (top3, (255, 200, 0), "../images/check_yellow.jpg"),
        (pred, (0, 220, 40), "../images/check_green.jpg"),
    )
    for cells, frame_color, icon_path in layers:
        if len(cells) == 0:
            continue  # the check-mark file is only needed when a cell is marked
        with Image.open(icon_path) as icon_file:
            icon = icon_file.resize((100, 100))
        for cell in cells:
            row_idx = cell // size
            col_idx = cell % size
            # Top-left corner of the cell: 10 px margin plus 4 px gap per preceding cell.
            left = 10 + 4 * col_idx + patch_size * col_idx
            top = 10 + 4 * row_idx + patch_size * row_idx
            _paint_cell_frame(img, left, top, patch_size, frame_color)
            img.paste(icon, (left, top))

    if show_result:
        img.show()

    # Save the annotated image as <idx>_<source stem>_target_<target>_.jpg
    os.makedirs(save_dir, exist_ok=True)
    filename = os.path.splitext(os.path.basename(img_path))[0]
    img.save(os.path.join(save_dir, f"{idx}_{filename}_target_{target}_.jpg"), "JPEG")


def _paint_cell_frame(img, left, top, patch_size, color):
    """Fill the 4 px wide band that surrounds the cell whose top-left corner is (left, top)."""
    right = left + patch_size
    bottom = top + patch_size
    img.paste(color, (left - 4, top - 4, right + 4, top))         # band above the cell
    img.paste(color, (left - 4, bottom, right + 4, bottom + 4))   # band below the cell
    img.paste(color, (left - 4, top - 4, left, bottom + 4))       # band left of the cell
    img.paste(color, (right, top - 4, right + 4, bottom + 4))     # band right of the cell

In [7]:
def predict(model, patches, device, target, class_names, save_dir):
    """
    Classify every CAPTCHA cell and report which cells match the target.

    model: model used for the prediction (switched to eval mode here)
    patches: list of cell tensors, each already carrying a batch dimension
             (the model is called on one patch at a time)
    device: device the patches are moved to before the forward pass
    target: class that has to be found in the CAPTCHA
    class_names: list of class names, indexed by the model's output index
    save_dir: unused here; kept so existing callers keep working

    Returns a dict with:
      "target":     the target that was passed in
      "prediction": indices of the cells whose top-1 class is the target
      "top3":       indices of the cells whose top-3 classes contain the target
                    (top-k with k = number of classes if there are fewer than 3)
    """
    model.eval()
    prediction = []
    target_in_top3 = []
    with torch.no_grad():
        # Cells are classified one by one, in grid order.
        for i, patch in enumerate(patches):
            # Use the device argument rather than a notebook-level global.
            output = model(patch.to(device))
            prob = torch.softmax(output, dim=1)
            _, preds = torch.max(prob, 1)
            _, topk_indices = torch.topk(prob, min(3, prob.shape[1]))

            # Top-1 hit: the most likely class is the target.
            if target == class_names[preds[0].item()]:
                prediction.append(i)
            # Top-3 hit: the target is among the three most likely classes.
            if any(target == class_names[class_idx] for class_idx in topk_indices[0].tolist()):
                target_in_top3.append(i)

    return {"target": target, "prediction": prediction, "top3": target_in_top3}

In [18]:
def solve(model, dataloader, device, class_names, mode="merge", size=4, captcha_size=928,
        save_dir="../temp/", seed=None):
    """
    Solve every CAPTCHA image delivered by the dataloader.

    model: model used for the prediction
    dataloader: loader that yields (label_info, patches, image_info) items;
                the indexing below assumes it was built with batch_size=1
    device: device the forward passes run on
    class_names: list of class names
    mode: how the CAPTCHA was composed -
          "merge": separate images stitched into a grid (answer files exist, so
                   the result is checked, drawn and counted)
          anything else (e.g. "divide", one image cut into cells): predictions
                   are only printed
    size: number of cells per grid row
    captcha_size: side length of the whole CAPTCHA image in pixels
    save_dir: directory passed on to draw_line for the annotated images
    seed: optional seed for the random target choice; None (default) keeps
          using NumPy's global random state

    Prints one line per CAPTCHA and, in "merge" mode, the overall accuracy.
    Returns None.
    """
    os.makedirs(save_dir, exist_ok=True)

    model.eval()
    rng = np.random if seed is None else np.random.RandomState(seed)
    corrects = 0
    count = 0
    # Cell size that follows from a 10 px margin and 4 px gaps between cells.
    patch_size = int((captcha_size-4*size-16)/size)

    for idx, (label_info, patches, image_info) in enumerate(dataloader):
        count += 1
        # Pick the class to look for at random.
        target = rng.choice(class_names, 1)[0]
        result = predict(model, patches, device, target, class_names, save_dir)
        pred = result["prediction"]
        top3 = result["top3"]

        if mode != "merge":
            print(f"{idx+1}) target: {target}, prediction: {pred}, in-top3: {top3}")
            continue

        # Each label arrives wrapped in a 1-element batch, hence label[0].
        ans = [i for i, label in enumerate(label_info) if label[0] == target]
        # Solved only when exactly the cells of the answer were selected.
        is_correct = len(ans) == len(pred) and all(item in ans for item in pred)
        if is_correct:
            corrects += 1

        # Drop the batch dimension and turn the tensor back into a PIL image.
        image = Image.fromarray(np.squeeze(image_info[0]).numpy())
        img_path = image_info[1][0]
        draw_line(image, target, pred, top3, size, captcha_size, patch_size, idx+1,
                  img_path, save_dir, is_correct=is_correct, show_result=False)
        print(f"{idx+1}) target: {target}, prediction: {pred}, in-top3: {top3}, answer: {ans}")

    if mode == "merge":
        if count == 0:
            # Nothing was loaded: report it instead of dividing by zero.
            print("Test accuracy: n/a (no CAPTCHA images were loaded)")
        else:
            print(f"Test accuracy: {corrects / count}")

In [15]:
def solve_single_captcha(model, transforms, img_path, device, class_names, size=4, captcha_size=928,
                        save_dir="../temp/", target=None):
    """
    Solve the single CAPTCHA image stored at img_path.

    model: model used for the prediction
    transforms: transform applied to every cell before it is fed to the model
    img_path: path of the CAPTCHA image
    device: device the forward passes run on
    class_names: list of class names
    size: number of cells per grid row
    captcha_size: side length the CAPTCHA image is resized to, in pixels
    save_dir: directory passed on to draw_line for the annotated image
    target: class to look for; None (default) picks one of class_names at random

    Shows and saves the annotated image through draw_line, prints the result
    and returns None.
    """
    model.eval()
    if target is None:
        target = np.random.choice(class_names, 1)[0]

    with Image.open(img_path) as handle:
        # Force RGB so every cell is a (H, W, 3) array and the JPEG save in draw_line works.
        image = handle.convert('RGB').resize((captcha_size, captcha_size))
    np_img = np.array(image)
    # Cell size that follows from a 10 px margin and 4 px gaps between cells.
    patch_size = int((captcha_size-4*size-16)/size)

    # Cut the grid into cells, row by row.
    patches = []
    step = patch_size + 4  # distance between the origins of neighbouring cells
    for row in range(size):
        for col in range(size):
            top = 10 + row * step
            left = 10 + col * step
            cell = np_img[top:top + patch_size, left:left + patch_size]
            patch = Image.fromarray(np.ascontiguousarray(cell))
            # Transform the cell and add the batch dimension the model expects.
            patches.append(transforms(patch).unsqueeze(0))

    result = predict(model, patches, device, target, class_names, save_dir)
    pred = result["prediction"]
    top3 = result["top3"]

    # No ground truth for a single image, so no red/green border (is_correct=None).
    draw_line(image, target, pred, top3, size, captcha_size, patch_size, 0, img_path, save_dir, is_correct=None, show_result=True)

    print(f"target: {target}, prediction: {pred}, in-top3: {top3}")

Testing Captcha solver

In [31]:
# Decode one sample 3x3 CAPTCHA (700 px) and store the annotated image.
solve_single_captcha(
    model=model,
    transforms=transforms_test,
    img_path="../datasets/reCAPTCHA/3x3/merge/reCAPTCHA_merge_1.jpg",
    device=DEVICE,
    class_names=class_names,
    size=3,
    captcha_size=700,
    save_dir="../temp/test/",
)

target: Traffic sign, prediction: [4], in-top3: [4]


In [34]:
# Solve the whole 4x4 test set (928 px CAPTCHAs) and report the accuracy.
solve(
    model=model,
    dataloader=data_loader_4x4,
    device=DEVICE,
    class_names=class_names,
    mode="merge",
    size=4,
    captcha_size=928,
    save_dir="../temp/test/4x4",
)

1) target: Helicopter, prediction: [7, 13], in-top3: [0, 7, 9, 13, 14], answer: [13]
2) target: Bus, prediction: [4], in-top3: [1, 4], answer: [4]
3) target: Truck, prediction: [12], in-top3: [1, 3, 6, 11, 12], answer: [12]
4) target: Car, prediction: [1, 9], in-top3: [1, 3, 7, 9, 10, 11, 13], answer: [1]
5) target: Helicopter, prediction: [], in-top3: [], answer: []
6) target: Fire hydrant, prediction: [7], in-top3: [7], answer: [7]
7) target: Parking meter, prediction: [], in-top3: [], answer: []
8) target: Car, prediction: [4, 13], in-top3: [4, 7, 8, 13, 14], answer: [4]
9) target: Stop sign, prediction: [2, 7], in-top3: [2, 7, 9], answer: [2, 7]
10) target: Airplane, prediction: [9, 13], in-top3: [7, 9, 11, 13], answer: [9, 13]
11) target: Taxi, prediction: [], in-top3: [4, 5, 6, 7], answer: []
12) target: Helicopter, prediction: [6, 10], in-top3: [4, 5, 6, 10, 11, 15], answer: [6, 10]
13) target: Motorcycle, prediction: [0, 3], in-top3: [0, 2, 3, 5, 9, 10, 11, 15], answer: [0]
14)

106) target: Taxi, prediction: [7, 9], in-top3: [7, 9], answer: [7, 9]
107) target: Bus, prediction: [5, 8, 11], in-top3: [0, 5, 8, 9, 11], answer: [5, 8, 11]
108) target: Bicycle, prediction: [], in-top3: [13], answer: []
109) target: Ambulance, prediction: [], in-top3: [1, 4, 8, 10, 14], answer: []
110) target: Fire hydrant, prediction: [6, 9, 15], in-top3: [6, 9, 15], answer: [6, 9, 15]
111) target: Bicycle, prediction: [3], in-top3: [0, 3], answer: [3]
112) target: Truck, prediction: [6], in-top3: [6, 11], answer: [6]
113) target: Motorcycle, prediction: [12], in-top3: [5, 7, 12, 15], answer: [12]
114) target: Motorcycle, prediction: [14], in-top3: [0, 1, 2, 4, 9, 14], answer: [14]
115) target: Traffic sign, prediction: [], in-top3: [11], answer: []
116) target: Stop sign, prediction: [], in-top3: [9, 11], answer: []
117) target: Helicopter, prediction: [15], in-top3: [0, 12, 15], answer: [15]
118) target: Helicopter, prediction: [6], in-top3: [3, 5, 6, 13, 15], answer: [6]
119) ta

In [33]:
# Solve the whole 3x3 test set (700 px CAPTCHAs) and report the accuracy.
solve(
    model=model,
    dataloader=data_loader_3x3,
    device=DEVICE,
    class_names=class_names,
    mode="merge",
    size=3,
    captcha_size=700,
    save_dir="../temp/test/3x3",
)

1) target: Helicopter, prediction: [], in-top3: [8], answer: []
2) target: Bicycle, prediction: [3, 7], in-top3: [3, 6, 7], answer: [3, 7]
3) target: Ambulance, prediction: [], in-top3: [4], answer: []
4) target: Truck, prediction: [], in-top3: [], answer: []
5) target: Bicycle, prediction: [0], in-top3: [0, 5], answer: [0]
6) target: Bicycle, prediction: [7], in-top3: [7], answer: [7]
7) target: Car, prediction: [5, 7], in-top3: [1, 3, 5, 7], answer: []
8) target: Airplane, prediction: [6], in-top3: [6, 8], answer: [6, 8]
9) target: Stop sign, prediction: [], in-top3: [], answer: []
10) target: Stop sign, prediction: [], in-top3: [4], answer: [4]
11) target: Helicopter, prediction: [2, 5], in-top3: [2, 5, 8], answer: [2, 5]
12) target: Helicopter, prediction: [1], in-top3: [1, 6], answer: [1]
13) target: Stop sign, prediction: [8], in-top3: [4, 5, 6, 8], answer: [4, 8]
14) target: Taxi, prediction: [], in-top3: [5, 7], answer: []
15) target: Traffic sign, prediction: [5, 8], in-top3: 

121) target: Ambulance, prediction: [], in-top3: [1], answer: []
122) target: Truck, prediction: [6], in-top3: [2, 6], answer: []
123) target: Bus, prediction: [], in-top3: [3, 4, 8], answer: []
124) target: Ambulance, prediction: [], in-top3: [2, 3, 4, 5, 6], answer: []
125) target: Motorcycle, prediction: [3], in-top3: [2, 3, 8], answer: [3]
126) target: Motorcycle, prediction: [], in-top3: [5], answer: []
127) target: Car, prediction: [2], in-top3: [2, 4, 7], answer: []
128) target: Traffic sign, prediction: [7], in-top3: [1, 7], answer: [7]
129) target: Helicopter, prediction: [], in-top3: [1, 2, 3], answer: []
130) target: Helicopter, prediction: [0, 2], in-top3: [0, 2, 5], answer: [0, 2]
131) target: Parking meter, prediction: [3, 5], in-top3: [3, 5], answer: [3, 5, 7]
132) target: Airplane, prediction: [1, 4, 7], in-top3: [1, 4, 7], answer: [1, 4, 7]
133) target: Car, prediction: [8], in-top3: [2, 4, 8], answer: [8]
134) target: Fire hydrant, prediction: [], in-top3: [3], answer

In [32]:
# Solve the whole 2x2 test set (472 px CAPTCHAs) and report the accuracy.
solve(
    model=model,
    dataloader=data_loader_2x2,
    device=DEVICE,
    class_names=class_names,
    mode="merge",
    size=2,
    captcha_size=472,
    save_dir="../temp/test/2x2",
)

1) target: Taxi, prediction: [0], in-top3: [0], answer: [0]
2) target: Traffic sign, prediction: [], in-top3: [], answer: []
3) target: Parking meter, prediction: [], in-top3: [], answer: []
4) target: Helicopter, prediction: [], in-top3: [], answer: []
5) target: Traffic sign, prediction: [], in-top3: [], answer: []
6) target: Fire hydrant, prediction: [], in-top3: [], answer: []
7) target: Ambulance, prediction: [1], in-top3: [1], answer: [1]
8) target: Motorcycle, prediction: [], in-top3: [2], answer: []
9) target: Truck, prediction: [2], in-top3: [2], answer: [2]
10) target: Stop sign, prediction: [], in-top3: [2], answer: []
11) target: Bus, prediction: [], in-top3: [], answer: []
12) target: Parking meter, prediction: [1], in-top3: [1], answer: [1]
13) target: Truck, prediction: [], in-top3: [0, 3], answer: []
14) target: Airplane, prediction: [0], in-top3: [0, 2], answer: [0]
15) target: Traffic sign, prediction: [], in-top3: [], answer: []
16) target: Bus, prediction: [1], in-t

129) target: Helicopter, prediction: [0], in-top3: [0], answer: [0]
130) target: Car, prediction: [1], in-top3: [1], answer: [1]
131) target: Fire hydrant, prediction: [2], in-top3: [2], answer: [2]
132) target: Helicopter, prediction: [], in-top3: [1], answer: []
133) target: Car, prediction: [], in-top3: [0], answer: []
134) target: Parking meter, prediction: [], in-top3: [], answer: []
135) target: Airplane, prediction: [], in-top3: [], answer: []
136) target: Fire hydrant, prediction: [0], in-top3: [0], answer: [0]
137) target: Helicopter, prediction: [], in-top3: [0, 2, 3], answer: []
138) target: Fire hydrant, prediction: [2], in-top3: [2], answer: [2]
139) target: Ambulance, prediction: [], in-top3: [3], answer: []
140) target: Bus, prediction: [], in-top3: [1, 2, 3], answer: []
141) target: Car, prediction: [2], in-top3: [0, 1, 2, 3], answer: [2]
142) target: Ambulance, prediction: [3], in-top3: [2, 3], answer: [3]
143) target: Stop sign, prediction: [2], in-top3: [2], answer: 

The following functions test the loaded model on ordinary single images rather than on CAPTCHA grids.

Testing single image

In [9]:
def test_single_image(model, transforms, img_dir, device, class_names, threshold=0.5):
    """
    Classify one ordinary (non-CAPTCHA) image and print the prediction.

    model: model used for the prediction
    transforms: transform applied to the image
    img_dir: path of the image file (despite the name, a file, not a directory)
    device: device the forward pass runs on
    class_names: list of class names
    threshold: minimum top-1 probability for a prediction to be reported
               (default 0.5)

    Prints "<class name> : <probability>" or "None" when the top-1 probability
    is below the threshold. Returns None.
    """
    model.eval()
    # convert() loads the pixel data, so the file can be closed right away.
    with Image.open(img_dir) as handle:
        image = handle.convert('RGB')
    with torch.no_grad():
        # Add the batch dimension and move to the requested device.
        batch = transforms(image).unsqueeze(0).to(device)
        prob = torch.softmax(model(batch), dim=1)
        confidence, preds = torch.max(prob, 1)

    top_prob = confidence[0].item()
    if top_prob < threshold:
        print("None")
    else:
        print(f"{class_names[preds[0].item()]} : {top_prob}")

In [49]:
# Project-relative path instead of an absolute path into one user's home directory.
# NOTE(review): assumes OI_larger_dataset lives in the same ../datasets folder
# the other cells read from - confirm before running.
test_single_image(
    model=model,
    transforms=transforms_test,
    device=DEVICE,
    class_names=class_names,
    img_dir="../datasets/OI_larger_dataset/val/Fire hydrant/3ceffbf79181f616.jpg",
)

Fire hydrant : 0.9877272844314575


Testing model accuracy 

In [10]:
def test_each_image(model, transforms, testset_dir, device, class_names):
    """
    디렉토리 내 모든 클래스 이미지들에 대해 클래스별로 모델 정확도를 구한다(threshold 적용). 
    model: 예측에 사용할 모델
    transforms: 이미지에 적용할 트랜스폼
    testset_dir: 테스트셋의 경로
    device: cpu / gpu
    class_names: 클래스들의 이름 리스트
    """
    model.eval()
    # read directory
    folders = os.listdir(testset_dir)
    total = 0
    total_correct = 0
    threshold = 0.8
    print(folders)
    # test for each class folders
    for folder in folders:
        files = os.listdir(testset_dir+folder)
        class_count = 0
        class_correct = 0
        # make prediction on each image file
        for file in files:
            class_count+=1
            image = Image.open(testset_dir+folder+"/"+file).convert('RGB')
            image.resize((224,224))
            # apply transform to image
            tens_img = transforms(image)
            tens_img = tens_img.unsqueeze(0)
            # make prediction
            with torch.no_grad():
                tens_img = tens_img.to(DEVICE)
                output = model(tens_img)
                prob = nn.Softmax(dim=1)(output)
                max_prob, preds = torch.max(prob, 1)
                
                ### testing code
                # if certainty is less than threshold, abandon result.
                if max_prob[0] >= threshold:
                    if (folder == class_names[preds[0]]):
                        class_correct += 1
                else:
                    if (folder != class_names[preds[0]]):
                        class_correct += 1
                del output, prob, preds
        total += class_count
        total_correct += class_correct
        print(f"Class {folder} accuracy: {class_correct}/{class_count} -> {class_correct/class_count}")
    print(f"Total accuracy: {total_correct}/{total} -> {total_correct/total}")

In [None]:
# Thresholded per-class accuracy on the training split.
testset_dir = "../datasets/train_set_under500/train/"
test_each_image(
    model=model,
    transforms=transforms_test,
    testset_dir=testset_dir,
    device=DEVICE,
    class_names=class_names,
)

In [51]:
# Thresholded per-class accuracy on the validation split.
testset_dir = "../datasets/train_set_under500/val/"
test_each_image(
    model=model,
    transforms=transforms_test,
    testset_dir=testset_dir,
    device=DEVICE,
    class_names=class_names,
)

['Airplane', 'Ambulance', 'Bicycle', 'Bus', 'Car', 'Fire hydrant', 'Helicopter', 'Motorcycle', 'Parking meter', 'Stop sign', 'Taxi', 'Traffic sign', 'Truck']
Class Airplane accuracy: 58/99 -> 0.5858585858585859
Class Ambulance accuracy: 42/81 -> 0.5185185185185185
Class Bicycle accuracy: 42/74 -> 0.5675675675675675
Class Bus accuracy: 91/142 -> 0.6408450704225352
Class Car accuracy: 48/115 -> 0.41739130434782606
Class Fire hydrant accuracy: 76/90 -> 0.8444444444444444
Class Helicopter accuracy: 89/133 -> 0.6691729323308271
Class Motorcycle accuracy: 82/127 -> 0.6456692913385826
Class Parking meter accuracy: 11/60 -> 0.18333333333333332
Class Stop sign accuracy: 43/74 -> 0.581081081081081
Class Taxi accuracy: 46/89 -> 0.5168539325842697
Class Traffic sign accuracy: 49/116 -> 0.4224137931034483
Class Truck accuracy: 43/122 -> 0.3524590163934426
Total accuracy: 720/1322 -> 0.5446293494704992


In [None]:
# Thresholded per-class accuracy on the held-out test set.
testset_dir = "../datasets/test/"
test_each_image(
    model=model,
    transforms=transforms_test,
    testset_dir=testset_dir,
    device=DEVICE,
    class_names=class_names,
)

Attempt to find the optimal confidence threshold.

In [11]:
def get_stats(model, transforms, testset_dir, device, class_names, threshold=0.5):
    """
    Classify every image of the test set with the given confidence threshold
    and count the outcomes.

    model: model used for the prediction
    transforms: transform applied to every image
    testset_dir: directory with one sub-folder per class, named like the class
    device: device the forward passes run on
    class_names: list of class names
    threshold: confidence threshold applied to the top-1 probability

    Returns the tuple (TP, FP, FN, TN) where, per image:
      TP: top-1 probability >= threshold and the predicted class is right
      FP: top-1 probability >= threshold and the predicted class is wrong
      FN: top-1 probability <  threshold although the predicted class is right
      TN: top-1 probability <  threshold and the predicted class is wrong
    Entries of testset_dir that are not directories are ignored.
    """
    model.eval()

    TP = 0
    FP = 0
    FN = 0
    TN = 0

    for folder in sorted(os.listdir(testset_dir)):
        class_dir = os.path.join(testset_dir, folder)
        if not os.path.isdir(class_dir):
            continue
        for file in os.listdir(class_dir):
            with Image.open(os.path.join(class_dir, file)) as handle:
                image = handle.convert('RGB')
            # No manual resize here: resizing is left to `transforms`.
            with torch.no_grad():
                batch = transforms(image).unsqueeze(0).to(device)
                prob = torch.softmax(model(batch), dim=1)
                max_p, preds = torch.max(prob, 1)

            is_match = folder == class_names[preds[0].item()]
            # Predictions below the threshold are treated as discarded.
            if max_p.item() >= threshold:
                if is_match:
                    TP += 1
                else:
                    FP += 1
            else:
                if is_match:
                    FN += 1
                else:
                    TN += 1

    return TP, FP, FN, TN

In [None]:
testset_dir = "../datasets/test/"
# Thresholds from 1.0 down to 0.0 in steps of 0.025: [1.0, 0.975, 0.95, ..., 0.025, 0.0]
# (integer / 1000 avoids float noise such as 0.9750000000000001).
thresholds = [x / 1000 for x in range(1000, -25, -25)]
TPs = []
FPs = []
FNs = []
TNs = []
FPRs = []
TPRs = []

# NOTE: every get_stats call classifies the whole test set again, so this
# sweep costs len(thresholds) full passes over the data.
for threshold in thresholds:
    TP, FP, FN, TN = get_stats(model=model,
                              transforms=transforms_test,
                              device=DEVICE,
                              class_names=class_names,
                              testset_dir=testset_dir,
                              threshold=threshold)
    TPs.append(TP)
    FPs.append(FP)
    FNs.append(FN)
    TNs.append(TN)

    # False Positive Rate (x-axis) and True Positive Rate (y-axis) of the ROC curve.
    # A rate whose denominator is 0 (e.g. no wrong predictions at all) is
    # reported as 0.0 instead of raising ZeroDivisionError.
    FPR = FP / (TN+FP) if (TN+FP) else 0.0  # fall-out, false positive rate
    TPR = TP / (TP+FN) if (TP+FN) else 0.0  # sensitivity, recall
    FPRs.append(FPR)
    TPRs.append(TPR)
    print(f"Threshold: {threshold} | FPR: {FPR} | TPR: {TPR}")

# The optimal threshold maximizes TPR - FPR (Youden's J statistic).
opt = thresholds[np.argmax([tpr-fpr for tpr,fpr in zip(TPRs,FPRs)])]
print(f"Optimal threshold: {opt}")

In [None]:
# plot ROC graph
# Draw the ROC curve from the rates collected during the threshold sweep.
fig, ax = plt.subplots()
ax.plot(FPRs, TPRs)
ax.set_title("ROC")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
# One tick every 0.1 between 0 and 1.
ax.set_yticks([tick / 10 for tick in range(11)])
fig.tight_layout()
plt.show()