In [1]:
# -*- coding: utf-8 -*-
import os, sys, glob, argparse, json
import pandas as pd
import numpy as np
from tqdm import tqdm, tqdm_notebook

import time, datetime
import pdb, traceback

import cv2
# import imagehash
from PIL import Image

from sklearn.model_selection import train_test_split, StratifiedKFold

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

In [2]:
import logging
logging.basicConfig(level=logging.DEBUG, filename='example.log',
                    format='%(asctime)s - %(filename)s[line:%(lineno)d]: %(message)s')  # 

class QRDataset(Dataset):
    def __init__(self, img_json, transform=None):
        self.img_json = img_json
        
        if transform is not None:
            self.transform = transform
        else:
            self.transform = None
    
    def __getitem__(self, index):
        start_time = time.time()
        
        img = Image.open(os.path.join('../data/data/', self.img_json[index]['name']))
        
        if self.transform is not None:
            img = self.transform(img)
        
        img_label_idx = self.img_json[index]['text'].strip()
        label0 = np.array(self.char2idx(img_label_idx[0]))
        label1 = np.array(self.char2idx(img_label_idx[1]))
        label2 = np.array(self.char2idx(img_label_idx[2]))
        label3 = np.array(self.char2idx(img_label_idx[3]))
        label4 = np.array(self.char2idx(img_label_idx[4]))
        label5 = np.array(self.char2idx(img_label_idx[5]))
        label6 = np.array(self.char2idx(img_label_idx[6]))
        label7 = np.array(self.char2idx(img_label_idx[7]))
        label8 = np.array(self.char2idx(img_label_idx[8]))
        label9 = np.array(self.char2idx(img_label_idx[9]))
        
        return img, torch.from_numpy(label0), torch.from_numpy(label1), \
                torch.from_numpy(label2), torch.from_numpy(label3), torch.from_numpy(label4),\
                torch.from_numpy(label5), torch.from_numpy(label6), torch.from_numpy(label7),\
                torch.from_numpy(label8), torch.from_numpy(label9)
    
    def __len__(self):
        return len(self.img_json)
    
    def char2idx(self, ch):
        return '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'.find(ch)

In [3]:
class RMB_Net(nn.Module):
    def __init__(self):
        super(RMB_Net, self).__init__()
        
        feat_size = 2048
        self.fc0 = nn.Linear(feat_size, 36)
        self.fc1 = nn.Linear(feat_size, 36)
        self.fc2 = nn.Linear(feat_size, 36)
        self.fc3 = nn.Linear(feat_size, 36)
        self.fc4 = nn.Linear(feat_size, 36)
        self.fc5 = nn.Linear(feat_size, 36)
        self.fc6 = nn.Linear(feat_size, 36)
        self.fc7 = nn.Linear(feat_size, 36)
        self.fc8 = nn.Linear(feat_size, 36)
        self.fc9 = nn.Linear(feat_size, 36)
        
        model = models.resnet50(True)
        model = torch.nn.Sequential(*(list(model.children())[:-1]))
        self.resnet = model
        
    def forward(self, img):
        feat = self.resnet(img)
        feat = feat.reshape(feat.size(0), -1)
        
        out0 = self.fc0(feat)
        out1 = self.fc1(feat)
        out2 = self.fc2(feat)
        out3 = self.fc3(feat)
        out4 = self.fc4(feat)
        out5 = self.fc5(feat)
        out6 = self.fc6(feat)
        out7 = self.fc7(feat)
        out8 = self.fc8(feat)
        out9 = self.fc9(feat)
        
        return F.log_softmax(out0, dim=1), F.log_softmax(out1, dim=1), F.log_softmax(out2, dim=1), \
                F.log_softmax(out3, dim=1), F.log_softmax(out4, dim=1), F.log_softmax(out5, dim=1), \
                F.log_softmax(out6, dim=1), F.log_softmax(out7, dim=1), F.log_softmax(out8, dim=1), \
                 F.log_softmax(out9, dim=1)

In [4]:
'0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'.find('3')

3

In [5]:
model = models.resnet18(True)
model = torch.nn.Sequential(*(list(model.children())[:-1]))

In [4]:
with open('../data/desc.json') as up:
    data_json = json.load(up)

In [5]:
def accuracy(outputs, targets):
    with torch.no_grad():
        batch_size = outputs[0].size(0)
        
        output_idx = []
        for output in outputs:
            _, pred = output.topk(1, 1, True, True)
            # pred = pred
            pred = pred.t().flatten()
            output_idx.append(pred.data.cpu().numpy())
        
        output_idx = np.vstack(output_idx)
        targets = [x.data.cpu().numpy() for x in targets]
        targets = np.vstack(targets)
        return ((targets == output_idx).mean(0) == 1).mean()
    
def train(train_loader, model, criterion, optimizer, epoch):
    model.train()
    
    iterator = tqdm(train_loader)
    for input,target0,target1,target2,target3,target4,target5,target6,target7,target8,target9 in tqdm_notebook(train_loader):
        optimizer.zero_grad()
        
        input = input.cuda(non_blocking=True)
        target0 = target0.cuda(non_blocking=True)
        target1 = target1.cuda(non_blocking=True)
        target2 = target2.cuda(non_blocking=True)
        target3 = target3.cuda(non_blocking=True)
        target4 = target4.cuda(non_blocking=True)
        target5 = target5.cuda(non_blocking=True)
        target6 = target6.cuda(non_blocking=True)
        target7 = target7.cuda(non_blocking=True)
        target8 = target8.cuda(non_blocking=True)
        target9 = target9.cuda(non_blocking=True)

        # compute output
        output0,output1,output2,output3,output4,output5,output6,output7,output8,output9 = model(input)
        loss0 = criterion(output0, target0)
        loss1 = criterion(output1, target1)
        loss2 = criterion(output2, target2)
        loss3 = criterion(output3, target3)
        loss4 = criterion(output4, target4)
        loss5 = criterion(output5, target5)
        loss6 = criterion(output6, target6)
        loss7 = criterion(output7, target7)
        loss8 = criterion(output8, target8)
        loss9 = criterion(output9, target9)
            
        loss = (loss0+loss1+loss2+loss3+loss4+loss5+loss6+loss7+loss8+loss9)/10.0
        
        # measure accuracy and record loss
        acc = accuracy([output0,output1,output2,output3,output4,output5,output6,output7,output8,output9], 
                        [target0,target1,target2,target3,target4,target5,target6,target7,target8,target9])
        
        # print(acc)
        # status = "loss_mean: {}; ACC: {}".format(np.mean([acc0,acc1,acc2,acc3,acc4,acc5,acc6,acc7,acc8,acc9]), 
        #                                          loss.item())
        # iterator.set_description(status)
            
        
        loss.backward()
        optimizer.step()

def validate(val_loader, model, criterion):
    model.eval()
    
    val_acc = []
    val_loss = []
    
    with torch.no_grad():
        for i, (input,target0,target1,target2,target3,target4,target5,target6,target7,target8,target9) in enumerate(val_loader):
            input = input.cuda(non_blocking=True)
            target0 = target0.cuda(non_blocking=True)
            target1 = target1.cuda(non_blocking=True)
            target2 = target2.cuda(non_blocking=True)
            target3 = target3.cuda(non_blocking=True)
            target4 = target4.cuda(non_blocking=True)
            target5 = target5.cuda(non_blocking=True)
            target6 = target6.cuda(non_blocking=True)
            target7 = target7.cuda(non_blocking=True)
            target8 = target8.cuda(non_blocking=True)
            target9 = target9.cuda(non_blocking=True)

            # compute output
            output0,output1,output2,output3,output4,output5,output6,output7,output8,output9 = model(input)
            loss0 = criterion(output0, target0)
            loss1 = criterion(output1, target1)
            loss2 = criterion(output2, target2)
            loss3 = criterion(output3, target3)
            loss4 = criterion(output4, target4)
            loss5 = criterion(output5, target5)
            loss6 = criterion(output6, target6)
            loss7 = criterion(output7, target7)
            loss8 = criterion(output8, target8)
            loss9 = criterion(output9, target9)
            
            loss = loss0+loss1+loss2+loss3+loss4+loss5+loss6+loss7+loss8+loss9
            # measure accuracy and record loss
            acc = accuracy([output0,output1,output2,output3,output4,output5,output6,output7,output8,output9], 
                            [target0,target1,target2,target3,target4,target5,target6,target7,target8,target9])
            
            val_acc.append(acc)
            val_loss.append(loss.item())
        print('VAL', np.mean(val_acc), np.mean(val_loss))

In [6]:
    train_loader = torch.utils.data.DataLoader(
        QRDataset(data_json['fold11_train'],
                transforms.Compose([
                            transforms.RandomAffine(5),
                            transforms.ColorJitter(hue=.05, saturation=.05),
                            transforms.Resize((80, 320)),
                            transforms.ToTensor(),
                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        ), batch_size=100, shuffle=True, num_workers=10, pin_memory=True
    )

    val_loader = torch.utils.data.DataLoader(
        QRDataset(data_json['fold11_test'],
                transforms.Compose([
                            transforms.RandomAffine(5),
                            transforms.ColorJitter(hue=.05, saturation=.05),
                            transforms.Resize((80, 320)),
                            transforms.ToTensor(),
                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        ), batch_size=70, shuffle=True, num_workers=10, pin_memory=True
    )
    
    model = RMB_Net()
    model = model.cuda()
    # model = nn.DataParallel(model).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), 0.001)
    
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.85)
    for epoch_idx in range(20):
        train(train_loader, model, criterion, optimizer, epoch_idx)
        validate(val_loader, model, criterion)
        scheduler.step()

  0%|          | 0/370 [00:00<?, ?it/s]

HBox(children=(IntProgress(value=0, max=370), HTML(value='')))


  0%|          | 0/370 [00:00<?, ?it/s][A

VAL 0.8873212442871885 0.7798011993107042


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))



  0%|          | 0/370 [00:00<?, ?it/s][A[A

VAL 0.9440365619932182 0.3888101142488028


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))




  0%|          | 0/370 [00:00<?, ?it/s][A[A[A

VAL 0.968000884564352 0.2243058097205664


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))





  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A

VAL 0.9780554326993955 0.18222678168431708


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))






  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A

VAL 0.9845864661654137 0.13463027490989157


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))







  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A[A

VAL 0.9851982898422526 0.1544800761498903


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))








  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A[A[A

VAL 0.9883458646616541 0.12649611391029075


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))









  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A[A[A[A

VAL 0.9842105263157894 0.13341617589130214


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))










  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

VAL 0.9909774436090224 0.11875742517019573


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))











  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A

VAL 0.9894736842105264 0.11885783802963008


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))












  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A

VAL 0.9919652071354857 0.1097700136076463


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))













  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A

VAL 0.988205808639245 0.11618428096469295


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))














  0%|          | 0/370 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A

VAL 0.9908373875866134 0.10655048229780636


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))

KeyboardInterrupt: 

In [5]:
data_json.keys()

dict_keys(['abc', 'train', 'test', 'pb', 'fold0_train', 'fold0_test', 'fold1_train', 'fold1_test', 'fold2_train', 'fold2_test', 'fold3_train', 'fold3_test', 'fold4_train', 'fold4_test', 'fold5_train', 'fold5_test', 'fold6_train', 'fold6_test', 'fold7_train', 'fold7_test', 'fold8_train', 'fold8_test', 'fold9_train', 'fold9_test', 'fold10_train', 'fold10_test', 'fold11_train', 'fold11_test', 'fold12_train', 'fold12_test', 'fold13_train', 'fold13_test', 'fold14_train', 'fold14_test'])

In [16]:
model = RMB_Net()

In [19]:
data = QRDataset(data_json['fold11_train'],
                transforms.Compose([
                            # transforms.RandomAffine(10),
                            transforms.ColorJitter(hue=.05, saturation=.05),
                            transforms.Resize((80, 320)),
                            # transforms.ToTensor(),
                            # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]))

In [70]:
def accuracy(outputs, targets):
    with torch.no_grad():
        batch_size = outputs[0].size(0)
        
        output_idx = []
        for output in outputs:
            _, pred = output.topk(1, 1, True, True)
            # pred = pred
            pred = pred.t().flatten()
            output_idx.append(pred.data.cpu().numpy())
        
        output_idx = np.vstack(output_idx)
        targets = [x.data.cpu().numpy() for x in targets]
        targets = np.vstack(targets)
        return (a == b).mean(0)

In [73]:
a, b = accuracy([torch.rand(20, 36), torch.rand(20, 36)], [torch.rand(20), torch.rand(20)])

In [79]:
(a == b).sum(0)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [78]:
a.shape

(2, 20)