# Initial playground to test out different ways to train the CNN

Nothing here is final.

In [1]:
import PIL
import json
import pandas as pd
import os
import ast
import numpy as np
import cv2
from tqdm import tqdm
import random
import time

from sklearn import metrics

from PIL import Image, ImageDraw, ImageFont

from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch.nn as nn

import albumentations as A
from albumentations.pytorch import ToTensorV2

In [2]:
params = {
    'train_batch':16,
    'eval_batch':16,
    'lr':0.005,
    'model_name':'faster_rcnn_backbone'
}
model_name = params['model_name']

In [3]:
def seed_everything(seed_value=4995):
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
seed_everything()

In [4]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
DEVICE

device(type='cuda')

In [6]:
class HandwrittenDataset(Dataset):
    
    def __init__(self, df, visible_char_mapping, transform = None, image_resize = (1000,500), bad_classes = None):
        
        self.data = list(df.itertuples(index=False))
        self.transform = transform
        self.to_tensor = torchvision.transforms.ToTensor()
        
        self.visible_char_mapping = visible_char_mapping
        
        self.labels=np.zeros((len(self.data),len(visible_char_mapping)))
        
        for tup_idx, tup in enumerate(self.data):
            visible_latex_chars = tup.visible_latex_chars
            labels = [*map(self.visible_char_mapping.get, visible_latex_chars)]
            
            for char in labels:
                self.labels[tup_idx, char - 1] = 1
        
        if bad_classes:
            
            new_data = []
            for tup_idx, tup in enumerate(self.data):
                visible_latex_chars = tup.visible_latex_chars
                for label in visible_latex_chars:
                    if label in bad_classes:
                        new_data.append(tup)
                        break
            
            self.data = new_data
            self.labels=np.zeros((len(self.data),len(visible_char_mapping)))
            for tup_idx, tup in enumerate(self.data):
                visible_latex_chars = tup.visible_latex_chars
                labels = [*map(self.visible_char_mapping.get, visible_latex_chars)]

                for char in labels:
                    self.labels[tup_idx, char - 1] = 1
            
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        
        sample = self.data[index]
        label = self.labels[index,:].astype(np.float32)
        labels = torch.tensor(label)

        
        f_name = sample.filename
        image = PIL.Image.open(f_name).convert("RGB")
        
        
        if self.transform:
            image = self.transform(image)

        return image, labels, f_name
    
def collate_fn(batch):
    """
    To handle the data loading as different images may have different number 
    of objects and to handle varying size tensors as well.
    """
    images = [img for img,_,_ in batch]
    labels = torch.stack([lab for _, lab,_ in batch])
    f_names = [f_n for _,_, f_n in batch]
    
    return images, labels, f_names

In [7]:
def create_data_frame(raw_data, image_path):
    data = {}
    data['latex'] = []
    data['seq_len'] = []
    data['latex_string'] = []
    data['visible_latex_chars'] = []
    data['filename'] = []
    data['width'] = []
    data['height'] = []
    data['xmins_raw'] = []
    data['xmaxs_raw'] = []
    data['ymins_raw'] = []
    data['ymaxs_raw'] = []
    data['xmins'] = []
    data['xmaxs'] = []
    data['ymins'] = []
    data['ymaxs'] = []
    
    for image in raw_data:
        data['latex_string'].append(image['latex'])
        data['latex'].append(image['image_data']['full_latex_chars'])
        data['seq_len'].append(len(image['image_data']['full_latex_chars']))
        data['visible_latex_chars'].append(image['image_data']['visible_latex_chars'])
        data['filename'].append(os.path.join(image_path, image['filename']))
        data['xmins_raw'].append(image['image_data']['xmins_raw'])
        data['xmaxs_raw'].append(image['image_data']['xmaxs_raw'])
        data['ymins_raw'].append(image['image_data']['ymins_raw'])
        data['ymaxs_raw'].append(image['image_data']['ymaxs_raw'])
        data['xmins'].append(image['image_data']['xmins'])
        data['xmaxs'].append(image['image_data']['xmaxs'])
        data['ymins'].append(image['image_data']['ymins'])
        data['ymaxs'].append(image['image_data']['ymaxs'])
        
        data['width'].append(image['image_data']['width'])
        data['height'].append(image['image_data']['height'])


    df = pd.DataFrame.from_dict(data)
    return df

In [8]:
def load_data(path = 'data/all_data.csv'):
    if not os.path.isfile(path):
        df = pd.DataFrame()
        for i in range(1,11):
            print(f'data/batch_{i}/JSON/kaggle_data_{i}.json')
            with open(file=f'data/batch_{i}/JSON/kaggle_data_{i}.json') as f:
                raw_data = json.load(f)
            sub_df = create_data_frame(raw_data, f'data/batch_{i}/background_images')
            df = df.append(sub_df)
        df.to_csv(path)
        df = pd.read_csv(path).drop(columns = 'Unnamed: 0')
    else:
        df = pd.read_csv(path).drop(columns = 'Unnamed: 0')

    list_cols = ['xmins_raw', 'xmaxs_raw', 'ymins_raw', 'ymaxs_raw', 'xmins', 'xmaxs', 'ymins', 'ymaxs']
    for c in list_cols:
        df[c] = df[c].apply(json.loads)

    df['latex'] = df['latex'].replace("'\\\\", "'\\")
    df['latex'] = df['latex'].apply(ast.literal_eval)
    
    #vocab = df['latex'].explode().unique().tolist()[0]
    df['visible_latex_chars'] = df['visible_latex_chars'].replace("'\\\\", "'\\")
    df['visible_latex_chars'] = df['visible_latex_chars'].apply(ast.literal_eval)
    
    with open(file=f'data/extras/visible_char_map.json') as f:
        visible_char_map = json.load(f)
    
    return df, visible_char_map

In [9]:
def split_dataframe(df):
    X_train, X_test = train_test_split(df, test_size=0.20, random_state=4995)
    
    return X_train, X_test

def prepare_data(batch_size = 32):
    
    df, visible_char_map = load_data()
    
    # num_classes = len(visible_char_map)
    
    l = []
    for i in df['visible_latex_chars'].tolist():
        for j in i:
            l.append(j)
    
    classes = sorted(list(set(l)))
    num_classes = len(set(l))
    
    visible_char_map = {}
    for idx, symbol in enumerate(classes):
        visible_char_map[symbol] = idx + 1 
    
    return df, visible_char_map, num_classes, classes

def build_dataloaders(df, visible_char_map, df2 = None,  batch_size = 32, bad_classes = None):
    data_transforms = {
      'train': transforms.Compose([
        #  transforms.Resize((896,896)),
        #  transforms.RandomHorizontalFlip(),
          transforms.ToTensor(),
        #  transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
      ]),
      'val': transforms.Compose([
       #   transforms.Resize((896,896)),
          #transforms.CenterCrop(256),
          #transforms.RandomHorizontalFlip(),
          transforms.ToTensor(),
       #   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
      ]),
    }
    
    if df2 is None:
        train_df, val_df = split_dataframe(df)
    else:
        train_df, val_df = df, df2
    
    train_dataset = HandwrittenDataset(train_df, visible_char_map, transform = data_transforms['train'], bad_classes = bad_classes)
    train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True, num_workers=1, collate_fn = collate_fn)
    
    val_dataset = HandwrittenDataset(val_df, visible_char_map, transform = data_transforms['val'], bad_classes = bad_classes)
    val_loader = DataLoader(val_dataset, batch_size = batch_size, shuffle = False, num_workers=1, collate_fn = collate_fn)
    
    return train_loader, val_loader

In [10]:
def build_resnet(num_classes, model_path = 'models/resnet.pt', to_cuda = True):
    if not model_path:
        model = torchvision.models.resnet18(pretrained = True)
        input_feat = model.fc.in_features
        
        model.fc = nn.Linear(input_feat, num_classes)
        loaded_state_dict = False

    else:
        print("Loaded", model_path)
        model = torchvision.models.resnet18()
        input_feat = model.fc.in_features
        model.fc = nn.Linear(input_feat, num_classes)
        loaded_model = torch.load(model_path)
        model.load_state_dict(loaded_model)
        loaded_state_dict = True
        
    if to_cuda:
        model = model.to(DEVICE)
        
    return model, loaded_state_dict

In [11]:
df, visible_char_map, num_classes, classes = prepare_data()

In [12]:
from sklearn.utils import shuffle
df_shuf = shuffle(df, random_state = 1)

In [13]:
red_df = df_shuf[:40000]
val_df = df_shuf[40000:50000]

In [None]:
#test_df = df_shuf[5001:10000]

In [14]:
train_loader, val_loader = build_dataloaders(red_df, visible_char_map, df2 = val_df, batch_size = params['train_batch'])


In [16]:
_, test_loader = build_dataloaders(test_df, visible_char_map, batch_size = 16)

NameError: name 'test_df' is not defined

In [15]:
resnet, loaded_state_dict = build_resnet(num_classes, None)

In [27]:
for n, p in resnet.named_parameters():
    if 'fc' in n:
        print(n, p.requires_grad)
    else:
        p.requires_grad = False

fc.weight True
fc.bias True


In [15]:
class FasterRCNNBackboneModel(nn.Module):
    
    def __init__(self, faster_rcnn, num_classes = 54):
        super(FasterRCNNBackboneModel, self).__init__()
        
        self.trans = faster_rcnn.transform
        self.backbone = faster_rcnn.backbone
        
        self.adapt = nn.AdaptiveAvgPool2d((1,1))
        
        input_feat = 256
        
        self.classifier = nn.Linear(input_feat, num_classes)
        
    def forward(self, x):
        out = self.trans(x)
        out = self.backbone(out[0].tensors)['0']
        
        out = self.adapt(out).squeeze().squeeze()
        
        out = self.classifier(out)
        
        return out
        
faster_rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True, pretrained_backbone = True)
in_features = faster_rcnn.roi_heads.box_predictor.cls_score.in_features
faster_rcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, 55)
faster_rcnn.load_state_dict(torch.load('models/faster_fine_tuned.pt')['model_state_dict'])
model = FasterRCNNBackboneModel(faster_rcnn, num_classes)

In [16]:
faster_rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True, pretrained_backbone = True)
in_features = faster_rcnn.roi_heads.box_predictor.cls_score.in_features
faster_rcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, 55)
faster_rcnn.load_state_dict(torch.load('models/faster_fine_tuned.pt')['model_state_dict'])

<All keys matched successfully>

In [17]:
model = FasterRCNNBackboneModel(faster_rcnn, num_classes)

modules = list(model.children())[:-2]
modules[-1] = modules[-1].body
modules.append(nn.AdaptiveAvgPool2d((1,1)))
modules.append(nn.Linear(in_features=2048, out_features=54, bias=True))
#modules.pop(0)
model = nn.Sequential(*modules)

In [18]:
model.to(DEVICE)
pass

In [19]:
for n, param in model.named_parameters():
    if 'classifier' not in n:
        param.requires_grad = False

In [20]:
def train_loop(model, train_loader, loss_fn, optimizer, scheduler, num_classes = 54):
    
    model.train()
    
    train_loss_list = []
    print("Train loop")
    
    concat_pred = np.zeros((1, num_classes))
    concat_labels = np.zeros((1, num_classes))
    avgprecs = np.zeros(num_classes)
    
    for i, data in enumerate((train_loader)):
        optimizer.zero_grad()
        images, targets, _ = data
        
        images = list(image.to(DEVICE) for image in images)
        targets = targets.to(DEVICE)
        
        output = model(images)
        
        cpuout= output.detach().to('cpu')
        pred_scores = cpuout.numpy() 
        concat_pred = np.append(concat_pred, pred_scores, axis = 0)
        concat_labels = np.append(concat_labels, targets.cpu().numpy(), axis = 0)
        
        loss = loss_fn(output, targets)
        
        loss_value = loss.item()
        train_loss_list.append(loss_value)
        
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        if i % 10 == 0:
            print(f'Batch: {i} of {len(train_loader)}. Loss:',loss_value, "Mean so far:", np.mean(train_loss_list))
            
    concat_pred = concat_pred[1:,:]
    concat_labels = concat_labels[1:,:]

    for c in range(num_classes):   
        avgprecs[c] =  metrics.average_precision_score(concat_labels[:,c], concat_pred[:,c])
        
    measure = np.mean(avgprecs)

    return np.mean(train_loss_list), measure

In [21]:
def val_loop(model, val_loader, loss_fn, num_classes = 54):
    
    model.eval()
    
    val_loss_list = []

    concat_pred = np.zeros((1, num_classes))
    concat_labels = np.zeros((1, num_classes))
    avgprecs = np.zeros(num_classes)
    
    print("Validation loop")
    
    with torch.no_grad():
        for i, data in enumerate((val_loader)):
            images, targets, _ = data

            images = list(image.to(DEVICE) for image in images)
            targets = targets.to(DEVICE)

            output = model(images)
            
            cpuout= output.detach().to('cpu')
            pred_scores = cpuout.numpy() 
            concat_pred = np.append(concat_pred, pred_scores, axis = 0)
            concat_labels = np.append(concat_labels, targets.cpu().numpy(), axis = 0)
        
            loss = loss_fn(output, targets)
            
            loss_value = loss.item()
            val_loss_list.append(loss_value)
            if i % 10 == 0:
                print(f'Batch: {i} of {len(val_loader)}. Loss:',loss_value, "Mean so far:", np.mean(val_loss_list))
            
    loss_mean = np.mean(val_loss_list)
    print("Eval loss:",loss_mean)

    concat_pred = concat_pred[1:,:]
    concat_labels = concat_labels[1:,:]

    for c in range(num_classes):   
        avgprecs[c]=  metrics.average_precision_score(concat_labels[:,c], concat_pred[:,c])
        
    measure = np.mean(avgprecs)
        
    return loss_mean, measure

In [22]:
def train(model, train_loader, val_loader, loss_fn, optimizer, scheduler, epochs = 5,  model_name = 'resnet', save_path = 'models'):
    
    train_losses = []
    train_avg_prec_list = []
    val_losses = []
    val_avg_prec_list = []
    
    best_val_loss = 0
    
    for epoch in range(epochs):
        print(f"Epoch: {epoch}")
        tic = time.time()
        train_loss, train_avg_prec = train_loop(model, train_loader, loss_fn, optimizer, scheduler)
        train_losses.append(train_loss)
        train_avg_prec_list.append(train_avg_prec)
        
        print("Train loss:", train_loss, train_avg_prec)
        print(f"Train loop took {time.time()-tic}")
        tic = time.time()
        val_loss, val_avg_prec = val_loop(model, val_loader, loss_fn)
        print("Validation loss:", val_loss, val_avg_prec)
        print(f"Validation loop took {time.time()-tic}")
        val_losses.append(val_loss)
        val_avg_prec_list.append(val_avg_prec)
        #scheduler.step()
        
        if not best_val_loss:
            best_val_loss = val_loss
            
            model_name_pt = model_name+'.pt'
            PATH = os.path.join(save_path, model_name_pt)
            model.to('cpu')
            #torch.save(model.state_dict(), PATH)
            torch.save({
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            }, PATH)
            model.to(DEVICE)
            
        else:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                
                model_name_pt = model_name+'.pt'
                PATH = os.path.join(save_path, model_name_pt)
                model.to('cpu')
                torch.save(model.state_dict(), PATH)
                model.to(DEVICE)
    
    return train_losses, val_losses, train_avg_prec, val_avg_prec

In [24]:
loaded_state_dict = False

In [25]:
train_params = [p for p in model.parameters() if p.requires_grad]

loss_fn = nn.BCEWithLogitsLoss(weight=None, reduction='mean')

optimizer = torch.optim.Adam(train_params, lr = params['lr'], weight_decay = 0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = len(train_loader) * 5)
if loaded_state_dict:
    optimizer.load_state_dict(torch.load(f"models/{model_name}.pt")['optimizer_state_dict'])
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2,4], gamma = 0.1)
    scheduler.load_state_dict(torch.load(f"models/{model_name}.pt")['optimizer_state_dict'])

train_losses, val_losses, train_avg_prec, val_avg_prec = train(model, train_loader, val_loader, loss_fn, optimizer, scheduler, epochs = 5, model_name = params['model_name'])

Epoch: 0
Train loop
Batch: 0 of 2500. Loss: 0.8642555475234985 Mean so far: 0.8642555475234985
Batch: 10 of 2500. Loss: 0.548582136631012 Mean so far: 0.509785692800175
Batch: 20 of 2500. Loss: 0.40228840708732605 Mean so far: 0.46864643409138634
Batch: 30 of 2500. Loss: 0.3988168239593506 Mean so far: 0.4439778135668847
Batch: 40 of 2500. Loss: 0.40225085616111755 Mean so far: 0.43224306004803353
Batch: 50 of 2500. Loss: 0.363121896982193 Mean so far: 0.4229491072542527
Batch: 60 of 2500. Loss: 0.3746149241924286 Mean so far: 0.4162500163570779
Batch: 70 of 2500. Loss: 0.3509426712989807 Mean so far: 0.41201846574393797
Batch: 80 of 2500. Loss: 0.37690404057502747 Mean so far: 0.4082700220155127
Batch: 90 of 2500. Loss: 0.3812686800956726 Mean so far: 0.40653348296553227
Batch: 100 of 2500. Loss: 0.3916255831718445 Mean so far: 0.4046802467638903
Batch: 110 of 2500. Loss: 0.4290059506893158 Mean so far: 0.40388093526298935
Batch: 120 of 2500. Loss: 0.4124187231063843 Mean so far: 0.40

KeyboardInterrupt: 

In [19]:
for n, param in model.named_parameters():
    param.requires_grad = True

In [33]:
for sam in train_loader:
    break

In [None]:
train_params = [p for p in model.parameters() if p.requires_grad]

loss_fn = nn.BCEWithLogitsLoss(weight=None, reduction='mean')

optimizer = torch.optim.Adam(train_params, lr = 0.0001, weight_decay = 0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = len(train_loader) * 5)


train_losses, val_losses, train_avg_prec, val_avg_prec = train(model, train_loader, val_loader, loss_fn, optimizer, scheduler, epochs = 5, model_name = 'resnet_lots_of_data')

In [20]:
train_params = [p for p in model.parameters() if p.requires_grad]

loss_fn = nn.BCEWithLogitsLoss(weight=None, reduction='mean')

optimizer = torch.optim.Adam(train_params, lr = 0.00001, weight_decay = 0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = len(train_loader) * 5)


train_losses, val_losses, train_avg_prec, val_avg_prec = train(model, train_loader, val_loader, loss_fn, optimizer, scheduler, epochs = 5, model_name = 'resnet_lots_of_data_another_one')

Epoch: 0
Train loop
Batch: 0 of 1250. Loss: 0.23639686405658722 Mean so far: 0.23639686405658722
Batch: 10 of 1250. Loss: 0.23683181405067444 Mean so far: 0.24067470024932514
Batch: 20 of 1250. Loss: 0.23840096592903137 Mean so far: 0.2413082562741779
Batch: 30 of 1250. Loss: 0.2547518014907837 Mean so far: 0.24146087563806964
Batch: 40 of 1250. Loss: 0.23345978558063507 Mean so far: 0.2414681886027499
Batch: 50 of 1250. Loss: 0.2467907816171646 Mean so far: 0.24195251949861937
Batch: 60 of 1250. Loss: 0.24534589052200317 Mean so far: 0.2422728650882596
Batch: 70 of 1250. Loss: 0.23527628183364868 Mean so far: 0.24176179229373662
Batch: 80 of 1250. Loss: 0.23909978568553925 Mean so far: 0.24234734199665212
Batch: 90 of 1250. Loss: 0.24108436703681946 Mean so far: 0.24189489359384056
Batch: 100 of 1250. Loss: 0.23411546647548676 Mean so far: 0.24162543498643554
Batch: 110 of 1250. Loss: 0.24754904210567474 Mean so far: 0.2416993160774042
Batch: 120 of 1250. Loss: 0.23959331214427948 Mea

OSError: [Errno 12] Cannot allocate memory

In [19]:

resnet.load_state_dict(torch.load('models/resnet_re_train_fine_tuned.pt')['model_state_dict'])

<All keys matched successfully>

# Validation loop and checking the performance per class

In [20]:
def val_loop_return_everything(model, val_loader, loss_fn, num_classes = 54):
    
    model.eval()
    
    val_loss_list = []

    concat_pred = np.zeros((1, num_classes))
    concat_labels = np.zeros((1, num_classes))
    avgprecs = np.zeros(num_classes)
    
    print("Validation loop")
    
    with torch.no_grad():
        for i, data in enumerate((val_loader)):
            images, targets, _ = data

            images = images.to(DEVICE)
            targets = targets.to(DEVICE)

            output = model(images)
            
            cpuout= output.detach().to('cpu')
            pred_scores = cpuout.numpy() 
            concat_pred = np.append(concat_pred, pred_scores, axis = 0)
            concat_labels = np.append(concat_labels, targets.cpu().numpy(), axis = 0)
        
            loss = loss_fn(output, targets)
            
            loss_value = loss.item()
            val_loss_list.append(loss_value)
            if i % 10 == 0:
                print(f'Batch: {i} of {len(val_loader)}. Loss:',loss_value)
            
    loss_mean = np.mean(val_loss_list)
    print("Eval loss:",loss_mean)

    concat_pred = concat_pred[1:,:]
    concat_labels = concat_labels[1:,:]

    for c in range(num_classes):   
        avgprecs[c]=  metrics.average_precision_score(concat_labels[:,c], concat_pred[:,c])
        
    measure = np.mean(avgprecs)
        
    return loss_mean, measure, avgprecs, concat_pred, concat_labels

In [22]:

loss_fn = nn.BCEWithLogitsLoss(weight=None, reduction='mean')

ret = val_loop_return_everything(resnet, val_loader, loss_fn)

Validation loop


AttributeError: 'list' object has no attribute 'to'

In [23]:
np.set_printoptions(suppress=True)

In [26]:
for i in ret[-3]:
    print(np.mean(i))

0.9870158440830942
0.9860802837457596
0.0008444061645094373
0.9615192426217706
0.4052841994340145
0.6426328663811259
0.7506891736348382
0.7506141921024738
0.4818765946896204
0.5054226232132538
0.48720178325831887
0.4774082281751355
0.5656422401310055
0.47246565545210506
0.8902608096686703
0.0362953773407202
0.9892332474261584
0.10100706537101628
0.24392540706476712
0.9988255537189779
0.95352623850222
0.9989855430593186
0.7281443152115609
1.0
0.8780345905556952
0.9993825393768674
0.9943368170768624
0.9989856941377634
0.7281443152115609
0.41310317631677457
0.9978124722321937
0.9936409320849163
0.9895038258427693
0.08517321676222847
1.0
0.06268768298934228
0.057532173820173976
0.07075368668074615
0.9987485582052075
0.3547880845835218
0.07094818271289594
0.13005071247256356
0.05575620379992699
0.036658599604282076
0.06177173029169289
0.056742231672242294
0.08818458080452635
0.12918121948209244
0.12218780461106363
0.05597095270485711
0.10611170698554667
0.9938983333284515
0.1705487744080593

In [27]:
inv_map = {v: k for k, v in visible_char_map.items()}

In [32]:
bad_classes = []

In [35]:
bad_classes

['.',
 '0',
 '1',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '\\cdot',
 '\\cot',
 '\\csc',
 '\\sec',
 '\\theta',
 'a',
 'b',
 'c',
 'e',
 'g',
 'h',
 'k',
 'n',
 'p',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'y',
 'z']

In [60]:
for k,v in visible_char_map.items():
    if ret[-3][v-1] < 0.7:
        print(k, ret[-3][v-1])
        bad_classes.append(k)

. 0.0008444061645094373
0 0.4052841994340145
1 0.6426328663811259
4 0.4818765946896204
5 0.5054226232132538
6 0.48720178325831887
7 0.4774082281751355
8 0.5656422401310055
9 0.47246565545210506
\cdot 0.0362953773407202
\cot 0.10100706537101628
\csc 0.24392540706476712
\sec 0.41310317631677457
\theta 0.08517321676222847
a 0.06268768298934228
b 0.057532173820173976
c 0.07075368668074615
e 0.3547880845835218
g 0.07094818271289594
h 0.13005071247256356
k 0.05575620379992699
n 0.036658599604282076
p 0.06177173029169289
r 0.056742231672242294
s 0.08818458080452635
t 0.12918121948209244
u 0.12218780461106363
v 0.05597095270485711
w 0.10611170698554667
y 0.17054877440805938
z 0.09434183395531875


# Check distribution of samples

In [42]:
visible_char_map

{'+': 1,
 '-': 2,
 '.': 3,
 '/': 4,
 '0': 5,
 '1': 6,
 '2': 7,
 '3': 8,
 '4': 9,
 '5': 10,
 '6': 11,
 '7': 12,
 '8': 13,
 '9': 14,
 '=': 15,
 '\\cdot': 16,
 '\\cos': 17,
 '\\cot': 18,
 '\\csc': 19,
 '\\frac': 20,
 '\\infty': 21,
 '\\left(': 22,
 '\\left|': 23,
 '\\lim_': 24,
 '\\ln': 25,
 '\\log': 26,
 '\\pi': 27,
 '\\right)': 28,
 '\\right|': 29,
 '\\sec': 30,
 '\\sin': 31,
 '\\sqrt': 32,
 '\\tan': 33,
 '\\theta': 34,
 '\\to': 35,
 'a': 36,
 'b': 37,
 'c': 38,
 'd': 39,
 'e': 40,
 'g': 41,
 'h': 42,
 'k': 43,
 'n': 44,
 'p': 45,
 'r': 46,
 's': 47,
 't': 48,
 'u': 49,
 'v': 50,
 'w': 51,
 'x': 52,
 'y': 53,
 'z': 54}

In [29]:
tmp = torch.zeros(54)
for idx, i in enumerate(train_loader):
    tmp = tmp + i[1].sum(axis=0)
    if idx % 100 == 0:
        print(idx)

0


KeyboardInterrupt: 

In [25]:
tmp.int()
(tmp.int()-tmp.mean()) /tmp.std()

tensor([ 1.9298,  1.4389, -0.8893, -0.1376, -0.2303,  0.1438,  1.5588,  1.0133,
         0.5252,  0.5106,  0.4954,  0.4978,  0.5105,  0.5309, -0.4883, -0.8576,
         0.1215, -0.7701, -0.6210,  2.6319, -0.0322,  0.4716, -0.7094,  3.2357,
        -0.4863, -0.7141,  0.3107,  0.4716, -0.7094, -0.4457,  0.4176, -0.5564,
         0.2592, -0.7272,  3.2357, -0.7084, -0.7120, -0.7118, -0.4455, -0.6560,
        -0.7155, -0.5469, -0.7136, -0.7504, -0.7094, -0.7257, -0.7300, -0.5891,
        -0.5475, -0.7215, -0.5469, -0.1463, -0.5447, -0.7146])

In [21]:
tmp_2 = torch.zeros(54)
for j in val_loader:
    tmp_2 = tmp_2 + j[1].sum(axis=0)

In [24]:
(tmp_2.int()-tmp_2.mean()) /tmp_2.std()

tensor([ 1.9091,  1.4448, -0.8898, -0.1522, -0.2285,  0.1663,  1.5614,  1.0286,
         0.5391,  0.5095,  0.5288,  0.5185,  0.5025,  0.5301, -0.4851, -0.8564,
         0.1238, -0.7657, -0.6158,  2.6429, -0.0142,  0.4510, -0.6998,  3.2292,
        -0.4983, -0.6940,  0.2944,  0.4510, -0.6998, -0.4716,  0.3995, -0.5560,
         0.2771, -0.7229,  3.2292, -0.7126, -0.7126, -0.7072, -0.4617, -0.6706,
        -0.7175, -0.5292, -0.7159, -0.7649, -0.7336, -0.7398, -0.7212, -0.5824,
        -0.5535, -0.7048, -0.5680, -0.1353, -0.5474, -0.7085])

In [23]:
visible_char_map

{'+': 1,
 '-': 2,
 '.': 3,
 '/': 4,
 '0': 5,
 '1': 6,
 '2': 7,
 '3': 8,
 '4': 9,
 '5': 10,
 '6': 11,
 '7': 12,
 '8': 13,
 '9': 14,
 '=': 15,
 '\\cdot': 16,
 '\\cos': 17,
 '\\cot': 18,
 '\\csc': 19,
 '\\frac': 20,
 '\\infty': 21,
 '\\left(': 22,
 '\\left|': 23,
 '\\lim_': 24,
 '\\ln': 25,
 '\\log': 26,
 '\\pi': 27,
 '\\right)': 28,
 '\\right|': 29,
 '\\sec': 30,
 '\\sin': 31,
 '\\sqrt': 32,
 '\\tan': 33,
 '\\theta': 34,
 '\\to': 35,
 'a': 36,
 'b': 37,
 'c': 38,
 'd': 39,
 'e': 40,
 'g': 41,
 'h': 42,
 'k': 43,
 'n': 44,
 'p': 45,
 'r': 46,
 's': 47,
 't': 48,
 'u': 49,
 'v': 50,
 'w': 51,
 'x': 52,
 'y': 53,
 'z': 54}

In [None]:
def test_loop(model, test_loader):
    
    model = model.eval()
    test_loss_list = []
    
    with torch.no_grad():
        for i, data in enumerate(tqdm(test_loader)):
            images, targets, _ = data

            images = list(image.to(DEVICE) for image in images)
            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

            out = model(images, targets)

            losses = sum(l for l in loss.values())

            loss_value = losses.item()
            val_loss_list.append(loss_value)
            
    loss_mean = np.mean(val_loss_list)
    print("Eval loss:",loss_mean)
        
    return loss_mean

In [50]:
a = val_loop(model, test_loader, loss_fn)

Validation loop
Batch: 0 of 63. Loss: 0.25578728318214417
Batch: 10 of 63. Loss: 0.2570032775402069
Batch: 20 of 63. Loss: 0.27196961641311646
Batch: 30 of 63. Loss: 0.2888139486312866
Batch: 40 of 63. Loss: 0.2780291438102722
Batch: 50 of 63. Loss: 0.28471431136131287
Batch: 60 of 63. Loss: 0.2738357186317444
Eval loss: 0.27238586355769445


  recall = tps / tps[-1]


In [59]:
_,_, concat_pred, concat_labels = a

In [57]:
concat_pred

array([[ 0.58596671, -0.0599956 , -4.38828087, ..., -2.49682307,
        -2.52478766, -2.7654357 ],
       [ 2.59002328, -0.49211001, -5.47638988, ..., -2.8251071 ,
        -3.16462708, -3.21002603],
       [ 0.84441227,  0.1477924 , -4.38981915, ..., -2.51568341,
        -2.34248924, -2.6786375 ],
       ...,
       [ 0.41429237,  0.14224967, -5.43825483, ...,  0.1576449 ,
        -3.03038526, -3.68416119],
       [-0.65875429, -0.70756537, -4.95354319, ..., -3.03540778,
        -2.58185124, -3.12093306],
       [ 2.99820757, -0.37740004, -4.89705801, ..., -2.35423446,
        -2.75874138, -2.97725725]])

In [None]:
def add_text_border(draw_obj, font, text, xmin, ymin):
    """
    Add a thin black border around the text, helps with visualization. Modifies the draw object in place.
    
    Parameters
    ----------
    draw_obj : PIL.ImageDraw.ImageDraw
        The draw object.
    font : PIL.ImageFont.FreeTypeFont
        The ImageFont to add a border to.
    text : str
        The precise text being outlined, generally the label.
    xmin, ymin: int
        The xmin and ymin for the starting point of the text. (Top-Left)
    
    Returns
    ----------
    None
    """
    # Add a thin border.
    draw_obj.text((xmin-2, ymin), text, font=font, fill="black")
    draw_obj.text((xmin+2, ymin), text, font=font, fill="black")
    draw_obj.text((xmin, ymin-2), text, font=font, fill="black")
    draw_obj.text((xmin, ymin+2), text, font=font, fill="black")

def draw_bounding_boxes_on_image(img, xmins, ymins, xmaxs, ymaxs, labels):
    """
    Draws and labels bounding boxes on source image using ground truth lists of details pertaining to the source image. Modifies the source image in place.
    
    Parameters
    ----------
    img : PIL.Image.Image
        The source image.
    xmins, ymins, xmaxs, ymaxs : list
        A list of the respectful coordinates for the image
    labels : list
        A list of labels for each character to be drawn.

    Returns
    ----------
    None
    """
    draw_obj = ImageDraw.Draw(img)
    font_file = "/kaggle/input/ocr-data/extras/single_example/Roboto-Regular.ttf"
    font = ImageFont.truetype(font_file, 32)
    for xmin, ymin, xmax, ymax, label in zip(xmins, ymins, xmaxs, ymaxs, labels):
        draw_obj.rectangle([xmin, ymin, xmax, ymax], width=3)
        text = str(label.item())
        add_text_border(draw_obj, font, text, xmin, ymin)
        draw_obj.text((xmin, ymin), text, font=font)
        
def visualize_predictions(model, loader, num_samples = 5, detection_threshold=0.5):
    model.eval()
    for i, samp in enumerate(test_loader):
        if i == num_samples:
            break
        images, targets, img_file = samp
        images = list(img.to(DEVICE) for img in im)
        
        with torch.no_grad():
            outputs = model(images)
        
        boxes = outputs[0]['boxes'].cpu().data.numpy()
        scores = outputs[0]['scores'].cpu().data.numpy()
        
        #image = cv2.imread(img_file[0])
        #orig_image = image.copy()
        #image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
        
        #boxes = boxes[scores >= detection_threshold].astype(np.int32)
        #draw_boxes = boxes.copy()
        
        xmins = boxes[:,1]
        ymins = boxes[:,3]
        xmaxs = boxes[:,0]
        ymaxs = boxes[:,2]
        
        labels = out[0]['labels']
        
        img_fil = img_file[0]
        img_display = Image.open(img_fil)
        #img_display = img_display.resize((896, 896))
        
        """
        for j, box in enumerate(draw_boxes):
            class_name = pred_classes[j]
            color = COLORS[CLASSES.index(class_name)]
            cv2.rectangle(orig_image,
                        (int(box[0]), int(box[1])),
                        (int(box[2]), int(box[3])),
                        color, 2)
            cv2.putText(orig_image, class_name, 
                        (int(box[0]), int(box[1]-5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 
                        2, lineType=cv2.LINE_AA)

        cv2.imshow('Prediction', orig_image)
        """
        
        draw_bounding_boxes_on_image(img_display, xmins, ymins, xmaxs, ymaxs, labels)
        
        display(img_display)
        
    

In [None]:
visualize_predictions(model, test_loader)

In [None]:
for i in test_loader:
    break

In [None]:
model = model.eval()

In [None]:
im, targets,_ = i

In [None]:
images = list(img.to(DEVICE) for img in im)

In [None]:
out = model(images)

In [None]:
out

In [None]:
df.width.mean(), df.height.mean()