<a href="https://colab.research.google.com/github/finardi/tutos/blob/master/DocQVA-Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!nvidia-smi

Sun Jan 10 13:24:13 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.27.04    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    23W / 300W |      0MiB / 16130MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%%time

!tar -xf /content/drive/MyDrive/Colab\ Notebooks/Final-project/train.tar.gz 
!tar -xf /content/drive/MyDrive/Colab\ Notebooks/Final-project/val.tar.gz 
!tar -xf /content/drive/MyDrive/Colab\ Notebooks/Final-project/test.tar.gz

CPU times: user 419 ms, sys: 84.6 ms, total: 503 ms
Wall time: 3min 30s


In [4]:
!pip install -q transformers
!pip install -q hermetrics

[K     |████████████████████████████████| 1.5MB 13.3MB/s 
[K     |████████████████████████████████| 2.9MB 60.3MB/s 
[K     |████████████████████████████████| 890kB 61.1MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone


In [5]:
# Python / Básics
import os
import gc
import json
from pathlib import Path
from PIL import Image
import numpy as np
import collections
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
 
# Torch
import torch
import torch.nn as nn
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
 
# Transformers 
from transformers import get_linear_schedule_with_warmup, AdamW
from transformers import MobileBertTokenizer, MobileBertForQuestionAnswering, MobileBertModel

In [6]:
model_checkpoint = 'mrm8488/mobilebert-uncased-finetuned-squadv2'
tokenizer = MobileBertTokenizer.from_pretrained(model_checkpoint)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=112.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=85.0, style=ProgressStyle(description_w…




In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
manual_seed = 2357 # only primers ;)
 
def deterministic(rep=True):
    if rep:
        np.random.seed(manual_seed)
        torch.manual_seed(manual_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(manual_seed)
            torch.cuda.manual_seed_all(manual_seed)
        torch.backends.cudnn.enabled = False 
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        print(f'Experimento deterministico, seed: {manual_seed} -- ', end = '')
        print(f'Existe {torch.cuda.device_count()} GPU\
 {torch.cuda.get_device_name(0)} disponível.')
    else:
        print('Experimento randomico')
deterministic()

Experimento deterministico, seed: 2357 -- Existe 1 GPU Tesla V100-SXM2-16GB disponível.


In [8]:
TRANS_IMG = transforms.Compose(
    [
     transforms.Resize((640, 480)), 
     transforms.ToTensor(),
     transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
     
MAX_LEN = 512 
 
def get_questions_context_answers_and_file_images(path, phase):
    with open(path, 'rb') as handle:
        dataset = json.loads(handle.read())
    
    contexts = []
    phase = phase+'/'
    for i, d in enumerate(dataset['data']):
        ocr_file = d['image'].replace('documents', 'ocr_results').replace('.png', '.json')
        
        with open(phase + ocr_file, 'rb') as f:
            ocr = json.loads(f.read())
    
        lines = ocr['recognitionResults'][0]['lines']
    
        text = ' '.join([w['text'] for l in lines for w in l['words']])
        contexts.append(lines)
    
    context_samples = []
    for i, doc in enumerate(contexts):
        test_list_item = []
        for item in doc:
            test_list_item.append(item['text'])
        context_samples.append(' '.join(test_list_item).lower())
    
    # --> Questions 
    questions_samples = []
    for d in dataset['data']:
        questions_samples.append(d['question'].lower())
    
    # --> Answers
    answers_samples = []
    for d in dataset['data']:
        answers_samples.append(d['answers'][0].lower())
    
    # --> Images files 
    img_file_samples = []
    for image in dataset['data']:
        img_file = image['image']
        img_file_samples.append(phase+img_file)        
    
    questions, contexts, answers, img_files = [], [], [], []
    for q,c,a, im_f in zip(questions_samples, context_samples, answers_samples, img_file_samples):
        questions.append(q)
        contexts.append(c)
        answers.append(a)
        img_files.append(im_f)
    
    return questions, contexts, answers, img_files
 
def get_final_data(path, phase, max_len=MAX_LEN):
    questions, context, answers, img_files = get_questions_context_answers_and_file_images(path, phase)
    data = pd.DataFrame([])
    for quest, cont, ans, img_file in zip(questions, context, answers, img_files):
        data = data.append(
            pd.DataFrame(
                {
                    'Img_file':img_file,
                    'Question': quest,
                    'Context': cont,
                    'Answer': ans,
                }, index=[0]), ignore_index=True)
    
    start_position_label, end_position_label = [], []
    for i, (c, l) in enumerate(zip(
        data.Context.to_list(), 
        data.Answer.to_list()
        )):
        start_index = c.find(l)
        end_index = start_index + len(l)
        if start_index != -1: 
            start_position_label.append(start_index)
            end_position_label.append(end_index)
        else:
            start_position_label.append(None)
            end_position_label.append(None)
 
    data = data.assign(start_pos_label = start_position_label)
    data = data.assign(end_pos_label = end_position_label)
 
    data_cutted = _apply_cut(data, max_len=MAX_LEN)
    
    return data_cutted 
 
def _apply_cut(df, max_len=MAX_LEN):
    df_ = df[~df.start_pos_label.isna()]
    df_ = df_[df_.start_pos_label<max_len]
    df_ = df_.assign(start_pos_label = df_.start_pos_label.apply(lambda x: int(x)))
    df_ = df_.assign(end_pos_label = df_.end_pos_label.apply(lambda x: int(x)))
    return df_
 
class DVQADataset(Dataset):
    def __init__(self, df, tokenizer, max_len=MAX_LEN, transform=TRANS_IMG):
        super().__init__()
 
        data = df.copy()
 
        self.tokenizer = tokenizer
        self.question = data.Question.to_list()
        self.context = data.Context.to_list()
        self.answer = data.Answer.to_list()
        self.start_pos_label = data.start_pos_label.to_list()
        self.end_pos_label = data.end_pos_label.to_list()
        self.img_files = data.Img_file.to_list()
        
        self.max_len = max_len
        self.transform = transform
        
    def __len__(self):
        return len(self.question)
 
    def _sentinell_mask(self, context, answer, start_label, end_label):
        start_char_i = start_label 
        end_char_i = end_label     
        answer_tokens = self.tokenizer.tokenize(answer)
        sentinel_str = ' '.join(['[MASK]']*len(answer_tokens))
        context_w_sentinel = context[:start_char_i] + sentinel_str + context[end_char_i:]
        return answer_tokens, context_w_sentinel
 
    def _tokenize_data(self, question, context, answer, start_label, end_label):
        answer_tokens, context_w_sentinel = self._sentinell_mask(context, answer, start_label, end_label)
        encoded_dict = self.tokenizer.encode_plus(
            question, 
            context_w_sentinel,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            padding='max_length',
            truncation=True,
            return_attention_mask=True, 
            return_tensors = 'pt',      
        )
        input_ids = encoded_dict['input_ids']
        is_mask_token = (input_ids[0] == self.tokenizer.mask_token_id)
        mask_token_indeces = is_mask_token.nonzero(as_tuple=False)[:, 0]
 
        assert len(mask_token_indeces) == len(answer_tokens)
        
        start_index = mask_token_indeces[0]
        end_index = mask_token_indeces[-1]
 
        answer_token_ids = self.tokenizer.encode(
            answer_tokens, 
            add_special_tokens=False, 
            return_tensors='pt'
            )
 
        input_ids[0, start_index : end_index + 1] = answer_token_ids
        attention_mask = encoded_dict['attention_mask']    
        segment_ids = encoded_dict['token_type_ids']
    
        return input_ids, attention_mask, segment_ids, start_index, end_index
 
    def __getitem__(self, idx):
        img = Image.open(self.img_files[idx]).convert('RGB')
        img = self.transform(img)
        
        input_ids, attention_mask, segment_ids, start_index, end_index = self._tokenize_data(
            self.question[idx], 
            self.context[idx], 
            self.answer[idx], 
            self.start_pos_label[idx], 
            self.end_pos_label[idx]
            )
 
        return (img, self.question[idx], self.context[idx], self.answer[idx], 
                input_ids.squeeze(0), attention_mask.squeeze(0), 
                segment_ids.squeeze(0), start_index, end_index)

In [9]:
# Test do dataset:

paths = {
    'val': {
        'path': 'val/val_v1.0.json',
        'phase': 'val',
        },
    'train': {
        'path': 'train/train_v1.0.json',
        'phase': 'train',
        }         
}

df_val = get_final_data(
    path=paths['val']['path'], 
    phase=paths['val']['phase']
    )
print(f'DF_VAL  : {df_val.shape}')

df_train = get_final_data(
    path=paths['train']['path'], 
    phase=paths['train']['phase']
    )
print(f'DF_TRAIN: {df_train.shape}\n')

ds_train = DVQADataset(
    df=df_train,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
    transform=TRANS_IMG, 
)

img, question, context, answer, input_ids, attention_mask, segment_ids, start_index, end_index = ds_train[-1]
outs  = [img, question, context, answer, input_ids, attention_mask, segment_ids, start_index, end_index]
names = ['img', 'question', 'context', 'answer', 'input_ids', 'attention_mask', 'segment_ids', 'start_index', 'end_index']
for out, name in zip(outs, names):
    if type(out) != str and name.find('index') ==-1:
        print(name, f':{out.size()}')
    else:
        print(name,':', out)

DF_VAL  : (3209, 6)
DF_TRAIN: (24606, 6)

img :torch.Size([3, 640, 480])
question : what is the table number ?
context : table 4-a relative risk of falling into extreme 20% according to supplementation ingestion and socioeconomic status variable: composite infant scale 15 month mental score sample size (n's) percentages chi-square test performance group total sample low ' med. 2 high tot. low med. high tot. x 2 d.f. 2 supplementation 52 101 24 177 29 57 14 100 27.1 4 2.01 44 157 54 255 17 62 21 100 category : n 5 46 26 77 60 34 100 101 304 104 509 20 60 20 low ses o 23 50 79 29 63 8 100 18.4 4 4.01 22 79 29 130 17 61 22 100 in 28 13 43 5 65 30 100 t 47 157 48 252 19 62 19 high ses 0 27 45 18 90 30 50 20 100 11.7 4 l.05 22 75 25 122 18 61. 20 100 n 3 18 13 34 9 53 38 100 52 138 56 246 21 56 23 1 = lowest pentile 2 = middle 60% of scores 3 = highest pentile source: https://www.industrydocuments.ucsf.edu/docs/yyhd0227
answer : 4-a
input_ids :torch.Size([512])
attention_mask :torch.Size([5

In [10]:
BATCH_SZ = 8

# TRAIN Dataset
ds_train = DVQADataset(
    df=df_train,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
    transform=TRANS_IMG, 
)

N = 100
# Debug Dataset (N samples)
ds_debug = DVQADataset(
    df=df_train[:N],
    tokenizer=tokenizer,
    max_len=MAX_LEN,
    transform=TRANS_IMG, 
)

# VAL Dataset
ds_val = DVQADataset(
    df=df_val,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
    transform=TRANS_IMG, 
)

# Dataloaders
dataloaders = {
    'debug': DataLoader(
         ds_debug,
         batch_size=BATCH_SZ,
         shuffle=False,
         num_workers=os.cpu_count(),
         pin_memory=True
         ),
    'train': DataLoader(
         ds_train,
         batch_size=BATCH_SZ,
         shuffle=True,
         num_workers=os.cpu_count(),
         pin_memory=True
         ),
    'val': DataLoader(
         ds_val,
         batch_size=BATCH_SZ,
         num_workers=os.cpu_count(),
         pin_memory=True
         ),
     }
 
# teste de sanidade
_ = {x: len(dataloaders[x]) for x in dataloaders.keys()}
_

{'debug': 13, 'train': 3076, 'val': 402}

In [11]:
# Teste do Dataloader 
img, question, context, answer, input_ids, attention_mask, segment_ids, start_pos, end_pos = next(iter(dataloaders['debug']))

batch = next(iter(dataloaders['debug']))
batch_device = tuple(t.to(device) if type(t) != list else t for t in batch)


print('img.shape:             ', img.shape)
print('question:              ', question)
print('context:               ', context)
print('answer:                ', answer)
print('input_ids.shape:       ', input_ids.shape)
print('attention_masks.shape: ', attention_mask.shape)
print('segment_ids.shape:     ', segment_ids.shape)
print('start_positions.shape: ', start_pos)
print('end_positions.shape:   ', end_pos)

img.shape:              torch.Size([8, 3, 640, 480])
question:               ['what is the contact person name mentioned in letter?', "which corporation's letterhead is this?", 'who is in  cc in this letter?', 'what is the subject of  this letter?', 'what is the date in the letter', 'who sent the letter?', 'which part of virginia is this letter sent from', 'what is the date mentioned in the letter?']
context:                ['confidential .. .. rjrt pr approval date : 1/8/13 ru alas proposed release date: for response for release to: contact: p. carter route to initials pate peggy carter ac maura payne david fishel tom griscom diane barrows ed blackmer tow rucker tr return to peggy carter, pr, 16 reynolds building 51142 3977 . . source: https://www.industrydocuments.ucsf.edu/docs/xnb10037', 'b&w brown & williamson tobacco corporation research & development . . . . internal correspondence to: r. h. honeycutt cc: t.f. riehl from: c. j. cook date: may 8. 1995 subject: review of existing b

In [12]:
def metric_Levenshtein(seq1, seq2):
    lev = Levenshtein()
    return lev.distance(seq1, seq2)

def normalize_answer(s):
    def white_space_fix(text):
        return ' '.join(text.split())
 
    def lower(text):
        return text.lower()
 
    return white_space_fix(lower(s))
 
def get_tokens(s):
    if not s: return []
    return normalize_answer(s).split()
 
def _compute_exact(a_gold, a_pred):
    return int(normalize_answer(a_gold) == normalize_answer(a_pred))
 
def _compute_f1(a_gold, a_pred):
    gold_toks = get_tokens(a_gold)
    pred_toks = get_tokens(a_pred)
    common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
    num_same = sum(common.values())
    if len(gold_toks) == 0 or len(pred_toks) == 0:
        return int(gold_toks == pred_toks)
    if num_same == 0:
        return 0
    precision = 1.0 * num_same / len(pred_toks)
    recall = 1.0 * num_same / len(gold_toks)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1

def train_model(model, device, train_loader, optimizer, scheduler):
    model.train()
    size = len(train_loader)    
    total_train_loss = 0
 
    for step, batch in enumerate(train_loader):
        model.zero_grad()        
        batch_device = tuple(t.to(device) if type(t) != list else t for t in batch)

        loss = model(batch_device)

        total_train_loss += loss.item()
        loss.backward()
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss / size            
 
    return avg_train_loss
 
def get_score(model, val_loader, max_len=MAX_LEN):
    model.eval()
    size = len(val_loader) * BATCH_SZ
    total_f1, total_exact = 0, 0

    for step, batch in enumerate(val_loader):
        batch_device = tuple(t.to(device) if type(t) != list else t for t in batch)

        with torch.no_grad():        
            trues, preds = model(batch_device)

        total_exact += sum([_compute_exact(ans, pred) for ans, pred in zip(trues, preds)]) / size
        total_f1 += sum([_compute_f1(ans, pred) for ans, pred in zip(trues, preds)]) / size
        
    return total_f1, total_exact

# Model
 

In [13]:
class DQVA_Model(nn.Module):
    def __init__(self, convnet, nlp_model, emb_size, max_len=MAX_LEN, use_img_feats=False):
        super().__init__()
 
        self.convnet = convnet
        self.max_len = max_len
        self.use_img_feats = use_img_feats

        self.tokenizer = MobileBertTokenizer.from_pretrained(nlp_model)
        self.nlp_extract_embs = MobileBertModel.from_pretrained(nlp_model)
        self.nlp_qa = MobileBertForQuestionAnswering.from_pretrained(nlp_model, return_dict=True)

        self.fc = nn.Linear(2 * emb_size, emb_size//4)
        self.ln = nn.LayerNorm(emb_size//4)

    def forward(self, batch):
        img, question, context, answer, input_ids, attention_mask, segment_ids, start_pos, end_pos = batch

        if self.training:

            if self.use_img_feats:
                question_context_embeds = self.nlp_extract_embs(
                    input_ids=input_ids, 
                    attention_mask=attention_mask, 
                    token_type_ids=segment_ids,
                    ).last_hidden_state
                img_embeds = self.convnet(img)

                inputs_embeds = torch.cat((question_context_embeds, img_embeds), dim=-1)
                inputs_embeds = self.fc(inputs_embeds)
                inputs_embeds = self.ln(inputs_embeds)
                                
                outputs = self.nlp_qa(
                    inputs_embeds=inputs_embeds,
                    start_positions=start_pos,
                    end_positions=end_pos,
                    )
            else:
                outputs = self.nlp_qa(
                    input_ids=input_ids,
                    start_positions=start_pos,
                    end_positions=end_pos,
                    )
            
            return outputs['loss']

        else:
            return self.predict(question, context, answer)

    def predict(self, question, context, answer):
        trues, preds = [], []
        for q, c, answer in zip(question, context, answer):
            predict_inputs = self.tokenizer.encode(
                q, c,
                max_length=self.max_len, 
                truncation=True,
                )
            sep_index = predict_inputs.index(self.tokenizer.sep_token_id)
            num_seg_a = sep_index + 1
            num_seg_b = len(predict_inputs) - num_seg_a
            segment_ids = [0]*num_seg_a + [1]*num_seg_b
        
            assert len(segment_ids) == len(predict_inputs), 'crap segments'
        
            with torch.no_grad():        
                outputs = self.nlp_qa(
                    torch.tensor([predict_inputs]).to(device), 
                    token_type_ids=torch.tensor([segment_ids]).to(device)) 
                    
            start_logits = outputs['start_logits']
            end_logits = outputs['end_logits']
            answer_start = torch.argmax(start_logits)
            answer_end = torch.argmax(end_logits)
        
            tokens = tokenizer.convert_ids_to_tokens(predict_inputs)
            pred_answers = tokens[answer_start]
    
            for i in range(answer_start + 1, answer_end + 1):
                if tokens[i][0:2] == '##':
                    pred_answers += tokens[i][2:]
                else:
                    pred_answers += ' ' + tokens[i]
        
            preds.append(pred_answers)
            trues.append(answer)

        return trues, preds

class EncoderResnet(nn.Module):
    def __init__(self, cnn, channels, embed_size):
        super(EncoderResnet, self).__init__()
        self.cnn = nn.Sequential(*list(cnn.children())[:-2])
        self.conv1 = nn.Conv2d(channels, embed_size, 1)
        self.adaptive = nn.AdaptiveAvgPool2d((16, 32))
        self.embed_size = embed_size
        
    def forward(self, x):
        output = self.cnn(x) # N, C=2048 H, W
        output = self.conv1(output)
        output = self.adaptive(output)
        output = output.view(output.size(0), self.embed_size, -1)  
        output = output.permute(0, 2, 1) 
        return output
 
    def freeze(self):
        for p in self.cnn.parameters():
            p.requires_grad = False
        for c in list(self.cnn.children()):#[-2:]:
            for p in c.parameters():
                p.requires_grad = False

# --> Testing the Network
try:
    del model
    del resnet
    gc.collect()
    torch.cuda.empty_cache()
except:
    pass
 
EMB_SIZE = 512
resnet = models.resnet50(pretrained=True)
resnet_model = EncoderResnet(resnet, 4 * EMB_SIZE, EMB_SIZE).to(device)
# resnet_model.freeze()
 
model = DQVA_Model(
    convnet=resnet_model, 
    nlp_model=model_checkpoint, 
    emb_size=EMB_SIZE,
    max_len=MAX_LEN, 
    use_img_feats=True
    ).to(device)

# model.fc.weight.requires_grad = False
with torch.no_grad():
    loss = model(batch_device)
print('loss', loss)

#------------
model.eval()
with torch.no_grad():
    trues, preds = model(batch_device)
print(f'TRUES: {trues}')
print(f'PREDS: {preds}')

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=765.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=98614149.0, style=ProgressStyle(descrip…


loss tensor(5.2690, device='cuda:0')
TRUES: ['p. carter', 'brown & williamson tobacco corporation', 't.f. riehl', 'review of existing brainstorming ideas/483', 'june 11, 1990', 'ted sanders', 'richmond', 'april 27, 1990']
PREDS: ['p . carter', 'b & w brown & williamson tobacco corporation', 't . f . riehl', 'existing brainstorming ideas', 'june 11 , 1990', 'dr . k . s . houghton', 'richmond', 'april 27 , 1990']


In [14]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
 
print('\n','#' * 53, f'\n # Número de params. {count_parameters(model):,}' \
       ' trainable parameters #\n', '#' * 53,'\n')  
# model


 ##################################################### 
 # Número de params. 73,853,378 trainable parameters #
 ##################################################### 



# Overfit

In [15]:
OVERFIT = False

if OVERFIT:
    deterministic()
    N_EPOCHS = 5
    try:
        del model
        gc.collect()
        torch.cuda.empty_cache()
    except:
        pass
    
    model = DQVA_Model(
        convnet=resnet_model, 
        nlp_model=model_checkpoint, 
        emb_size=EMB_SIZE,
        max_len=MAX_LEN,
        use_img_feats=False
        ).to(device)

    optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=5e-5, eps=1e-8)

    total_steps = (len(dataloaders['debug']) * N_EPOCHS)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=total_steps*0.01, num_training_steps=total_steps)

    for epoch_i in range(1, N_EPOCHS+1):
        
        loss_train = train_model(model, device, dataloaders['debug'], optimizer, scheduler)
        print(f'\nEpoca [{epoch_i}/{N_EPOCHS}]: Loss Train: {loss_train:.3f}')
        
        f1_result, exact_result = get_score(model, dataloaders['debug'])
        print(f'              Exact Match: {exact_result:.4f} -- F1: {f1_result:.4}')

In [16]:
deterministic()
N_EPOCHS = 10
 
try:
    del model
    gc.collect()
    torch.cuda.empty_cache()
except:
    pass
 
path_save_model = '/content/drive/MyDrive/Colab Notebooks/Final-project/saved_epochs/'

model = DQVA_Model(
        convnet=resnet_model, 
        nlp_model=model_checkpoint, 
        emb_size=EMB_SIZE,
        max_len=MAX_LEN,
        use_img_feats=False
        ).to(device)

optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=5e-5, eps=1e-8)

total_steps = (len(dataloaders['train']) * N_EPOCHS)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=total_steps*0.01, num_training_steps=total_steps)

#----------------------------------------------------------------

training_stats = []
for epoch_i in range(1, N_EPOCHS+1):
        
    loss_train = train_model(model, device, dataloaders['train'], optimizer, scheduler)
    print(f'\nEpoca [{epoch_i}/{N_EPOCHS}]: Loss Train: {loss_train:.3f}')
    
    f1_result, exact_result = get_score(model, dataloaders['val'])
    print(f'              Exact Match: {exact_result:.4f} -- F1: {f1_result:.4}')

    # saving
    torch.save(model.state_dict(), path_save_model+'Mobile_F6'+str(epoch_i))
 
    training_stats.append({'epoch': epoch_i, 'Training Loss': loss_train, 'Exact': exact_result, 'F1': f1_result})

Experimento deterministico, seed: 2357 -- Existe 1 GPU Tesla V100-SXM2-16GB disponível.

Epoca [1/10]: Loss Train: 1.207
              Exact Match: 0.3225 -- F1: 0.4513

Epoca [2/10]: Loss Train: 0.738
              Exact Match: 0.3545 -- F1: 0.4791

Epoca [3/10]: Loss Train: 0.533
              Exact Match: 0.3495 -- F1: 0.4742

Epoca [4/10]: Loss Train: 0.396
              Exact Match: 0.3576 -- F1: 0.48

Epoca [5/10]: Loss Train: 0.301
              Exact Match: 0.3616 -- F1: 0.4829

Epoca [6/10]: Loss Train: 0.230
              Exact Match: 0.3623 -- F1: 0.4818

Epoca [7/10]: Loss Train: 0.184
              Exact Match: 0.3678 -- F1: 0.4844

Epoca [8/10]: Loss Train: 0.141
              Exact Match: 0.3635 -- F1: 0.4838

Epoca [9/10]: Loss Train: 0.137
              Exact Match: 0.3697 -- F1: 0.4855

Epoca [10/10]: Loss Train: 0.093
              Exact Match: 0.3703 -- F1: 0.486


In [17]:
df_stats = pd.DataFrame(data=training_stats)
df_stats = df_stats.set_index('epoch')
pd.set_option('precision', 2)
df_stats

Unnamed: 0_level_0,Training Loss,Exact,F1
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.21,0.32,0.45
2,0.74,0.35,0.48
3,0.53,0.35,0.47
4,0.4,0.36,0.48
5,0.3,0.36,0.48
6,0.23,0.36,0.48
7,0.18,0.37,0.48
8,0.14,0.36,0.48
9,0.14,0.37,0.49
10,0.09,0.37,0.49
