In [1]:
import os
import pickle
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from utils_data import  Vocabulary, Vectorizer, HeadQA, HeadQA_IR
from utils_data import parse_dataset, parse_ir_dataset, random_oversamplig, random_undersampling
from utils_data import filter_by_category, save_dataset_to_pickle, load_dataset_from_pickle

from training import get_optimizer, train, train_ir, validate, validate_ir, evaluator, evaluator_ir, evaluate
from training import load_embeddings_from_file, make_embedding_matrix
from training import pad_seq, encoder_bert, encoder_bert_ir, encoder_bert_instance, encoder_bert_ir_instance
from training import evaluator_bert, evaluator_bert_ir, evaluate_better

from supervised_models import LogisticRegression, BasicLSTM, BiLSTM_model
from ir_models import LSTM_QA, LSTM_CNN_QA, BERT_QA

%matplotlib inline
%load_ext autoreload
%autoreload 2



In [2]:
CATEGORY = 'medicine'

In [3]:
from datasets import load_dataset

data_es = load_dataset('head_qa', 'es' )
training, validation, testing = data_es['train'], data_es['validation'], data_es['test']

Reusing dataset head_qa (C:\Users\tec005m\.cache\huggingface\datasets\head_qa\es\1.1.0\473dc5357942a3ff52963bd73cad0d167bd1bbc1ca5ca0732ee7372b480dd735)


### Modelos supervisados puros

In [36]:
training_instances = load_dataset_from_pickle('../data/training.pickle')
validation_instances = load_dataset_from_pickle('../data/validation.pickle')
testing_instances = load_dataset_from_pickle('../data/testing.pickle')

oversampled_training = load_dataset_from_pickle('../data/oversampled_training.pickle')

In [37]:
training_categ = filter_by_category(oversampled_training, category=CATEGORY)
validation_categ = filter_by_category(validation_instances, category=CATEGORY)
testing_categ = filter_by_category(testing_instances, category=CATEGORY)

dev_categ = filter_by_category(validation, category=CATEGORY)
test_categ = filter_by_category(testing, category=CATEGORY)

In [38]:
vectorizer = Vectorizer.vectorize_training(training_categ)
vocab = vectorizer.sentence_vocab
label_vocab = vectorizer.label_vocab

vectorizer.label_vocab.vocab2index = {1:1, 0:0}
vectorizer.label_vocab.index2vocab = {0:0, 1:1}

In [39]:
trainset = HeadQA(instances=training_categ, vectorizer=vectorizer, right_padding=False, max_length=30)
validset = HeadQA(instances=validation_categ, vectorizer=vectorizer, right_padding=False, max_length=30)
testset = HeadQA(instances=testing_categ, vectorizer=vectorizer, right_padding=False, max_length=30)

In [40]:
batch_size = 32
train_dt = DataLoader(trainset, batch_size=batch_size,drop_last=True)
valid_dt = DataLoader(validset, batch_size=batch_size,drop_last=True)
test_dt = DataLoader(testset, batch_size=batch_size,drop_last=True)

#### Logistic Regressor

In [9]:
logistic_regressor = LogisticRegression(trainset.max_length, 1)
optimizer = get_optimizer(logistic_regressor, lr = 0.01, wd = 1e-5)

In [10]:
training_results = train(logistic_regressor, optimizer, train_dt, valid_dt, validate, epochs=30)



Epoch 0 train loss  50.2596 valid loss 1.781 and accuracy 0.5056
Epoch 1 train loss  44.0149 valid loss 2.762 and accuracy 0.2935
Epoch 2 train loss  50.9618 valid loss 2.762 and accuracy 0.2824
Epoch 3 train loss  50.6703 valid loss 2.762 and accuracy 0.2857
Epoch 4 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 5 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 6 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 7 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 8 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 9 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 10 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 11 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 12 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 13 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 14 train loss  50.6752 valid loss 2.762 and accuracy 0.2857
Epoch 15 train loss 

In [11]:
acc, points = evaluate(logistic_regressor, dev_categ, trainset.encode, evaluator)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(logistic_regressor, test_categ, trainset.encode, evaluator)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: medicine
accuracy: tensor([0.2554]), points: 5
----------
TEST Dominio: medicine
accuracy: tensor([0.2376]), points: -23


In [12]:
model_path = os.getcwd() + f'/trained_models/logistic_regressor_{CATEGORY}'
torch.save(logistic_regressor.state_dict(), model_path)

#### LSTM

In [13]:
lstm = BasicLSTM(len(vocab), 64, trainset.max_length, 1, embedding_dim=100)
optimizer = get_optimizer(lstm, lr = 0.001, wd = 1e-5)

In [14]:
training_results = train(lstm, optimizer, train_dt, valid_dt, validate, epochs=50)

Epoch 0 train loss  0.6714 valid loss 0.029 and accuracy 0.2500
Epoch 1 train loss  0.7211 valid loss 0.027 and accuracy 0.2500
Epoch 2 train loss  0.7044 valid loss 0.026 and accuracy 0.2500
Epoch 3 train loss  0.6998 valid loss 0.026 and accuracy 0.2500
Epoch 4 train loss  0.6976 valid loss 0.026 and accuracy 0.2500
Epoch 5 train loss  0.6954 valid loss 0.026 and accuracy 0.2500
Epoch 6 train loss  0.6942 valid loss 0.026 and accuracy 0.2500
Epoch 7 train loss  0.6914 valid loss 0.026 and accuracy 0.2500
Epoch 8 train loss  0.6865 valid loss 0.026 and accuracy 0.2522
Epoch 9 train loss  0.6745 valid loss 0.026 and accuracy 0.2578
Epoch 10 train loss  0.6580 valid loss 0.026 and accuracy 0.2801
Epoch 11 train loss  0.6351 valid loss 0.026 and accuracy 0.3147
Epoch 12 train loss  0.6090 valid loss 0.026 and accuracy 0.3538
Epoch 13 train loss  0.5804 valid loss 0.026 and accuracy 0.3672
Epoch 14 train loss  0.5657 valid loss 0.026 and accuracy 0.4129
Epoch 15 train loss  0.5347 valid l

In [15]:
acc, points = evaluate(lstm, dev_categ, trainset.encode, evaluator)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(lstm, test_categ, trainset.encode, evaluator)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: medicine
accuracy: tensor([0.2294]), points: -19
----------
TEST Dominio: medicine
accuracy: tensor([0.2181]), points: -59


In [16]:
model_path = os.getcwd() + f'/trained_models/basic_lstm_{CATEGORY}'
torch.save(lstm.state_dict(), model_path)

#### BiLSTM

In [41]:
word_to_idx = load_dataset_from_pickle('trained_models/biomedical_embeddings/word_to_index.pickle')
embeddings = load_dataset_from_pickle('trained_models/biomedical_embeddings/wordvectors.pickle')
embedding_file = "trained_models/biomedical_embeddings/Scielo_wiki_FastText300.vec"
words = vocab.vocab2index.keys()
embedding_matrix = make_embedding_matrix(embedding_file, list(words), word_to_idx, embeddings)

In [18]:
bilstm = BiLSTM_model(embedding_matrix.shape[1], embedding_matrix.shape[0], 1, 
                     pretrained_embeddings=embedding_matrix, max_length=trainset.max_length)
optimizer = get_optimizer(bilstm, lr = 0.01, wd = 1e-5)



In [19]:
training_results = train(bilstm, optimizer, train_dt, valid_dt, validate, epochs=50)

Epoch 0 train loss  0.3829 valid loss 2.762 and accuracy 0.2500
Epoch 1 train loss  50.7812 valid loss 2.762 and accuracy 0.2500
Epoch 2 train loss  50.7812 valid loss 2.762 and accuracy 0.2500
Epoch 3 train loss  50.7812 valid loss 2.762 and accuracy 0.2500
Epoch 4 train loss  50.7812 valid loss 2.762 and accuracy 0.2500
Epoch 5 train loss  50.7812 valid loss 2.762 and accuracy 0.2500
Epoch 6 train loss  50.7812 valid loss 2.762 and accuracy 0.2500
Epoch 7 train loss  81.5559 valid loss 0.921 and accuracy 0.7500
Epoch 8 train loss  49.2188 valid loss 0.921 and accuracy 0.7500
Epoch 9 train loss  49.2188 valid loss 0.921 and accuracy 0.7500
Epoch 10 train loss  49.2188 valid loss 0.921 and accuracy 0.7500
Epoch 11 train loss  49.2188 valid loss 0.921 and accuracy 0.7500
Epoch 12 train loss  49.2188 valid loss 0.921 and accuracy 0.7500
Epoch 13 train loss  49.2188 valid loss 0.921 and accuracy 0.7500
Epoch 14 train loss  49.2188 valid loss 0.921 and accuracy 0.7500
Epoch 15 train loss  

In [20]:
acc, points = evaluate(bilstm, dev_categ, trainset.encode, evaluator)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(bilstm, test_categ, trainset.encode, evaluator)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: medicine
accuracy: tensor([0.2468]), points: -3
----------
TEST Dominio: medicine
accuracy: tensor([0.2138]), points: -67


In [21]:
model_path = os.getcwd() + f'/trained_models/bilstm_{CATEGORY}'
torch.save(bilstm.state_dict(), model_path)

### Modelos supervisados IR

In [43]:
training_instances = load_dataset_from_pickle('../data/training_ir.pickle')
validation_instances = load_dataset_from_pickle('../data/validation_ir.pickle')
testing_instances = load_dataset_from_pickle('../data/testing_ir.pickle')
oversampled_training = load_dataset_from_pickle('../data/oversampled_training_ir.pickle')

In [44]:
training_categ = filter_by_category(oversampled_training, category=CATEGORY)
validation_categ = filter_by_category(validation_instances, category=CATEGORY)
testing_categ = filter_by_category(testing_instances, category=CATEGORY)

dev_categ = filter_by_category(validation, category=CATEGORY)
test_categ = filter_by_category(testing, category=CATEGORY)

In [45]:
vectorizer = Vectorizer.vectorize_ir_dataset(oversampled_training)
vocab = vectorizer.sentence_vocab
label_vocab = vectorizer.label_vocab

vectorizer.label_vocab.vocab2index = {1:1, 0:0}
vectorizer.label_vocab.index2vocab = {0:0, 1:1}

In [46]:
trainset = HeadQA_IR(instances=training_instances, vectorizer=vectorizer, right_padding=False, max_length=15)
validset = HeadQA_IR(instances=validation_instances, vectorizer=vectorizer, right_padding=False, max_length=15)
testset = HeadQA_IR(instances=testing_instances, vectorizer=vectorizer, right_padding=False, max_length=15)

In [47]:
batch_size = 32
train_dt = DataLoader(trainset, batch_size=batch_size,drop_last=True)
valid_dt = DataLoader(validset, batch_size=batch_size,drop_last=True)
test_dt = DataLoader(testset, batch_size=batch_size,drop_last=True)

In [48]:
word_to_idx = load_dataset_from_pickle('trained_models/biomedical_embeddings/word_to_index_ir.pickle')
embeddings = load_dataset_from_pickle('trained_models/biomedical_embeddings/wordvectors_ir.pickle')
embedding_file = "trained_models/biomedical_embeddings/Scielo_wiki_FastText300.vec"
words = vocab.vocab2index.keys()
embedding_matrix = make_embedding_matrix(embedding_file, list(words), word_to_idx, embeddings)

#### LSTM-QA

In [28]:
lstm_qa = LSTM_QA(vocab_size=len(vocab), hidden_size=64, x_size=trainset.max_length, n_classes=1, embedding_size=300,
               pretrained_embeddings=embedding_matrix)
optimizer = get_optimizer(lstm_qa, lr = 0.001, wd = 1e-5)

Loading pretrained embeddings...


In [29]:
training_results = train_ir(lstm_qa, optimizer, train_dt, valid_dt, validate_ir, epochs=50)

Epoch 0 train loss  0.5018 valid loss 0.003 and accuracy 0.7500
Epoch 1 train loss  0.4943 valid loss 0.003 and accuracy 0.7498
Epoch 2 train loss  0.4642 valid loss 0.004 and accuracy 0.7298
Epoch 3 train loss  0.4050 valid loss 0.004 and accuracy 0.7314
Epoch 4 train loss  0.3450 valid loss 0.004 and accuracy 0.7226
Epoch 5 train loss  0.2816 valid loss 0.004 and accuracy 0.6450
Epoch 6 train loss  0.2571 valid loss 0.005 and accuracy 0.6410
Epoch 7 train loss  0.2041 valid loss 0.007 and accuracy 0.7031
Epoch 8 train loss  0.1722 valid loss 0.007 and accuracy 0.7074
Epoch 9 train loss  0.1633 valid loss 0.006 and accuracy 0.6912
Epoch 10 train loss  0.1230 valid loss 0.007 and accuracy 0.6278
Epoch 11 train loss  0.1097 valid loss 0.008 and accuracy 0.6686
Epoch 12 train loss  0.1075 valid loss 0.007 and accuracy 0.6901
Epoch 13 train loss  0.0810 valid loss 0.008 and accuracy 0.6987
Epoch 14 train loss  0.0708 valid loss 0.008 and accuracy 0.6833
Epoch 15 train loss  0.0648 valid l

In [30]:
acc, points = evaluate(lstm_qa, dev_categ, trainset.encode, evaluator_ir)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(lstm_qa, test_categ, trainset.encode, evaluator_ir)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: medicine
accuracy: tensor([0.2381]), points: -11
----------
TEST Dominio: medicine
accuracy: tensor([0.2527]), points: 5


In [31]:
model_path = os.getcwd() + f'/trained_models/lstm_qa_{CATEGORY}'
torch.save(lstm_qa.state_dict(), model_path)

#### LSTM-QA/CNN

In [32]:
lstm_cnn_qa = LSTM_CNN_QA(vocab_size=len(vocab), hidden_size=64, x_size=trainset.max_length, n_classes=1, embedding_size=300,
               pretrained_embeddings=embedding_matrix)
optimizer = get_optimizer(lstm_cnn_qa, lr = 0.001, wd = 1e-5)

Loading pretrained embeddings...


In [33]:
training_results = train_ir(lstm_cnn_qa, optimizer, train_dt, valid_dt, validate_ir, epochs=50)

Epoch 0 train loss  0.5017 valid loss 0.003 and accuracy 0.7500
Epoch 1 train loss  0.4951 valid loss 0.003 and accuracy 0.7496
Epoch 2 train loss  0.4694 valid loss 0.004 and accuracy 0.7460
Epoch 3 train loss  0.4222 valid loss 0.004 and accuracy 0.5665
Epoch 4 train loss  0.3602 valid loss 0.005 and accuracy 0.6445
Epoch 5 train loss  0.2998 valid loss 0.005 and accuracy 0.6921
Epoch 6 train loss  0.2536 valid loss 0.006 and accuracy 0.6783
Epoch 7 train loss  0.2044 valid loss 0.007 and accuracy 0.7191
Epoch 8 train loss  0.1763 valid loss 0.009 and accuracy 0.7009
Epoch 9 train loss  0.1483 valid loss 0.008 and accuracy 0.6537
Epoch 10 train loss  0.1209 valid loss 0.006 and accuracy 0.6901
Epoch 11 train loss  0.0960 valid loss 0.009 and accuracy 0.6947
Epoch 12 train loss  0.0793 valid loss 0.008 and accuracy 0.6618
Epoch 13 train loss  0.0825 valid loss 0.010 and accuracy 0.7075
Epoch 14 train loss  0.0801 valid loss 0.008 and accuracy 0.6840
Epoch 15 train loss  0.0679 valid l

In [34]:
acc, points = evaluate(lstm_cnn_qa, dev_categ, trainset.encode, evaluator_ir)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(lstm_cnn_qa, test_categ, trainset.encode, evaluator_ir)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: medicine
accuracy: tensor([0.2381]), points: -11
----------
TEST Dominio: medicine
accuracy: tensor([0.2592]), points: 17


In [35]:
model_path = os.getcwd() + f'/trained_models/lstm_cnn_qa_{CATEGORY}'
torch.save(lstm_cnn_qa.state_dict(), model_path)

### Evaluacion

In [42]:
logistic_regressor = LogisticRegression(trainset.max_length, 1)
lstm = BasicLSTM(len(vocab), 64, trainset.max_length, 1, embedding_dim=100)
bilstm = BiLSTM_model(embedding_matrix.shape[1], embedding_matrix.shape[0], 1, 
                     pretrained_embeddings=embedding_matrix, max_length=trainset.max_length)

models = [logistic_regressor, lstm, bilstm]
paths = [os.getcwd() + f'/trained_models/logistic_regressor_{CATEGORY}', 
         os.getcwd() + f'/trained_models/basic_lstm_{CATEGORY}',         
         os.getcwd() + f'/trained_models/bilstm_{CATEGORY}']

print(paths[0])

for i, model in enumerate(models):
    model.load_state_dict(torch.load(paths[i]))
    model.eval()
    acc, points, acc_list, points_list = evaluate_better(model, dev_categ, trainset.encode, evaluator)
    print('DEV')
    print('Accuracy media', acc)
    print('Puntos media', points)
    print(acc_list)
    print(points_list)
    print('---------')
    acc, points, acc_list, points_list = evaluate_better(model, test_categ, trainset.encode, evaluator)
    print('TEST')
    print('Accuracy media', acc)
    print('Puntos media', points)
    print(acc_list)
    print(points_list)
    print('---------')
    print() 

C:\Users\tec005m\mds\TFM\head-qa-afi\code/trained_models/logistic_regressor_medicine
DEV
Accuracy media 0.25541127
Puntos media 5.0
[tensor(0.2554)]
[5]
---------
TEST
Accuracy media 0.23756345
Puntos media -11.5
[tensor(0.2457), tensor(0.2294)]
[-4, -19]
---------

DEV
Accuracy media 0.22943723
Puntos media -19.0
[tensor(0.2294)]
[-19]
---------
TEST
Accuracy media 0.21812025
Puntos media -29.5
[tensor(0.2284), tensor(0.2078)]
[-20, -39]
---------

DEV
Accuracy media 0.24675325
Puntos media -3.0
[tensor(0.2468)]
[-3]
---------
TEST
Accuracy media 0.21386588
Puntos media -33.5
[tensor(0.1940), tensor(0.2338)]
[-52, -15]
---------



In [49]:
lstm_qa = LSTM_QA(vocab_size=len(vocab), hidden_size=64, x_size=trainset.max_length, n_classes=1, embedding_size=300,
               pretrained_embeddings=embedding_matrix)
lstm_cnn_qa = LSTM_CNN_QA(vocab_size=len(vocab), hidden_size=64, x_size=trainset.max_length, n_classes=1, embedding_size=300,
               pretrained_embeddings=embedding_matrix)

models = [lstm_qa, lstm_cnn_qa]

paths = [os.getcwd() + f'/trained_models/lstm_qa_{CATEGORY}',
         os.getcwd() + f'/trained_models/lstm_cnn_qa_{CATEGORY}'
        ]

for i, model in enumerate(models):
    model.load_state_dict(torch.load(paths[i]))
    model.eval()
    acc, points, acc_list, points_list = evaluate_better(model, dev_categ, trainset.encode, evaluator_ir)
    print('DEV')
    print('Accuracy media', acc)
    print('Puntos media', points)
    print(acc_list)
    print(points_list)
    print('---------')
    acc, points, acc_list, points_list = evaluate_better(model, test_categ, trainset.encode, evaluator_ir)
    print('TEST')
    print('Accuracy media', acc)
    print('Puntos media', points)
    print(acc_list)
    print(points_list)
    print('---------')
    print() 

Loading pretrained embeddings...
Loading pretrained embeddings...
DEV
Accuracy media 0.23809524
Puntos media -11.0
[tensor(0.2381)]
[-11]
---------
TEST
Accuracy media 0.25266832
Puntos media 2.5
[tensor(0.2672), tensor(0.2381)]
[16, -11]
---------

DEV
Accuracy media 0.23809524
Puntos media -11.0
[tensor(0.2381)]
[-11]
---------
TEST
Accuracy media 0.25914317
Puntos media 8.5
[tensor(0.2759), tensor(0.2424)]
[24, -7]
---------

