In [1]:
import os
import pickle
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from utils_data import  Vocabulary, Vectorizer, HeadQA, HeadQA_IR
from utils_data import parse_dataset, parse_ir_dataset, random_oversamplig, random_undersampling
from utils_data import filter_by_category, save_dataset_to_pickle, load_dataset_from_pickle
import training
from training import get_optimizer, train, train_ir, validate, validate_ir, evaluator, evaluator_ir, evaluate
from training import load_embeddings_from_file, make_embedding_matrix
from training import pad_seq, encoder_bert, encoder_bert_ir, encoder_bert_instance, encoder_bert_ir_instance
from training import evaluator_bert, evaluator_bert_ir, evaluate_better

from supervised_models import LogisticRegression, BasicLSTM, BiLSTM_model
from ir_models import LSTM_QA, LSTM_CNN_QA, BERT_QA

%matplotlib inline
%load_ext autoreload
%autoreload 2



In [2]:
CATEGORY = 'psychology'

In [3]:
from datasets import load_dataset

data_es = load_dataset('head_qa', 'es' )
training, validation, testing = data_es['train'], data_es['validation'], data_es['test']

Reusing dataset head_qa (C:\Users\tec005m\.cache\huggingface\datasets\head_qa\es\1.1.0\473dc5357942a3ff52963bd73cad0d167bd1bbc1ca5ca0732ee7372b480dd735)


### Modelos supervisados puros

In [4]:
training_instances = load_dataset_from_pickle('../data/training.pickle')
validation_instances = load_dataset_from_pickle('../data/validation.pickle')
testing_instances = load_dataset_from_pickle('../data/testing.pickle')

oversampled_training = load_dataset_from_pickle('../data/oversampled_training.pickle')

In [5]:
training_categ = filter_by_category(oversampled_training, category=CATEGORY)
validation_categ = filter_by_category(validation_instances, category=CATEGORY)
testing_categ = filter_by_category(testing_instances, category=CATEGORY)

dev_categ = filter_by_category(validation, category=CATEGORY)
test_categ = filter_by_category(testing, category=CATEGORY)

In [6]:
vectorizer = Vectorizer.vectorize_training(training_categ)
vocab = vectorizer.sentence_vocab
label_vocab = vectorizer.label_vocab

In [7]:
trainset = HeadQA(instances=training_categ, vectorizer=vectorizer, right_padding=False, max_length=30)
validset = HeadQA(instances=validation_categ, vectorizer=vectorizer, right_padding=False, max_length=30)
testset = HeadQA(instances=testing_categ, vectorizer=vectorizer, right_padding=False, max_length=30)

In [8]:
batch_size = 32
train_dt = DataLoader(trainset, batch_size=batch_size,drop_last=True)
valid_dt = DataLoader(validset, batch_size=batch_size,drop_last=True)
test_dt = DataLoader(testset, batch_size=batch_size,drop_last=True)

#### Logistic Regressor

In [9]:
logistic_regressor = LogisticRegression(trainset.max_length, 1)
optimizer = get_optimizer(logistic_regressor, lr = 0.01, wd = 1e-5)

In [10]:
training_results = train(logistic_regressor, optimizer, train_dt, valid_dt, validate, epochs=30)



Epoch 0 train loss  56.9051 valid loss 1.603 and accuracy 0.5971
Epoch 1 train loss  50.0853 valid loss 1.856 and accuracy 0.4777
Epoch 2 train loss  57.5077 valid loss 1.957 and accuracy 0.5179
Epoch 3 train loss  43.7778 valid loss 2.762 and accuracy 0.3415
Epoch 4 train loss  51.4445 valid loss 2.859 and accuracy 0.3415
Epoch 5 train loss  50.0685 valid loss 2.762 and accuracy 0.2690
Epoch 6 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 7 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 8 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 9 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 10 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 11 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 12 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 13 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 14 train loss  50.7254 valid loss 2.762 and accuracy 0.2690
Epoch 15 train loss 

In [11]:
acc, points = evaluate(logistic_regressor, dev_categ, trainset.encode, evaluator)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(logistic_regressor, test_categ, trainset.encode, evaluator)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: psychology
accuracy: tensor([0.2434]), points: -6
----------
TEST Dominio: psychology
accuracy: tensor([0.2418]), points: -15


In [12]:
model_path = os.getcwd() + f'/trained_models/logistic_regressor_{CATEGORY}'
torch.save(logistic_regressor.state_dict(), model_path)

#### LSTM

In [14]:
lstm = BasicLSTM(len(vocab), 64, trainset.max_length, 1, embedding_dim=100)
optimizer = get_optimizer(lstm, lr = 0.001, wd = 1e-5)

In [14]:
training_results = train(lstm, optimizer, train_dt, valid_dt, validate, epochs=50)

Epoch 0 train loss  0.6819 valid loss 0.028 and accuracy 0.2500
Epoch 1 train loss  0.7120 valid loss 0.025 and accuracy 0.2500
Epoch 2 train loss  0.6964 valid loss 0.025 and accuracy 0.2500
Epoch 3 train loss  0.6931 valid loss 0.025 and accuracy 0.2500
Epoch 4 train loss  0.6895 valid loss 0.025 and accuracy 0.2500
Epoch 5 train loss  0.6879 valid loss 0.025 and accuracy 0.2500
Epoch 6 train loss  0.6754 valid loss 0.025 and accuracy 0.2545
Epoch 7 train loss  0.6679 valid loss 0.024 and accuracy 0.2790
Epoch 8 train loss  0.6281 valid loss 0.024 and accuracy 0.3259
Epoch 9 train loss  0.5839 valid loss 0.024 and accuracy 0.3873
Epoch 10 train loss  0.5322 valid loss 0.024 and accuracy 0.4509
Epoch 11 train loss  0.4781 valid loss 0.025 and accuracy 0.5424
Epoch 12 train loss  0.4301 valid loss 0.028 and accuracy 0.5502
Epoch 13 train loss  0.4043 valid loss 0.027 and accuracy 0.6038
Epoch 14 train loss  0.3766 valid loss 0.028 and accuracy 0.6105
Epoch 15 train loss  0.3536 valid l

In [15]:
acc, points = evaluate(lstm, dev_categ, trainset.encode, evaluator)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(lstm, test_categ, trainset.encode, evaluator)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: psychology
accuracy: tensor([0.3142]), points: 58
----------
TEST Dominio: psychology
accuracy: tensor([0.2593]), points: 17


In [16]:
model_path = os.getcwd() + f'/trained_models/basic_lstm_{CATEGORY}'
torch.save(lstm.state_dict(), model_path)

#### BiLSTM

In [12]:
word_to_idx = load_dataset_from_pickle('trained_models/biomedical_embeddings/word_to_index.pickle')
embeddings = load_dataset_from_pickle('trained_models/biomedical_embeddings/wordvectors.pickle')
embedding_file = "trained_models/biomedical_embeddings/Scielo_wiki_FastText300.vec"
words = vocab.vocab2index.keys()
embedding_matrix = make_embedding_matrix(embedding_file, list(words), word_to_idx, embeddings)

In [13]:
bilstm = BiLSTM_model(embedding_matrix.shape[1], embedding_matrix.shape[0], 1, 
                     pretrained_embeddings=embedding_matrix, max_length=trainset.max_length)
optimizer = get_optimizer(bilstm, lr = 0.01, wd = 1e-5)



In [19]:
training_results = train(bilstm, optimizer, train_dt, valid_dt, validate, epochs=50)

Epoch 0 train loss  0.3860 valid loss 2.762 and accuracy 0.2500
Epoch 1 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 2 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 3 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 4 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 5 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 6 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 7 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 8 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 9 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 10 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 11 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 12 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 13 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 14 train loss  50.5580 valid loss 2.762 and accuracy 0.2500
Epoch 15 train loss  

In [20]:
acc, points = evaluate(bilstm, dev_categ, trainset.encode, evaluator)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(bilstm, test_categ, trainset.encode, evaluator)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: psychology
accuracy: tensor([0.2478]), points: -2
----------
TEST Dominio: psychology
accuracy: tensor([0.2418]), points: -15


In [21]:
model_path = os.getcwd() + f'/trained_models/bilstm_{CATEGORY}'
torch.save(bilstm.state_dict(), model_path)

### Modelos supervisados IR

In [16]:
training_instances = load_dataset_from_pickle('../data/training_ir.pickle')
validation_instances = load_dataset_from_pickle('../data/validation_ir.pickle')
testing_instances = load_dataset_from_pickle('../data/testing_ir.pickle')
oversampled_training = load_dataset_from_pickle('../data/oversampled_training_ir.pickle')

In [17]:
training_categ = filter_by_category(oversampled_training, category=CATEGORY)
validation_categ = filter_by_category(validation_instances, category=CATEGORY)
testing_categ = filter_by_category(testing_instances, category=CATEGORY)

dev_categ = filter_by_category(validation, category=CATEGORY)
test_categ = filter_by_category(testing, category=CATEGORY)

In [18]:
vectorizer = Vectorizer.vectorize_ir_dataset(oversampled_training)
vocab = vectorizer.sentence_vocab
label_vocab = vectorizer.label_vocab

In [19]:
trainset = HeadQA_IR(instances=training_instances, vectorizer=vectorizer, right_padding=False, max_length=15)
validset = HeadQA_IR(instances=validation_instances, vectorizer=vectorizer, right_padding=False, max_length=15)
testset = HeadQA_IR(instances=testing_instances, vectorizer=vectorizer, right_padding=False, max_length=15)

In [20]:
batch_size = 32
train_dt = DataLoader(trainset, batch_size=batch_size,drop_last=True)
valid_dt = DataLoader(validset, batch_size=batch_size,drop_last=True)
test_dt = DataLoader(testset, batch_size=batch_size,drop_last=True)

In [21]:
word_to_idx = load_dataset_from_pickle('trained_models/biomedical_embeddings/word_to_index_ir.pickle')
embeddings = load_dataset_from_pickle('trained_models/biomedical_embeddings/wordvectors_ir.pickle')
embedding_file = "trained_models/biomedical_embeddings/Scielo_wiki_FastText300.vec"
words = vocab.vocab2index.keys()
embedding_matrix = make_embedding_matrix(embedding_file, list(words), word_to_idx, embeddings)

#### LSTM-QA

In [22]:
lstm_qa = LSTM_QA(vocab_size=len(vocab), hidden_size=64, x_size=trainset.max_length, n_classes=1, embedding_size=300,
               pretrained_embeddings=embedding_matrix)
optimizer = get_optimizer(lstm_qa, lr = 0.001, wd = 1e-5)

Loading pretrained embeddings...


In [29]:
training_results = train_ir(lstm_qa, optimizer, train_dt, valid_dt, validate_ir, epochs=50)

Epoch 0 train loss  0.5020 valid loss 0.003 and accuracy 0.7500
Epoch 1 train loss  0.4944 valid loss 0.003 and accuracy 0.7493
Epoch 2 train loss  0.4679 valid loss 0.003 and accuracy 0.7364
Epoch 3 train loss  0.4185 valid loss 0.004 and accuracy 0.6364
Epoch 4 train loss  0.3610 valid loss 0.004 and accuracy 0.6754
Epoch 5 train loss  0.2884 valid loss 0.004 and accuracy 0.6915
Epoch 6 train loss  0.2470 valid loss 0.006 and accuracy 0.7197
Epoch 7 train loss  0.2029 valid loss 0.007 and accuracy 0.6956
Epoch 8 train loss  0.1497 valid loss 0.008 and accuracy 0.6649
Epoch 9 train loss  0.1534 valid loss 0.007 and accuracy 0.6774
Epoch 10 train loss  0.1213 valid loss 0.008 and accuracy 0.6608
Epoch 11 train loss  0.0838 valid loss 0.008 and accuracy 0.6647
Epoch 12 train loss  0.0928 valid loss 0.008 and accuracy 0.6561
Epoch 13 train loss  0.0836 valid loss 0.008 and accuracy 0.6465
Epoch 14 train loss  0.0633 valid loss 0.010 and accuracy 0.6675
Epoch 15 train loss  0.0641 valid l

In [30]:
acc, points = evaluate(lstm_qa, dev_categ, trainset.encode, evaluator_ir)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(lstm_qa, test_categ, trainset.encode, evaluator_ir)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: psychology
accuracy: tensor([0.2212]), points: -26
----------
TEST Dominio: psychology
accuracy: tensor([0.2637]), points: 25


In [31]:
model_path = os.getcwd() + f'/trained_models/lstm_qa_{CATEGORY}'
torch.save(lstm_qa.state_dict(), model_path)

#### LSTM-QA/CNN

In [23]:
lstm_cnn_qa = LSTM_CNN_QA(vocab_size=len(vocab), hidden_size=64, x_size=trainset.max_length, n_classes=1, embedding_size=300,
               pretrained_embeddings=embedding_matrix)
optimizer = get_optimizer(lstm_cnn_qa, lr = 0.001, wd = 1e-5)

Loading pretrained embeddings...


In [33]:
training_results = train_ir(lstm_cnn_qa, optimizer, train_dt, valid_dt, validate_ir, epochs=50)

Epoch 0 train loss  0.5018 valid loss 0.003 and accuracy 0.7500
Epoch 1 train loss  0.4933 valid loss 0.003 and accuracy 0.7500
Epoch 2 train loss  0.4598 valid loss 0.004 and accuracy 0.7412
Epoch 3 train loss  0.4021 valid loss 0.004 and accuracy 0.6803
Epoch 4 train loss  0.3371 valid loss 0.004 and accuracy 0.6581
Epoch 5 train loss  0.2982 valid loss 0.005 and accuracy 0.7090
Epoch 6 train loss  0.2329 valid loss 0.005 and accuracy 0.6858
Epoch 7 train loss  0.1926 valid loss 0.008 and accuracy 0.7108
Epoch 8 train loss  0.1703 valid loss 0.007 and accuracy 0.6772
Epoch 9 train loss  0.1304 valid loss 0.008 and accuracy 0.6684
Epoch 10 train loss  0.1069 valid loss 0.007 and accuracy 0.6410
Epoch 11 train loss  0.0900 valid loss 0.010 and accuracy 0.6522
Epoch 12 train loss  0.0839 valid loss 0.008 and accuracy 0.6483
Epoch 13 train loss  0.0856 valid loss 0.007 and accuracy 0.6583
Epoch 14 train loss  0.0698 valid loss 0.008 and accuracy 0.6667
Epoch 15 train loss  0.0601 valid l

In [34]:
acc, points = evaluate(lstm_cnn_qa, dev_categ, trainset.encode, evaluator_ir)
print(f'DEV Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')
print('----------')
acc, points = evaluate(lstm_cnn_qa, test_categ, trainset.encode, evaluator_ir)
print(f'TEST Dominio: {CATEGORY}')
print(f'accuracy: {acc}, points: {points}')

DEV Dominio: psychology
accuracy: tensor([0.3009]), points: 46
----------
TEST Dominio: psychology
accuracy: tensor([0.2681]), points: 33


In [35]:
model_path = os.getcwd() + f'/trained_models/lstm_cnn_qa_{CATEGORY}'
torch.save(lstm_cnn_qa.state_dict(), model_path)

### Evaluacion

In [15]:
logistic_regressor = LogisticRegression(trainset.max_length, 1)
lstm = BasicLSTM(len(vocab), 64, trainset.max_length, 1, embedding_dim=100)
bilstm = BiLSTM_model(embedding_matrix.shape[1], embedding_matrix.shape[0], 1, 
                     pretrained_embeddings=embedding_matrix, max_length=trainset.max_length)

models = [logistic_regressor, lstm, bilstm]
paths = [os.getcwd() + f'/trained_models/logistic_regressor_{CATEGORY}', 
         os.getcwd() + f'/trained_models/basic_lstm_{CATEGORY}',         
         os.getcwd() + f'/trained_models/bilstm_{CATEGORY}']

print(paths[0])

for i, model in enumerate(models):
    model.load_state_dict(torch.load(paths[i]))
    model.eval()
    acc, points, acc_list, points_list = evaluate_better(model, dev_categ, trainset.encode, evaluator)
    print('DEV')
    print('Accuracy media', acc)
    print('Puntos media', points)
    print(acc_list)
    print(points_list)
    print('---------')
    acc, points, acc_list, points_list = evaluate_better(model, test_categ, trainset.encode, evaluator)
    print('TEST')
    print('Accuracy media', acc)
    print('Puntos media', points)
    print(acc_list)
    print(points_list)
    print('---------')
    print() 

C:\Users\tec005m\mds\TFM\head-qa-afi\code/trained_models/logistic_regressor_psychology




DEV
Accuracy media 0.24336283
Puntos media -6.0
[tensor(0.2434)]
[-6]
---------
TEST
Accuracy media 0.24169081
Puntos media -7.5
[tensor(0.2478), tensor(0.2356)]
[-2, -13]
---------

DEV
Accuracy media 0.3141593
Puntos media 58.0
[tensor(0.3142)]
[58]
---------
TEST
Accuracy media 0.25927538
Puntos media 8.5
[tensor(0.2652), tensor(0.2533)]
[14, 3]
---------

DEV
Accuracy media 0.24778761
Puntos media -2.0
[tensor(0.2478)]
[-2]
---------
TEST
Accuracy media 0.24173912
Puntos media -7.5
[tensor(0.2435), tensor(0.2400)]
[-6, -9]
---------



In [24]:
lstm_qa = LSTM_QA(vocab_size=len(vocab), hidden_size=64, x_size=trainset.max_length, n_classes=1, embedding_size=300,
               pretrained_embeddings=embedding_matrix)
lstm_cnn_qa = LSTM_CNN_QA(vocab_size=len(vocab), hidden_size=64, x_size=trainset.max_length, n_classes=1, embedding_size=300,
               pretrained_embeddings=embedding_matrix)

models = [lstm_qa, lstm_cnn_qa]

paths = [os.getcwd() + f'/trained_models/lstm_qa_{CATEGORY}',
         os.getcwd() + f'/trained_models/lstm_cnn_qa_{CATEGORY}'
        ]

for i, model in enumerate(models):
    model.load_state_dict(torch.load(paths[i]))
    model.eval()
    acc, points, acc_list, points_list = evaluate_better(model, dev_categ, trainset.encode, evaluator_ir)
    print('DEV')
    print('Accuracy media', acc)
    print('Puntos media', points)
    print(acc_list)
    print(points_list)
    print('---------')
    acc, points, acc_list, points_list = evaluate_better(model, test_categ, trainset.encode, evaluator_ir)
    print('TEST')
    print('Accuracy media', acc)
    print('Puntos media', points)
    print(acc_list)
    print(points_list)
    print('---------')
    print() 

Loading pretrained embeddings...
Loading pretrained embeddings...
DEV
Accuracy media 0.22123894
Puntos media -26.0
[tensor(0.2212)]
[-26]
---------
TEST
Accuracy media 0.26367152
Puntos media 12.5
[tensor(0.2696), tensor(0.2578)]
[18, 7]
---------

DEV
Accuracy media 0.30088496
Puntos media 46.0
[tensor(0.3009)]
[46]
---------
TEST
Accuracy media 0.2679227
Puntos media 16.5
[tensor(0.2870), tensor(0.2489)]
[34, -1]
---------

