In [13]:
import os
import time
import datetime
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader, TensorDataset, SequentialSampler, RandomSampler
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from utils_data import filter_by_category, save_dataset_to_pickle, load_dataset_from_pickle
from training import evaluate, validate_ir, evaluator_bert_ir, encoder_bert_ir, encoder_bert_ir_instance

import transformers
from transformers.optimization import AdamW
from transformers import BertForSequenceClassification, BertConfig, BertTokenizer, BertModel, BertForMaskedLM

from unsupervised_models import BERTSimilarity


%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
from datasets import load_dataset

# Load the 'es' configuration of the 'head_qa' dataset; the splits are pulled
# out of the returned object by key in the next cell.
data_es = load_dataset(
    'head_qa',
    'es',
)

Reusing dataset head_qa (C:\Users\tec005m\.cache\huggingface\datasets\head_qa\es\1.1.0\473dc5357942a3ff52963bd73cad0d167bd1bbc1ca5ca0732ee7372b480dd735)


In [15]:
# Bind each split of `data_es` to its own name (same lookup order as before:
# train, validation, test).
training = data_es['train']
validation = data_es['validation']
testing = data_es['test']

In [16]:
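# NOTE: disabled on purpose, kept for reference only. This looks like the code
# that produced the cached *_ir pickles loaded in the next cell -- TODO confirm.
# `parse_dataset_ir` and `random_oversamplig` are not imported in this
# notebook's import cell, so import them before re-enabling these lines.
# training_instances = parse_dataset_ir(training)
# validation_instances = parse_dataset_ir(validation)
# testing_instances = parse_dataset_ir(testing)

# oversampled_training = random_oversamplig(training_instances)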

In [17]:
# Load the four cached instance sets from disk. The generator is consumed
# left to right by the unpacking, so files are read in the listed order.
(
    training_instances,
    validation_instances,
    testing_instances,
    oversampled_training,
) = (
    load_dataset_from_pickle(pickle_path)
    for pickle_path in (
        '../data/training_ir.pickle',
        '../data/validation_ir.pickle',
        '../data/testing_ir.pickle',
        '../data/oversampled_training_ir.pickle',
    )
)

In [18]:
# Category labels iterated by the per-category evaluation loop below; each one
# is passed to filter_by_category(..., category=...).
categories = [
    'biology',
    'nursery',
    'pharmacology',
    'medicine',
    'psychology',
    'chemistry',
]

In [19]:
# --- Model configuration -----------------------------------------------------
batch_size = 8
BASE_BERT = 'dccuchile/bert-base-spanish-wwm-cased'

# The checkpoint name says "cased", so lower-casing is explicitly disabled.
tokenizer = BertTokenizer.from_pretrained(
    BASE_BERT,
    do_lower_case=False,
)

# Project-local similarity model built on the same pretrained checkpoint.
model = BERTSimilarity(pretrained_model=BASE_BERT)

Some weights of the model checkpoint at dccuchile/bert-base-spanish-wwm-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.we

In [None]:
# Per-category evaluation of `model` on the validation (DEV) and test splits.
#
# `results` ends up as:
#     {category: {'Accuracy_Dev': ..., 'Points_Dev': ...,
#                 'Accuracy_Test': ..., 'Points_Test': ...}}
# Entries are written as soon as each split finishes, so an interrupted run
# still leaves every completed category/split in `results`.
#
# Changes from the previous version of this cell:
#   * the filtered `validation_instances` / `testing_instances` subsets were
#     computed on every iteration but never read; they are no longer computed.
#   * the duplicated DEV / TEST code paths are driven by a single inner loop.
# Printed output and the keys stored in `results` are unchanged.
#
# NOTE(review): this cell assumes `evaluate` takes care of inference mode
# (eval mode / disabled autograd) itself -- confirm in training.py.
results = {}
for CATEGORY in categories:
    print(f"{CATEGORY} ...")

    results[CATEGORY] = {}

    # Restrict the dataset splits to the current category.
    dev_categ = filter_by_category(validation, category=CATEGORY)
    test_categ = filter_by_category(testing, category=CATEGORY)

    # (label printed in the log, data to score, key for accuracy, key for points)
    split_plan = (
        ('DEV', dev_categ, 'Accuracy_Dev', 'Points_Dev'),
        ('TEST', test_categ, 'Accuracy_Test', 'Points_Test'),
    )

    for split_label, split_data, acc_key, points_key in split_plan:
        print(f"Validation on {split_label} ...")

        acc, points = evaluate(model, split_data, encoder_bert_ir_instance, evaluator_bert_ir)
        results[CATEGORY][acc_key] = acc
        results[CATEGORY][points_key] = points

        print(f"{split_label} {CATEGORY}")
        print(f'Accuracy: {acc}')
        print(f'Points: {points}')
        print()

    print('-------------------')
    print()

biology ...
Validation on DEV ...
DEV biology
Accuracy: 0.22566372156143188
Points: -22

Validation on TEST ...
TEST biology
Accuracy: 0.21365638077259064
Points: -66

-------------------

nursery ...
Validation on DEV ...
DEV nursery
Accuracy: 0.25217390060424805
Points: 2

Validation on TEST ...
TEST nursery
Accuracy: 0.2461538463830948
Points: -7

-------------------

pharmacology ...
Validation on DEV ...
DEV pharmacology
Accuracy: 0.2133333384990692
Points: -33

Validation on TEST ...
TEST pharmacology
Accuracy: 0.20787745714187622
Points: -77

-------------------

medicine ...
Validation on DEV ...
DEV medicine
Accuracy: 0.21645021438598633
Points: -31

Validation on TEST ...


In [None]:
# Persist the per-category `results` dict with the project pickle helper.
# The file name has no directory component, unlike the '../data/' inputs
# loaded earlier.
save_dataset_to_pickle(
    'results_unsupervised_model_category.pickle',
    results,
)