In [1]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# checkpoint identifier passed to both from_pretrained() calls in the next cell
roberta = 'deepset/roberta-base-squad2'

In [4]:
# load tokenizer and question-answering model from the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(roberta)
model = AutoModelForQuestionAnswering.from_pretrained(roberta)

In [14]:
# get predictions
# the question together with the passage the answer is looked up in
qa_input = dict(
    question='How many pretrained models are available in Transformers?',
    context=r"""Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
TensorFlow 2.0 and PyTorch.""",
)

# wrap the already-loaded model and tokenizer in a question-answering pipeline
nlp = pipeline('question-answering', tokenizer=tokenizer, model=model)

res = nlp(qa_input)

In [15]:
# display the result returned by the question-answering pipeline call above
res

{'score': 0.4159207046031952, 'start': 253, 'end': 261, 'answer': 'over 32+'}

In [3]:
import pandas as pd
import numpy as np

In [63]:
# load the FAQ table, using the first CSV column as the index
df_faq = pd.read_csv('faq-data/df_faq.csv', index_col=0)

# prepend "(<Type>) " to both text columns so each row carries its FAQ category
type_prefix = '(' + df_faq['Type'] + ') '
for text_column in ('Question', 'Answer'):
    df_faq[text_column] = type_prefix + df_faq[text_column]

# Sentence Transformers

In [4]:
from sentence_transformers import SentenceTransformer

# text columns used by the embedding cells below
# (df_faq must already exist in this kernel; presumably the recorded NameError came from running this cell first)
questions, answers = df_faq['Question'], df_faq['Answer']

  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'df_faq' is not defined

In [68]:
sentence_transformer = SentenceTransformer('bert-base-nli-mean-tokens')
# encode() is documented for a string or a list of strings; pass a plain list so the
# texts are handled by position and never looked up by the Series' index labels
question_embeddings = sentence_transformer.encode(list(questions))
# (number of questions, embedding dimension)
question_embeddings.shape

(175, 768)

In [69]:
from sklearn.metrics.pairwise import cosine_similarity

def get_qQ_similarities(query):
    """Rank the stored FAQ questions by cosine similarity to ``query``.

    Relies on the notebook globals ``sentence_transformer``,
    ``question_embeddings`` and ``questions`` defined in earlier cells.

    Parameters
    ----------
    query : str
        Free-text user question.

    Returns
    -------
    pandas.DataFrame
        Columns ``Similarity`` and ``Question``, sorted by descending
        similarity. The index is the row position in ``question_embeddings``.
    """
    query_embedding = sentence_transformer.encode([query])

    # one row per stored question, a single column for the single query
    similarities = cosine_similarity(
        question_embeddings, query_embedding
    )

    df = pd.DataFrame(similarities, columns = ['Similarity'])
    # Attach the texts by position. Assigning a Series would align on index labels,
    # which silently yields NaN / mismatched rows if `questions` is not indexed 0..n-1.
    df['Question'] = list(questions)
    df = df.sort_values(by='Similarity', ascending=False)

    return df

In [73]:
# example: rank the stored FAQ questions against a user query
get_qQ_similarities('When is the application deadline for scholarships?')

Unnamed: 0,Similarity,Question
22,0.774812,(International) How can I apply for a scholars...
18,0.771021,(International) What is the University’s appli...
124,0.757958,(AAO) Who will consider my course enrolment an...
168,0.748967,(AAO) Are there any bursaries or scholarships ...
125,0.718374,(AAO) When the course selection status reads “...


In [77]:
# encode() is documented for a string or a list of strings; pass a plain list so the
# texts are handled by position and never looked up by the Series' index labels
answer_embeddings = sentence_transformer.encode(list(answers))
# (number of answers, embedding dimension)
answer_embeddings.shape

(175, 768)

In [78]:
def get_qA_similarities(query):
    """Rank the stored FAQ answers by cosine similarity to ``query``.

    Relies on the notebook globals ``sentence_transformer``,
    ``answer_embeddings`` and ``answers`` defined in earlier cells.

    Parameters
    ----------
    query : str
        Free-text user question.

    Returns
    -------
    pandas.DataFrame
        Columns ``Similarity`` and ``Answer``, sorted by descending
        similarity. The index is the row position in ``answer_embeddings``.
    """
    query_embedding = sentence_transformer.encode([query])

    # one row per stored answer, a single column for the single query
    similarities = cosine_similarity(
        answer_embeddings, query_embedding
    )

    df = pd.DataFrame(similarities, columns = ['Similarity'])
    # Attach the texts by position. Assigning a Series would align on index labels,
    # which silently yields NaN / mismatched rows if `answers` is not indexed 0..n-1.
    df['Answer'] = list(answers)
    df = df.sort_values(by='Similarity', ascending=False)

    return df

In [86]:
# example: rank the stored FAQ answers against a user query
get_qA_similarities('How much does the programme cost?')

Unnamed: 0,Similarity,Answer
17,0.484508,(HKDSE (Non-local)) You may refer to our Fees ...
126,0.483337,(AAO) Under their SIS menu -> Enrollment -> En...
150,0.482735,(AAO) GPA is the abbreviation of Grade Point A...
125,0.479445,(AAO) It means it is still pending for approva...
24,0.475474,(International) You may refer to our Fees and ...
...,...,...
74,0.141538,(BASc) There is no particular subject requirem...
78,0.134559,"(BASc) At HKU, applicants will be competing wi..."
36,0.116894,(BSc 6901) No. Students are free to choose an...
31,0.048570,(BSc 6901) one programme code with a choice of...


In [1]:
import numpy as np
import pandas as pd

In [130]:
# create new column representing relevance between each QA pair
# existing rows are labelled 1; generate_negative_samples() below labels its rows 0
# note: this adds the column to df_faq itself
df_faq['QA Relevance'] = 1

In [271]:
# generate negative samples for each QA pair (the code below draws 2 per pair; Trial 1 used 24)
import random

def generate_negative_samples(df, n_negatives=2, seed=None):
    """Build mismatched (question, answer) rows labelled as irrelevant.

    For every question in ``df``, ``n_negatives`` answers are drawn at random
    from the answers belonging to the *other* rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Question'`` and ``'Answer'``. Rows are
        paired by position, so any index is accepted.
    n_negatives : int, default 2
        Number of negative answers sampled per question.
    seed : int or None, default None
        When given, sampling uses a private ``random.Random(seed)`` and is
        reproducible. When ``None`` the module-level ``random`` state is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Answer'``, ``'Question'``, ``'QA Relevance'`` (always 0),
        with ``len(df) * n_negatives`` rows and a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when ``n_negatives`` exceeds the
        number of other rows available.
    """
    rng = random if seed is None else random.Random(seed)
    questions = df['Question'].tolist()
    answers = df['Answer'].tolist()

    # Collect plain records and build the frame once: DataFrame.append was removed
    # in pandas 2.0, and growing a frame inside a loop is quadratic anyway.
    records = []
    for position, question in enumerate(questions):
        # Exclude this row's own answer by position; dropping by label only
        # worked when the index happened to be 0..n-1.
        candidates = answers[:position] + answers[position + 1:]
        for answer in rng.sample(candidates, n_negatives):
            records.append({'Answer': answer, 'Question': question, 'QA Relevance': 0})

    return pd.DataFrame(records, columns=['Answer', 'Question', 'QA Relevance'])

In [272]:
# generate the final dataset with negative samples included

df_negative = generate_negative_samples(df_faq)

# df_faq = df_faq.drop(columns=['Type'])
# DataFrame.append was removed in pandas 2.0; pd.concat with ignore_index=True
# stacks the positive and negative rows and renumbers the index in one step
df_final = pd.concat([df_faq, df_negative], ignore_index=True)

# rename the columns to the required 'text_a', 'text_b', 'labels'
df_final = df_final.rename(columns={'Question': 'text_a', 'Answer': 'text_b', 'QA Relevance': 'labels'})

In [274]:
# persist the combined positive/negative dataset
# NOTE(review): a later cell reads 'data/df_final.csv', not this 'faq-data/' path — confirm which directory is intended
df_final.to_csv('faq-data/df_final.csv')

In [2]:
# reload the saved dataset, using the first CSV column as the index
# NOTE(review): the save cell writes to 'faq-data/df_final.csv' but this reads from 'data/' — confirm the path
df_final = pd.read_csv('data/df_final.csv', index_col=0)

In [11]:
from sklearn.model_selection import train_test_split
# hold out 20% of the labelled pairs for evaluation; the fixed seed makes the split repeatable
df_train, df_test = train_test_split(
    df_final,
    test_size=0.2,
    shuffle=True,
    random_state=7600,
)

In [5]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import logging

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# only let warnings and above through from the `transformers` logger,
# while the root logger is configured at INFO
transformers_logger = logging.getLogger('transformers')
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

In [7]:
import torch
# passed as use_cuda to every ClassificationModel below
cuda_available = torch.cuda.is_available()

**Trial 1: RoBERTa using 24 negative samples per positive sample (same as in the paper)**
- N_EPOCHS = 5

In [238]:
# define model configuration parameters
# (N_EPOCHS is reassigned by every trial's configuration cell)
N_EPOCHS = 5

# configure the classification model
# only the epoch count is set explicitly; all other arguments are left at their defaults
bert_qA_args = ClassificationArgs(num_train_epochs=N_EPOCHS)

In [241]:
# define the model
# NOTE(review): despite the `bert_qA` name this loads 'roberta-base'; the same name is rebound to a BERT model in Trial 5
bert_qA = ClassificationModel('roberta', 'roberta-base', use_cuda=cuda_available, args=bert_qA_args)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

In [243]:
# fine-tune on the training split; no output_dir is passed here, unlike the later trials
# (the recorded log reports the model being saved to outputs/)
bert_qA.train_model(df_train)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  0%|          | 7/3500 [00:06<50:31,  1.15it/s]  
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_128_2_3
Epochs 0/5. Running Loss:    0.0232: 100%|██████████| 438/438 [00:26<00:00, 16.49it/s]
Epochs 1/5. Running Loss:    0.0157: 100%|██████████| 438/438 [00:25<00:00, 17.41it/s]
Epochs 2/5. Running Loss:    2.2101: 100%|██████████| 438/438 [00:24<00:00, 17.61it/s]
Epochs 3/5. Running Loss:    0.0162: 100%|██████████| 438/438 [00:25<00:00, 17.19it/s]
Epochs 4/5. Running Loss:    0.0152: 100%|██████████| 438/438 [00:27<00:00, 15.89it/s]
Epoch 5 of 5: 100%|██████████| 5/5 [02:18<00:00, 27.75s/it]
INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to outputs/.


(2190, 0.1949672647807152)

In [253]:
# evaluate on the held-out split; unpacked as (metrics, model outputs, wrong predictions)
# recorded run: tp=0 and fp=0, i.e. every test pair was predicted as class 0
t1_result, t1_model_outputs, t1_wrong_predictions = bert_qA.eval_model(
    df_test
)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  0%|          | 2/875 [00:04<29:06,  2.00s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
Running Evaluation: 100%|██████████| 110/110 [00:01<00:00, 83.64it/s]
INFO:simpletransformers.classification.classification_model:{'mcc': 0.0, 'tp': 0, 'tn': 841, 'fp': 0, 'fn': 34, 'auroc': 0.5, 'auprc': 0.038857142857142854, 'eval_loss': 0.18288354006680577}


In [21]:
# three hand-written queries, each paired with the same FAQ answer text
_important_dates_answer = 'You may wish to go through the Important Dates when applying to the University. Applications submitted after the first round application deadline will be considered on a rolling basis subject to programme availability.'

qA_test_samples = [
    [sample_query, _important_dates_answer]
    for sample_query in (
        'What is my application deadline?',
        'When will I know the result of my application?',
        'Hi',
    )
]

In [309]:
# classify the hand-written query/answer pairs with the Trial 1 model
t1_predictions, t1_raw_outputs = bert_qA.predict(qA_test_samples)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
 33%|███▎      | 1/3 [00:03<00:07,  3.62s/it]
100%|██████████| 1/1 [00:00<00:00,  3.43it/s]


In [310]:
# predicted labels, in the order of qA_test_samples
t1_predictions

array([0, 0, 0], dtype=int64)

**Trial 2: RoBERTa with 2 negative samples per positive sample**
- N_EPOCHS = 7

In [291]:
# configure the classification model
# (N_EPOCHS is reassigned by every trial's configuration cell)
N_EPOCHS = 7
roberta_qA_args = ClassificationArgs(num_train_epochs=N_EPOCHS)

In [292]:
# new classifier built from the 'roberta-base' checkpoint for Trial 2
roberta_qA = ClassificationModel('roberta', 'roberta-base', use_cuda=cuda_available, args=roberta_qA_args)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

In [294]:
# fine-tune on the training split, writing to a trial-specific output directory
roberta_qA.train_model(df_train, output_dir='outputs/roberta-t2')

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  0%|          | 1/420 [00:03<25:34,  3.66s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_128_2_3
Epochs 0/7. Running Loss:    1.0736: 100%|██████████| 53/53 [00:03<00:00, 16.00it/s]
Epochs 1/7. Running Loss:    0.7789: 100%|██████████| 53/53 [00:03<00:00, 16.48it/s]
Epochs 2/7. Running Loss:    1.2007: 100%|██████████| 53/53 [00:03<00:00, 16.74it/s]
Epochs 3/7. Running Loss:    0.0070: 100%|██████████| 53/53 [00:03<00:00, 16.12it/s]
Epochs 4/7. Running Loss:    0.0018: 100%|██████████| 53/53 [00:03<00:00, 16.16it/s]
Epochs 5/7. Running Loss:    0.0013: 100%|██████████| 53/53 [00:03<00:00, 16.29it/s]
Epochs 6/7. Running Loss:    0.0012: 100%|██████████| 53/53 [00:03<00:00, 16.06it/s]
Epoch 7 of 7: 100%|██████████| 7/7 [00:35<00:00,  5.10s/it]
INFO:simpletransformers.classification.classificatio

(371, 0.27318945784131793)

In [295]:
# evaluate on the held-out split; unpacked as (metrics, model outputs, wrong predictions)
t2_result, t2_model_outputs, t2_wrong_predictions = roberta_qA.eval_model(
    df_test
)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  1%|          | 1/105 [00:03<06:25,  3.71s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
Running Evaluation: 100%|██████████| 14/14 [00:00<00:00, 61.35it/s]
INFO:simpletransformers.classification.classification_model:{'mcc': 0.7880977119810487, 'tp': 25, 'tn': 71, 'fp': 5, 'fn': 4, 'auroc': 0.9686932849364791, 'auprc': 0.9395412408405989, 'eval_loss': 0.42512241857392447}


In [307]:
# classify the hand-written query/answer pairs with the Trial 2 model
t2_predictions, t2_raw_outputs = roberta_qA.predict(qA_test_samples)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
 33%|███▎      | 1/3 [00:03<00:07,  3.61s/it]
100%|██████████| 1/1 [00:00<00:00,  3.12it/s]


In [337]:
# test some tricky sentences
query = 'I wanna know what the application deadline is.'
available_answers = df_faq['Answer']

# pair the single query with every answer in the FAQ table
qa_test_samples_tricky = [[query, a] for a in available_answers]

# get the predictions
t2_predictions_tricky, t2_raw_outputs_tricky = roberta_qA.predict(qa_test_samples_tricky)

# format the predictions into an easy-to-read dataframe
df_t2_predictions_tricky = pd.DataFrame(qa_test_samples_tricky, columns=['User Query', 'Answer in Database'])
# Attach the questions by position: the frame above has a fresh 0..n-1 index, and assigning
# the df_faq Series directly would align on df_faq's index labels instead of row order.
df_t2_predictions_tricky['Question in Database'] = df_faq['Question'].to_numpy()
df_t2_predictions_tricky['Predicted QA Relevance'] = t2_predictions_tricky

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  1%|          | 1/175 [00:03<10:33,  3.64s/it]
100%|██████████| 22/22 [00:00<00:00, 35.57it/s]


In [1]:
# save the Trial 2 tricky-query predictions under the Trial 2 output directory
# (needs df_t2_predictions_tricky from the previous cell in the same kernel session; the recorded run raised NameError)
df_t2_predictions_tricky.to_csv('outputs/roberta-t2/df_t2_predictions_tricky.csv')

NameError: name 'df_t2_predictions_tricky' is not defined

**Trial 3: RoBERTa checkpoint previously fine-tuned on SQuAD 2.0 (for question answering, not for classification)**
- N_EPOCHS = 12

In [24]:
# configure the classification model
# (N_EPOCHS is reassigned by every trial's configuration cell)
N_EPOCHS = 12
roberta_qA_squad2_args = ClassificationArgs(num_train_epochs=N_EPOCHS)

In [25]:
# classifier initialised from the SQuAD2 question-answering checkpoint
# (the recorded log shows the qa_outputs weights being dropped and the classifier head newly initialised)
roberta_qA_squad2 = ClassificationModel('roberta', 'deepset/roberta-base-squad2', use_cuda=cuda_available, args=roberta_qA_squad2_args)

Some weights of the model checkpoint at deepset/roberta-base-squad2 were not used when initializing RobertaForSequenceClassification: ['qa_outputs.bias', 'qa_outputs.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at deepset/roberta-base-squad2 and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to

In [27]:
# fine-tune on the training split, writing to a trial-specific output directory
roberta_qA_squad2.train_model(df_train, output_dir='outputs/roberta-t3-squad2')

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  0%|          | 1/420 [00:02<17:44,  2.54s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_128_2_3
Epochs 0/12. Running Loss:    1.0514: 100%|██████████| 53/53 [00:03<00:00, 14.58it/s]
Epochs 1/12. Running Loss:    0.0050: 100%|██████████| 53/53 [00:03<00:00, 16.10it/s]
Epochs 2/12. Running Loss:    0.0099: 100%|██████████| 53/53 [00:03<00:00, 16.11it/s]
Epochs 3/12. Running Loss:    0.6107: 100%|██████████| 53/53 [00:03<00:00, 16.67it/s]
Epochs 4/12. Running Loss:    0.0019: 100%|██████████| 53/53 [00:03<00:00, 16.34it/s]
Epochs 5/12. Running Loss:    0.0063: 100%|██████████| 53/53 [00:03<00:00, 16.25it/s]
Epochs 6/12. Running Loss:    0.0002: 100%|██████████| 53/53 [00:03<00:00, 16.37it/s]
Epochs 7/12. Running Loss:    0.0002: 100%|██████████| 53/53 [00:03<00:00, 16.24it/s]
Epochs 8/12. Runnin

(636, 0.09954484901244535)

In [28]:
# evaluate on the held-out split; unpacked as (metrics, model outputs, wrong predictions)
t3_result, t3_model_outputs, t3_wrong_predictions = roberta_qA_squad2.eval_model(
    df_test
)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  1%|          | 1/105 [00:03<05:39,  3.26s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
Running Evaluation: 100%|██████████| 14/14 [00:00<00:00, 34.54it/s]
INFO:simpletransformers.classification.classification_model:{'mcc': 0.78353372286637, 'tp': 24, 'tn': 72, 'fp': 4, 'fn': 5, 'auroc': 0.9516787658802179, 'auprc': 0.9372552124290898, 'eval_loss': 0.6542652781520572}


In [29]:
# classify the hand-written query/answer pairs with the Trial 3 model
t3_predictions, t3_raw_outputs = roberta_qA_squad2.predict(qA_test_samples)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
 33%|███▎      | 1/3 [00:02<00:04,  2.49s/it]
100%|██████████| 1/1 [00:00<00:00, 10.52it/s]


In [30]:
# predicted labels, in the order of qA_test_samples
t3_predictions

array([1, 1, 0], dtype=int64)

**Trial 4: MPNet (sentence-transformers/all-mpnet-base-v2)**
- N_EPOCHS = 20

In [17]:
# configure the classification model
# (N_EPOCHS is reassigned by every trial's configuration cell)
N_EPOCHS = 20
mpnet_qA_args = ClassificationArgs(num_train_epochs=N_EPOCHS)

In [18]:
# classifier initialised from the all-mpnet-base-v2 sentence-transformers checkpoint
mpnet_qA = ClassificationModel('mpnet', 'sentence-transformers/all-mpnet-base-v2', use_cuda=cuda_available, args=mpnet_qA_args)

Some weights of the model checkpoint at sentence-transformers/all-mpnet-base-v2 were not used when initializing MPNetForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing MPNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MPNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MPNetForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/all-mpnet-base-v2 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a

In [19]:
# fine-tune on the training split, writing to a trial-specific output directory
mpnet_qA.train_model(df_train, output_dir='outputs/mpnet-t4')

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  0%|          | 1/420 [00:02<17:57,  2.57s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_mpnet_128_2_3
Epochs 0/20. Running Loss:    0.9338: 100%|██████████| 53/53 [00:03<00:00, 15.32it/s]
Epochs 1/20. Running Loss:    0.3824: 100%|██████████| 53/53 [00:03<00:00, 16.71it/s]
Epochs 2/20. Running Loss:    0.0786: 100%|██████████| 53/53 [00:03<00:00, 16.37it/s]
Epochs 3/20. Running Loss:    0.0244: 100%|██████████| 53/53 [00:03<00:00, 16.52it/s]
Epochs 4/20. Running Loss:    1.1250: 100%|██████████| 53/53 [00:03<00:00, 16.93it/s]
Epochs 5/20. Running Loss:    0.0063: 100%|██████████| 53/53 [00:03<00:00, 16.76it/s]
Epochs 6/20. Running Loss:    0.0050: 100%|██████████| 53/53 [00:03<00:00, 16.63it/s]
Epochs 7/20. Running Loss:    0.0035: 100%|██████████| 53/53 [00:03<00:00, 16.72it/s]
Epochs 8/20. Running 

(1060, 0.10138939568456613)

In [20]:
# evaluate on the held-out split; unpacked as (metrics, model outputs, wrong predictions)
t4_result, t4_model_outputs, t4_wrong_predictions = mpnet_qA.eval_model(
    df_test
)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  1%|          | 1/105 [00:03<05:47,  3.34s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_mpnet_128_2_3
Running Evaluation: 100%|██████████| 14/14 [00:00<00:00, 42.07it/s]
INFO:simpletransformers.classification.classification_model:{'mcc': 0.5994931313360115, 'tp': 21, 'tn': 67, 'fp': 9, 'fn': 8, 'auroc': 0.8888384754990926, 'auprc': 0.8193617097508505, 'eval_loss': 0.9931466409138271}


In [22]:
# classify the hand-written query/answer pairs with the Trial 4 model
t4_predictions, t4_raw_outputs = mpnet_qA.predict(qA_test_samples)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
 33%|███▎      | 1/3 [00:02<00:05,  2.57s/it]
100%|██████████| 1/1 [00:00<00:00,  3.35it/s]


In [23]:
# predicted labels, in the order of qA_test_samples
t4_predictions

array([1, 1, 1], dtype=int64)

In [35]:
# NOTE(review): predictions_tricky is only assigned in the NEXT cell, so on a fresh
# top-to-bottom run this display fails — consider moving it below that cell
predictions_tricky

Unnamed: 0,User Query,Answer in Database,Question in Database
0,I wanna know what the application deadline is.,We welcome your application to HKU through the...,How do I apply to HKU through JUPAS scheme?
1,I wanna know what the application deadline is.,Students should not forget that in addition to...,What are the common mistakes as a JUPAS applic...
2,I wanna know what the application deadline is.,All students who apply to HKU on the basis of ...,How can I apply to HKU as a HKDSE repeater?
3,I wanna know what the application deadline is.,To have your application considered for admiss...,What are the minimum university entrance requi...
4,I wanna know what the application deadline is.,"Starting from the academic year 2020/2021, HKU...",How is the admission score calculated?
...,...,...,...
170,I wanna know what the application deadline is.,You might try to look at the FAQ compiled by t...,I still have other questions regarding the set...
171,I wanna know what the application deadline is.,Please visit the website of the Scholarships O...,Are there scholarships that accept application...
172,I wanna know what the application deadline is.,You need to apply for leave of absence if you ...,When and how do I apply for leave of absence?
173,I wanna know what the application deadline is.,"To put it simply, plagiarism is defined as the...",What is plagiarism and what happens if I am fo...


In [37]:
# test some tricky sentences
query = 'I wanna know what the application deadline is.'
available_answers = df_faq['Answer']

# pair the single query with every answer in the FAQ table
qa_test_samples_tricky = [[query, a] for a in available_answers]

# get the predictions
t4_predictions_tricky, t4_raw_outputs_tricky = mpnet_qA.predict(qa_test_samples_tricky)

# format the predictions into an easy-to-read dataframe
predictions_tricky = pd.DataFrame(qa_test_samples_tricky, columns=['User Query', 'Answer in Database'])
# Attach the questions by position: the frame above has a fresh 0..n-1 index, and assigning
# the df_faq Series directly would align on df_faq's index labels instead of row order.
predictions_tricky['Question in Database'] = df_faq['Question'].to_numpy()
predictions_tricky['Predicted QA Relevance'] = t4_predictions_tricky

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  1%|          | 1/175 [00:02<07:24,  2.56s/it]
100%|██████████| 22/22 [00:00<00:00, 34.61it/s]


In [40]:
# write the Trial 4 tricky-query predictions, rows predicted relevant first
predictions_tricky.sort_values(by='Predicted QA Relevance', ascending=False).to_csv('outputs/mpnet-t4/df_t4_predictions_tricky.csv')

**Trial 5: BERT (bert-base-uncased)**
- N_EPOCHS = 10

In [24]:
# configure the classification model
# NOTE: rebinds N_EPOCHS and bert_qA_args, which Trial 1 also assigned
N_EPOCHS = 10
bert_qA_args = ClassificationArgs(num_train_epochs=N_EPOCHS)

In [25]:
# NOTE: rebinds `bert_qA`, which Trial 1 used for a 'roberta-base' model; here it is an actual BERT classifier
bert_qA = ClassificationModel('bert', 'bert-base-uncased', use_cuda=cuda_available, args=bert_qA_args)

Downloading: 100%|██████████| 570/570 [00:00<00:00, 569kB/s]
Downloading: 100%|██████████| 420M/420M [00:21<00:00, 20.8MB/s] 
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertFor

In [26]:
# fine-tune on the training split, writing to a trial-specific output directory
bert_qA.train_model(df_train, output_dir='outputs/bert-t5')

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  0%|          | 1/420 [00:02<18:08,  2.60s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_bert_128_2_3
Epochs 0/10. Running Loss:    0.9420: 100%|██████████| 53/53 [00:03<00:00, 16.32it/s]
Epochs 1/10. Running Loss:    0.5595: 100%|██████████| 53/53 [00:02<00:00, 17.94it/s]
Epochs 2/10. Running Loss:    0.0569: 100%|██████████| 53/53 [00:02<00:00, 18.32it/s]
Epochs 3/10. Running Loss:    0.8850: 100%|██████████| 53/53 [00:02<00:00, 17.96it/s]
Epochs 4/10. Running Loss:    0.8934: 100%|██████████| 53/53 [00:02<00:00, 17.94it/s]
Epochs 5/10. Running Loss:    0.0277: 100%|██████████| 53/53 [00:02<00:00, 17.98it/s]
Epochs 6/10. Running Loss:    0.0058: 100%|██████████| 53/53 [00:03<00:00, 17.25it/s]
Epochs 7/10. Running Loss:    0.0051: 100%|██████████| 53/53 [00:02<00:00, 17.84it/s]
Epochs 8/10. Running L

(530, 0.2881794899702072)

In [27]:
# evaluate on the held-out split; unpacked as (metrics, model outputs, wrong predictions)
t5_result, t5_model_outputs, t5_wrong_predictions = bert_qA.eval_model(
    df_test
)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  1%|          | 1/105 [00:03<05:43,  3.30s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_bert_128_2_3
Running Evaluation: 100%|██████████| 14/14 [00:00<00:00, 62.44it/s]
INFO:simpletransformers.classification.classification_model:{'mcc': 0.5808188131873345, 'tp': 21, 'tn': 66, 'fp': 10, 'fn': 8, 'auroc': 0.8584392014519056, 'auprc': 0.7138490267997438, 'eval_loss': 0.7921056428125927}


In [28]:
# classify the hand-written query/answer pairs with the Trial 5 model
t5_predictions, t5_raw_outputs = bert_qA.predict(qA_test_samples)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
 33%|███▎      | 1/3 [00:02<00:05,  2.54s/it]
100%|██████████| 1/1 [00:00<00:00,  4.29it/s]


In [30]:
# predicted labels, in the order of qA_test_samples
t5_predictions

array([1, 1, 1], dtype=int64)

In [31]:
# configure the classification model
# arguments for the locally stored checkpoint loaded in the next cell
N_EPOCHS = 7
bert_qA_pretrained_args = ClassificationArgs(num_train_epochs=N_EPOCHS)

In [32]:
# The model type must match the checkpoint architecture: 'uncased_L-12_H-768_A-12' is
# (by its name — confirm) a BERT-Base uncased checkpoint, so it is loaded as 'bert', not 'roberta'.
# NOTE(review): the recorded OSError also says the directory needs a config.json —
# presumably the checkpoint must first be converted to / saved in Hugging Face format.
bert_qA_pretrained = ClassificationModel('bert', 'data/uncased_L-12_H-768_A-12', use_cuda=cuda_available, args=bert_qA_pretrained_args)

OSError: Can't load config for 'data/uncased_L-12_H-768_A-12'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'data/uncased_L-12_H-768_A-12' is the correct path to a directory containing a config.json file

In [None]:
# NOTE(review): duplicate of the Trial 3 training cell earlier in the notebook (this copy has no execution count);
# running it would call train_model on roberta_qA_squad2 a second time — confirm whether this cell should stay
roberta_qA_squad2.train_model(df_train, output_dir='outputs/roberta-t3-squad2')

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  0%|          | 1/420 [00:02<17:44,  2.54s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_128_2_3
Epochs 0/12. Running Loss:    1.0514: 100%|██████████| 53/53 [00:03<00:00, 14.58it/s]
Epochs 1/12. Running Loss:    0.0050: 100%|██████████| 53/53 [00:03<00:00, 16.10it/s]
Epochs 2/12. Running Loss:    0.0099: 100%|██████████| 53/53 [00:03<00:00, 16.11it/s]
Epochs 3/12. Running Loss:    0.6107: 100%|██████████| 53/53 [00:03<00:00, 16.67it/s]
Epochs 4/12. Running Loss:    0.0019: 100%|██████████| 53/53 [00:03<00:00, 16.34it/s]
Epochs 5/12. Running Loss:    0.0063: 100%|██████████| 53/53 [00:03<00:00, 16.25it/s]
Epochs 6/12. Running Loss:    0.0002: 100%|██████████| 53/53 [00:03<00:00, 16.37it/s]
Epochs 7/12. Running Loss:    0.0002: 100%|██████████| 53/53 [00:03<00:00, 16.24it/s]
Epochs 8/12. Runnin

(636, 0.09954484901244535)

In [None]:
# NOTE(review): duplicate of the Trial 3 evaluation cell earlier in the notebook (this copy has no execution count)
t3_result, t3_model_outputs, t3_wrong_predictions = roberta_qA_squad2.eval_model(
    df_test
)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
  1%|          | 1/105 [00:03<05:39,  3.26s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
Running Evaluation: 100%|██████████| 14/14 [00:00<00:00, 34.54it/s]
INFO:simpletransformers.classification.classification_model:{'mcc': 0.78353372286637, 'tp': 24, 'tn': 72, 'fp': 4, 'fn': 5, 'auroc': 0.9516787658802179, 'auprc': 0.9372552124290898, 'eval_loss': 0.6542652781520572}


In [None]:
# NOTE(review): duplicate of the Trial 3 prediction cell earlier in the notebook (this copy has no execution count)
t3_predictions, t3_raw_outputs = roberta_qA_squad2.predict(qA_test_samples)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
 33%|███▎      | 1/3 [00:02<00:04,  2.49s/it]
100%|██████████| 1/1 [00:00<00:00, 10.52it/s]


In [None]:
# NOTE(review): duplicate of the Trial 3 display cell earlier in the notebook (this copy has no execution count)
t3_predictions

array([1, 1, 0], dtype=int64)