# RoBERTa Regression

In [2]:
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm
import torch
from collections import defaultdict, Counter
import random
import math
import pickle

import src.eval_metric
import src.model
import src.dataloader

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Autoreload mode 2: re-import edited modules (such as the local `src` package)
# before each cell executes, so code changes are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Let pandas show up to 100 columns and 100 rows before truncating a displayed frame.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

In [73]:
# Read the three CSV splits (train / dev / test) in one pass; the order of the
# paths matches the order of the names on the left-hand side.
train_df, valid_df, test_df = map(
    pd.read_csv,
    (
        "../../data/training_data/train.csv",
        "../../data/training_data/dev.csv",
        "../../data/test_data/test_task1.csv",
    ),
)

## Fine-tune model

In [74]:
# Print the distinct corpus names found in the dev split's `text_name` column.
print(valid_df.text_name.unique())

['ZuCo1' 'ZuCo2' 'Provo' 'BSC' 'RSC' 'PAHEC' 'PoTeC' 'GECO-NL']


In [75]:
# Corpus to work on; used below to build the trainer, to filter the test split,
# and as the prefix of the predictions file name.
text_name = "Provo"

In [76]:
# Seed every RNG the notebook imports *before* the model is built, so that any
# random initialisation or shuffling during fine-tuning is repeatable under
# Restart & Run All.
# NOTE(review): assumes ModelTrainer does not seed internally — harmless if it does.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)  # seeds the torch generators on all devices

# Build the trainer for the selected corpus. The warning printed below shows that
# this loads the pretrained `roberta-base` checkpoint into a RobertaModel (the
# unused `lm_head.*` weights are expected to be dropped).
model_trainer = src.model.ModelTrainer(text_name=text_name)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [77]:
# Fine-tune with num_epochs=100, passing the train and dev frames. As the log
# below shows, every epoch prints the MAE for FFDAvg, FFDStd, TRTAvg and TRTStd,
# followed by "Overall MAE", which equals the mean of those four values.
# NOTE(review): presumably these MAEs are measured on `valid_df` — confirm in src.model.
model_trainer.train(train_df, valid_df, num_epochs=100)

Epoch: 1
MAE for FFDAvg: 21.22703362922555
MAE for FFDStd: 3.7296371058081004
MAE for TRTAvg: 30.763492068145155
MAE for TRTStd: 13.381674874998977
Overall MAE: 17.275459419544443
Epoch: 2
MAE for FFDAvg: 18.70604962580718
MAE for FFDStd: 3.0570409792957283
MAE for TRTAvg: 28.782050153109907
MAE for TRTStd: 11.630834067055766
Overall MAE: 15.543993706317144
Epoch: 3
MAE for FFDAvg: 17.961118737330054
MAE for FFDStd: 3.0028437763509963
MAE for TRTAvg: 27.785325892629125
MAE for TRTStd: 10.917190642044515
Overall MAE: 14.916619762088672
Epoch: 4
MAE for FFDAvg: 17.44839219199619
MAE for FFDStd: 2.9793707394343643
MAE for TRTAvg: 27.317246008827816
MAE for TRTStd: 10.731964769427497
Overall MAE: 14.619243427421468
Epoch: 5
MAE for FFDAvg: 17.166145969374675
MAE for FFDStd: 2.924629657691612
MAE for TRTAvg: 26.99689797504827
MAE for TRTStd: 10.634780087527938
Overall MAE: 14.430613422410623
Epoch: 6
MAE for FFDAvg: 16.935224402888316
MAE for FFDStd: 2.9038665961430783
MAE for TRTAvg: 26.71

## Make predictions

In [69]:
test_df = test_df[test_df.text_name == text_name]

In [70]:
predict_df = model_trainer.test(test_df)

ValueError: need at least one array to concatenate

In [71]:
# Display the prediction frame; per the saved output its columns include the
# word-level identifiers plus FFDAvg, FFDStd, TRTAvg and TRTStd.
# NOTE(review): the saved output below lists ZuCo2 rows and carries an earlier
# execution count than the `text_name = 'Provo'` cell, so it is stale — re-run
# the notebook top to bottom to refresh it.
predict_df

Unnamed: 0,language,sentence_id,word_id,word,text_name,FFDAvg,FFDStd,TRTAvg,TRTStd
994,en,3,0.0,With,ZuCo2,14.201734,6.923609,19.918240,14.100842
995,en,3,1.0,his,ZuCo2,12.138766,3.690258,18.772739,9.896083
996,en,3,2.0,interest,ZuCo2,12.235614,4.245877,20.214228,13.504408
997,en,3,3.0,in,ZuCo2,12.606339,4.326952,15.290634,6.981796
998,en,3,4.0,race,ZuCo2,13.503754,5.206844,20.432705,12.336987
...,...,...,...,...,...,...,...,...,...
2116,en,346,22.0,known,ZuCo2,12.836598,4.043539,17.862795,8.498557
2117,en,346,23.0,as,ZuCo2,12.745499,4.463977,13.680372,5.261281
2118,en,346,24.0,the,ZuCo2,11.971578,3.904126,13.483535,5.500702
2119,en,346,25.0,Bush,ZuCo2,12.097002,3.669006,15.308820,6.885437


In [None]:
# Save the predictions as "<text_name>_predictions.csv" in the current working
# directory, leaving the DataFrame index out of the file.
predict_df.to_csv(path_or_buf=text_name + "_predictions.csv", index=False)

In [None]:
# Score the model on the dev split, restricted to the selected corpus.
# `predict_df` holds predictions for the *test* split, so comparing it with
# `valid_df` (a different split, and not filtered by corpus) would pair up
# unrelated rows. Predict on the dev subset instead and evaluate against that
# same subset.
# NOTE(review): assumes `model_trainer.test` accepts a labelled frame and returns
# a frame row-aligned with its input — confirm in src.model.
valid_subset_df = valid_df[valid_df.text_name == text_name]
assert not valid_subset_df.empty, f"no dev rows found for text_name={text_name!r}"
valid_predict_df = model_trainer.test(valid_subset_df)
src.eval_metric.evaluate(valid_predict_df, valid_subset_df)