# Preparing the Environment

In [2]:
%%capture
!pip install transformers
!pip install nltk

In [3]:
import math
import os
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from torch.utils.data import Dataset
import torch
import random
import nltk

nltk.download('punkt')

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [5]:
random_seed = 2023
torch.manual_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)

# Preprocessing the Datasets

In [6]:
train_df = pd.read_csv('input/train_pub.csv')
dev_df = pd.read_csv('input/dev_pub.csv')
test_df = pd.read_csv('input/test_pub.csv')

In [7]:
train_df.head()

Unnamed: 0,response_id,rating_chooses_appropriate_action,rating_commits_to_action,rating_gathers_information,rating_identifies_issues_opportunities,rating_interprets_information,rating_involves_others,rating_decision_making_final_score,text_exercise_4,text_exercise_5,text_exercise_6,text_exercise_7,text_exercise_8,text_exercise_9,text_exercise_10,text_exercise_11,text_exercise_12,text_exercise_13,text_exercise_14,text_exercise_15,text_exercise_16,text_exercise_17,text_exercise_18,text_exercise_19,text_exercise_final
0,NlmLRJzhnNt,2.0,2,2,2.0,2,3.0,2,,"Hi Tracy, This sounds very ionteresting, prior...",,,"Hi Taylor: That sounds like a great idea, go a...",Taylor/Kim: I sent you an email earlier regard...,"Haley, I do not like to base a promotion based...",,,Kim/Taylor: At this point you are far more fam...,,,"Taylor, POlease hold off on collecting hand wr...",,"Taylor, Please hold off on contact Tom Weedler...","Kim, Please meet with HR Liang and Tracy to fi...",See below for answers.Original Message----- Fr...
1,mQoZghwfMUT,4.0,3,4,3.0,4,2.0,6,,"Hi Tracy, I am happy to see we have a self mot...",,,"Hi Talyor, No I do not think it is a good idea...",,"Hi Mr. Roth, I have chosen Blaine. His perform...",,,,,,"Hi Taylor, Stop your investigation immediately...",,"Hi Taylor, It is possible that everything is a...","Liang, Stop what you are doing. First of all, ...",Original Message----- From: Assessment Adminis...
2,FjIOCrTtDgF,2.0,2,2,2.0,2,2.0,2,,,J.J please can you work with her for two more ...,J.J We will look at them together Thank You Ja...,,I will look into this matter. Thank You Jamie ...,,"Great, I think room BP07 AT 12:00pm to 1:00pm ...",Thank you I see something I can do. Thank You...,,Thank you for keeping me in the know. Keep up ...,,,Thank You I'll take care of it. Thank You Jami...,,"Wait , Let me go talk to J.J . before you do a...","No, Focus and Team work I need to talk with th..."
3,CQGLdZnCHcZ,3.0,3,3,2.0,2,2.0,3,,,"Hi Paxton Let me talk to Debby first, before w...",Hi Patton why don't we get these folks and giv...,,Hi Norman we have to suspend Larry and harry o...,,Hi Foster let have the meeting at 2:00 to 3:00...,"Jennifer the infomation you sent me, it looks ...",,", Hi Paxton we can get together on april 20 ar...",Hi Isaac let me see what kind of schadule I ha...,,Hi Bern I think is better is we take both to l...,,H Daisha you did the right thing thank you for...,
4,LERqIHLgqYm,3.0,3,2,3.0,2,2.0,3,,,,JJ HR along with myself and the Final Assembly...,,Alex I appreciate the heads up on this. I will...,,Shea Based on my recent calendar revisions and...,Jennifer Thank you for the March repair report...,,JJ Thank you for all your efforts and the info...,"Isaac By insatlling the software on the 15th, ...",,,,Vic I suggest you approachHR before taking thi...,Original Message----- From: Assessment Adminis...


In [8]:
dev_df.head()

Unnamed: 0,response_id,rating_chooses_appropriate_action,rating_commits_to_action,rating_gathers_information,rating_identifies_issues_opportunities,rating_interprets_information,rating_involves_others,rating_decision_making_final_score,text_exercise_4,text_exercise_5,text_exercise_6,text_exercise_7,text_exercise_8,text_exercise_9,text_exercise_10,text_exercise_11,text_exercise_12,text_exercise_13,text_exercise_14,text_exercise_15,text_exercise_16,text_exercise_17,text_exercise_18,text_exercise_19,text_exercise_final
0,XSEKmbXmmZi,,,,,,,,,,,,,Alex: Thank you for bringing this incident to ...,,Shea: Thank you for giving me a heads up on so...,,,,,,,,Vic: I appreciate you informing me of your obs...,"1) Being a new employee, I decided to read thr..."
1,OTigqBfxROr,,,,,,,,,,"J.J., Thank you for bringing this to my attent...","J.J. I understand your concern, but that is a ...",,"Alex, Thank you I will take care of this right...",,"Shea, Thank you for the heads up about my team...","Jennifer, Thank you for the information. Based...",,"Hello J.J., That sounds great. I really apprec...","Isaac, We are very excited to test the new sof...",,,,,1. I initially tried to handle each challenge ...
2,lMGidYKEOBv,,,,,,,,,,"Hi J.J, Unfortunately, moving Debby Britzki to...","J.J, I didnt appreciate your age discriminatio...",,"Hi Norma, I'm sorry to hear that, and I will f...",,Thank you Shea. I would choose 12:00 p.m. 1:0...,Thank you Jennifer for your feedback. I will c...,,Yes defiantly I would like to set down with yo...,"Hi Isaac, Thank you, and I too looking forward...",,Thank you Paul. I wil let Cory know. I appreci...,,"Daisha, I would like to be their when you talk...",Communication issues: This is an issue because...
3,lEmnRmxniLl,,,,,,,,,,Hey JJ It sounds like Debby's been a problem f...,"Hey JJ, Integrating the Kirkland plant people ...",,,,"Shea, We'll take BP07 during lunch time, since...","Thanks Jennifer, It seems like reducing absent...",,"Hey J.J., That sounds like a great idea! I'd l...","Everyone, It sounds like we've got a detailed ...",,,,"Vic, I think I know about the project JJ is re...",Responses after questions below:Original Messa...
4,OhnFgbmEVqG,,,,,,,,,,"Hi J. J., I appreciate your enthusiasm for you...","Hi J. J., Thank for wanting to be involved in ...",,"Hi Alex, I am extremely disappointed in the be...",,,"Hi Jennifer, I really do not think we are losi...",,"Hi J. J. Again, I appreciate your enthusiasm b...","Hello Isaac, I am glad to hear of the upgrades...",,,,"Hi Daisha, Thank you for your efforts to minim...",


In [9]:
test_df.head()

Unnamed: 0,response_id,rating_chooses_appropriate_action,rating_commits_to_action,rating_gathers_information,rating_identifies_issues_opportunities,rating_interprets_information,rating_involves_others,rating_decision_making_final_score,text_exercise_4,text_exercise_5,text_exercise_6,text_exercise_7,text_exercise_8,text_exercise_9,text_exercise_10,text_exercise_11,text_exercise_12,text_exercise_13,text_exercise_14,text_exercise_15,text_exercise_16,text_exercise_17,text_exercise_18,text_exercise_19,text_exercise_final
0,DMhdlGkGeHU,,,,,,,,,,"J.J., I will check with Karen Duvalier about w...","J.J. , I am not sure what process will be used...",,,,,,,"Hello J.J., thank you for the insight. Before ...",,,,,,"I tried to respond to each one as I read it, b..."
1,HmHzfEmhMXe,,,,,,,,,,J.J. Iwill talk to her when I come back from v...,"J.J I can't do that ,is not right besides you ...",,"Alex, Thisis embarrazing I will take matter in...",,"Shea, Iwill be on vaction but I send an e-mail...","Jennifer, This is alarming I didn't know this ...",,J.J I am looking foward to meet with you in re...,"Isaac, Ilook foward to meeting you in person a...",,,,"Vic, iIcan't believe this can you please check...",
2,jvuxQdkBzjt,,,,,,,,,,"J.J., Before we make any decisions on moving h...","J.J, This will be a perfect opportunity for us...",,"Norman, Thank you for bringing this to my atte...",,"Shea, Thanks for the information. April 21st a...","Jennifer, Thank you for this information. Afte...",,"J.J., Thanks for the update. I appreciate that...",,,,,"Vic, J.J. recently notified me that he was wor...",I looked at the heading of the email first. If...
3,RHVGolCWJGA,,,,,,,,,"Tracy, Yes, you can sent the graph to Haley bu...",,,"DearTaylor, Great if you have foundsomeone to ...","Dear Alex, Noted,will do the necessary. Cary","Haley, Ipropose Blaine Lerner's for the follow...",,,"Hi Haley, I am nominating the following: 1. Ja...",,,"Dear Taylor, As for this issue, I suggest the ...",,"Dear Taylor, I propose that you call Mr.. Weed...","Dear Liang, Tracy did informed earlier today t...",
4,RmfWkJyFKtZ,,,,,,,,,,,J.J. I'll address your thoughts and the though...,,,,,"Jennifer, Thanks for the info. very useful. Le...",,J.J. Gotta love your enthusiasm! Thanks for th...,"Isaac, Got your email. Sounds like a nice impr...",,,,"Vic, We need to talk about this immediately. J...",See below...Original Message----- From: Assess...


## Exploratory Data Analysis

In [10]:
# descriptives for each target
train_df.iloc[:,2:9].describe()

Unnamed: 0,rating_commits_to_action,rating_gathers_information,rating_identifies_issues_opportunities,rating_interprets_information,rating_involves_others,rating_decision_making_final_score
count,1466.0,1466.0,1386.0,1466.0,1381.0,1466.0
mean,3.092769,2.703274,2.668831,2.465211,2.80449,3.22101
std,0.611261,0.648403,0.564276,0.619769,0.673092,1.308516
min,1.0,1.0,1.0,2.0,2.0,1.0
25%,3.0,2.0,2.0,2.0,2.0,2.0
50%,3.0,3.0,3.0,2.0,3.0,3.0
75%,3.0,3.0,3.0,3.0,3.0,4.0
max,4.0,4.0,4.0,4.0,4.0,7.0


In [11]:
# correlation table
train_df.iloc[:,2:9].corr()

  train_df.iloc[:,2:9].corr()


Unnamed: 0,rating_commits_to_action,rating_gathers_information,rating_identifies_issues_opportunities,rating_interprets_information,rating_involves_others,rating_decision_making_final_score
rating_commits_to_action,1.0,0.252056,0.167202,0.253569,0.307503,0.543572
rating_gathers_information,0.252056,1.0,0.125584,0.151798,0.218536,0.347666
rating_identifies_issues_opportunities,0.167202,0.125584,1.0,0.30721,0.157192,0.384089
rating_interprets_information,0.253569,0.151798,0.30721,1.0,0.200095,0.67695
rating_involves_others,0.307503,0.218536,0.157192,0.200095,1.0,0.359319
rating_decision_making_final_score,0.543572,0.347666,0.384089,0.67695,0.359319,1.0


In [12]:
# number of data for each target & exercies
train_df.notna().sum()

response_id                               1466
rating_chooses_appropriate_action         1386
rating_commits_to_action                  1466
rating_gathers_information                1466
rating_identifies_issues_opportunities    1386
rating_interprets_information             1466
rating_involves_others                    1381
rating_decision_making_final_score        1466
text_exercise_4                            185
text_exercise_5                            236
text_exercise_6                           1042
text_exercise_7                           1012
text_exercise_8                            191
text_exercise_9                           1145
text_exercise_10                           223
text_exercise_11                          1091
text_exercise_12                           996
text_exercise_13                           211
text_exercise_14                          1085
text_exercise_15                          1093
text_exercise_16                           197
text_exercise

In [13]:
# descriptives on number of exercises completed per subject
train_df.iloc[:,8:].notna().sum(axis=1).describe()

count    1466.000000
mean        8.408595
std         1.718053
min         1.000000
25%         8.000000
50%         9.000000
75%        10.000000
max        17.000000
dtype: float64

## Dealing with Missing Labels

Here we investigate if there are any missing labels in the training dataset and propose different methods for handling the missing values.

In [14]:
train_df.isna().sum()

response_id                                  0
rating_chooses_appropriate_action           80
rating_commits_to_action                     0
rating_gathers_information                   0
rating_identifies_issues_opportunities      80
rating_interprets_information                0
rating_involves_others                      85
rating_decision_making_final_score           0
text_exercise_4                           1281
text_exercise_5                           1230
text_exercise_6                            424
text_exercise_7                            454
text_exercise_8                           1275
text_exercise_9                            321
text_exercise_10                          1243
text_exercise_11                           375
text_exercise_12                           470
text_exercise_13                          1255
text_exercise_14                           381
text_exercise_15                           373
text_exercise_16                          1269
text_exercise

In [15]:
missing_label_cols = [
    'rating_chooses_appropriate_action',
    'rating_identifies_issues_opportunities',
    'rating_involves_others',
]

available_missing_label_approaches = [
    'drop',
    'mean'
]
missing_label_approach = available_missing_label_approaches[0]

In [16]:
if missing_label_approach == 'drop':
    train_df = train_df.dropna(subset=train_df.columns[1:8])
elif missing_label_approach == 'mean':
    mean_values = {
        l: train_df[l].mean() for l in missing_label_cols
    }
    train_df = train_df.fillna(value=mean_values)

In [17]:
label_columns = [
    'rating_chooses_appropriate_action',
    'rating_commits_to_action',
    'rating_gathers_information',
    'rating_identifies_issues_opportunities',
    'rating_interprets_information',
    'rating_involves_others',
    'rating_decision_making_final_score'
]

rating_weight = (1-0.4)/(len(label_columns)-1)
rating_weight
output_weights = [rating_weight]*(len(label_columns)-1) + [0.4]

## Concatenating All Exercise Responses (excluding final)

We observe the values of column 'text_exercise_final' are not quite informative. Therefore, we remove column 'text_exercise_final' and do not include it in the input data.

In [18]:
pd.set_option('display.max_colwidth', None)

In [19]:
train_df['text_exercise_final'].head(2)

0    See below for answers.Original Message----- From: Assessment Administrator Sent: 4/5/2016 2:20 PM To: Stevens, Cary Cc: Subject : RESPONSE REQUIRED: About your day This e-mail contains some final questions for you. Your answers to these questions will help in the evaluation of your performance by providing a better understanding of your approach and rationale. It is important that you provide complete and thorough answers to the questions below.  For your reference, a list of messages you received today is shown after question 4.  1. Please list the major categories of issues or problems facing Customer Service Team 5 of Soundproof Solutions. Why is each an issue or a problem? Qualty problems with the paint at the hospital and how the call was handeled, there was also issues with peeling issues as well. This is an issue because it could lead to lost sales Mutual Respect issues between collegues as evident by the Elatu lunch incident and Jess' s animosity towards Chris's promotion.

In [20]:
pd.set_option('display.max_colwidth', 50)

In [21]:
train_df.iloc[:,8:] = train_df.iloc[:,8:].fillna('')
dev_df.iloc[:,8:] = dev_df.iloc[:,8:].fillna('')
test_df.iloc[:,8:] = test_df.iloc[:,8:].fillna('')

In [22]:
train_df['text_exercise_concat'] = train_df[train_df.columns[8]]
for i in range(9, len(train_df.columns)-1):
    train_df['text_exercise_concat'] += train_df[train_df.columns[i]]

In [23]:
dev_df['text_exercise_concat'] = dev_df[dev_df.columns[8]]
for i in range(9, len(dev_df.columns)-1):
    dev_df['text_exercise_concat'] += dev_df[dev_df.columns[i]]

In [24]:
test_df['text_exercise_concat'] = test_df[test_df.columns[8]]
for i in range(9, len(test_df.columns)-1):
    test_df['text_exercise_concat'] += test_df[test_df.columns[i]]

In [25]:
# descriptives on text length per subject
train_df['text_exercise_concat'].apply(lambda x: len(x.split(' '))).describe()

count    1381.000000
mean     1123.740768
std       500.314925
min       126.000000
25%       792.000000
50%      1107.000000
75%      1417.000000
max      4030.000000
Name: text_exercise_concat, dtype: float64

# Fine-tune A Regression Model using the Labeled Dataset

In [26]:
class RegressionTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        loss = torch.nn.functional.mse_loss(logits, labels)
        return (loss, outputs) if return_outputs else loss


class DecisionMakingDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


def compute_metrics_for_regression(eval_pred):
    logits, labels = eval_pred

    mse = mean_squared_error(labels, logits, multioutput=output_weights)
    mae = mean_absolute_error(labels, logits, multioutput=output_weights)
    r2 = r2_score(labels, logits, multioutput=output_weights)

    weighted_r = [output_weights[i]*pearsonr(np.squeeze(np.asarray(labels[:, i])), np.squeeze(np.asarray(logits[:, i]))).statistic 
                  for i in range(labels.shape[1])]
    weighted_r = sum(weighted_r)
    r = [pearsonr(np.squeeze(np.asarray(labels[:, i])), np.squeeze(np.asarray(logits[:, i]))).statistic for i in range(labels.shape[1])]
    r = sum(r) / len(r)

    return {"mse": mse, "mae": mae, "r2_adj": r2, "r": r, "weighted_r": weighted_r}


## Tokenizer and Model

In [27]:
date_str = '14Apr2023'
MODEL_NAME = "kiddothe2b/adhoc-hierarchical-transformer-base-4096"
LEARNING_RATE = 1e-5
WEIGHT_DECAY = 0.15
MAX_LENGTH = 4096
BATCH_SIZE = 2
EPOCHS = 10

In [28]:
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME, 
    trust_remote_code=True
)

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, 
    num_labels=len(label_columns),
    trust_remote_code=True
)

Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Some weights of the model checkpoint at kiddothe2b/adhoc-hierarchical-transformer-base-4096 were not used when initializing HATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing HATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification 

In [29]:
%%capture
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

## Splitting the labeled dataset into 3 sets: train, validation, and test; Normalizing the labels

In [30]:
train_texts, val_texts, train_labels, val_labels = train_test_split(train_df['text_exercise_concat'],
                                                                    train_df[label_columns],
                                                                    test_size=.2, 
                                                                    random_state=1234)

train_texts = [str(i) for i in train_texts]
val_texts = [str(i) for i in val_texts]

for col in label_columns:
    maximum = train_labels[col].max() - 1
    train_labels[col] = train_labels[col].apply(float)
    train_labels[col] = (train_labels[col] - 1) / maximum
    train_labels[col] = train_labels[col] * 3 + 1
    val_labels[col] = val_labels[col].apply(float)
    val_labels[col] = (val_labels[col] - 1) / maximum
    val_labels[col] = val_labels[col] * 3 + 1

train_labels = train_labels.values.tolist()
val_labels = val_labels.values.tolist()

## Formatting the different datasets into the right format for the model

In [31]:
train_encodings = tokenizer(train_texts, padding=True, max_length=MAX_LENGTH, truncation=True)
val_encodings = tokenizer(val_texts, padding=True, max_length=MAX_LENGTH, truncation=True)

train_dataset = DecisionMakingDataset(train_encodings, train_labels)
val_dataset = DecisionMakingDataset(val_encodings, val_labels)



## Training loop

In [32]:
training_args = TrainingArguments(
    output_dir=f"../models/{MODEL_NAME}-fine-tuned-regression-{missing_label_approach}-{date_str}",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    metric_for_best_model="weighted_r",
    load_best_model_at_end=True,
    weight_decay=WEIGHT_DECAY,
    logging_strategy='epoch',
)

In [33]:
trainer = RegressionTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics_for_regression,
)

In [34]:
trainer.train()

[2023-04-15 23:15:07.575: W smdistributed/modelparallel/torch/nn/predefined_hooks.py:78] Found unsupported HuggingFace version 4.28.1 for automated tensor parallelism. HuggingFace modules will not be automatically distributed. You can use smp.tp_register_with_module API to register desired modules for tensor parallelism, or directly instantiate an smp.nn.DistributedModule. Supported HuggingFace transformers versions for automated tensor parallelism: ['4.17.0', '4.20.1', '4.21.0']


INFO:root:Using NamedTuple = typing._NamedTuple instead.


[2023-04-15 23:15:07.749 pytorch-1-13-gpu--ml-g4dn-12xlarge-3a8071c4fc04fdac814ceb809ccb:20 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2023-04-15 23:15:07.775 pytorch-1-13-gpu--ml-g4dn-12xlarge-3a8071c4fc04fdac814ceb809ccb:20 INFO profiler_config_parser.py:111] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.




Epoch,Training Loss,Validation Loss,Mse,Mae,R2 Adj,R,Weighted R
1,0.7877,0.414848,0.419902,0.536476,-0.038848,0.104925,0.144782
2,0.395,0.383366,0.386218,0.491555,0.043619,0.243704,0.285168
3,0.3787,0.392071,0.386419,0.484358,0.035566,0.289752,0.335749
4,0.3659,0.394594,0.388858,0.475792,0.030759,0.345901,0.398505
5,0.3489,0.36997,0.352974,0.47913,0.121197,0.367297,0.42508
6,0.3396,0.393463,0.395977,0.520796,0.018622,0.352278,0.417923
7,0.3205,0.362787,0.34795,0.479202,0.135072,0.393831,0.455602
8,0.3067,0.354186,0.338645,0.469978,0.15848,0.404428,0.462478
9,0.2995,0.3554,0.339425,0.4697,0.15641,0.412817,0.469161
10,0.2882,0.364656,0.354519,0.484679,0.120118,0.411423,0.467321




TrainOutput(global_step=1380, training_loss=0.3830705974413001, metrics={'train_runtime': 2833.4353, 'train_samples_per_second': 3.896, 'train_steps_per_second': 0.487, 'total_flos': 3.093131013783552e+16, 'train_loss': 0.3830705974413001, 'epoch': 10.0})

In [35]:
trainer.evaluate()



{'eval_loss': 0.3554004430770874,
 'eval_mse': 0.3394253581762314,
 'eval_mae': 0.46970003843307495,
 'eval_r2_adj': 0.15641019337954654,
 'eval_r': 0.4128167159641482,
 'eval_weighted_r': 0.46916079769908736,
 'eval_runtime': 25.2247,
 'eval_samples_per_second': 10.981,
 'eval_steps_per_second': 1.388,
 'epoch': 10.0}

## Error Analysis and Model Evaluation

In [36]:
os.makedirs(f'./output/{date_str}')

In [37]:
nb_batches = math.ceil(len(val_texts)/BATCH_SIZE)
y_preds = []

for i in range(nb_batches):
    input_texts = val_texts[i * BATCH_SIZE: (i+1) * BATCH_SIZE]
    input_labels = val_labels[i * BATCH_SIZE: (i+1) * BATCH_SIZE]
    encoded = tokenizer(input_texts, padding=True, max_length=MAX_LENGTH, truncation=True, return_tensors="pt")
    encoded = {key: torch.tensor(val).to(device) for key, val in encoded.items()}
    y_preds += model(**encoded).logits.tolist()

pd.set_option('display.max_rows', 500)
df = pd.DataFrame([val_texts, val_labels, y_preds], ["Text", "Score", "Prediction"]).T
df["Rounded Prediction"] = df["Prediction"].apply(np.round)

df.to_csv(f'output/{date_str}/{MODEL_NAME[-4:]}-train-{missing_label_approach}-{date_str}.csv')

# incorrect_cases = df[(df["Score"] != df["Rounded Prediction"]) & (df["Text"] != "nan")]
# incorrect_cases.shape



# Predictions on the Dev Set

In [38]:
dev_texts = list(dev_df['text_exercise_concat'].values)
dev_labels = list(dev_df['rating_decision_making_final_score'].values)

nb_batches = math.ceil(len(dev_texts)/BATCH_SIZE)
y_preds = []

for i in range(nb_batches):
    input_texts = dev_texts[i * BATCH_SIZE: (i+1) * BATCH_SIZE]
    input_labels = dev_labels[i * BATCH_SIZE: (i+1) * BATCH_SIZE]
    encoded = tokenizer(input_texts, padding=True, max_length=MAX_LENGTH, truncation=True, return_tensors="pt")
    encoded = {key: torch.tensor(val).to(device) for key, val in encoded.items()}
    y_preds += model(**encoded).logits.tolist()

pd.set_option('display.max_rows', 500)
df = pd.DataFrame([dev_texts, y_preds], ["Text", "Prediction"]).T
df["Rounded Prediction"] = df["Prediction"].apply(np.round)

df.to_csv(f'output/{date_str}/{MODEL_NAME[-4:]}-dev-{missing_label_approach}-{date_str}.csv')



# Predictions on the Test Set

In [39]:
test_texts = list(test_df['text_exercise_concat'].values)
test_labels = list(test_df['rating_decision_making_final_score'].values)

nb_batches = math.ceil(len(test_texts)/BATCH_SIZE)
y_preds = []

for i in range(nb_batches):
    input_texts = test_texts[i * BATCH_SIZE: (i+1) * BATCH_SIZE]
    input_labels = test_labels[i * BATCH_SIZE: (i+1) * BATCH_SIZE]
    encoded = tokenizer(input_texts, padding=True, max_length=MAX_LENGTH, truncation=True, return_tensors="pt")
    encoded = {key: torch.tensor(val).to(device) for key, val in encoded.items()}
    y_preds += model(**encoded).logits.tolist()

pd.set_option('display.max_rows', 500)
df = pd.DataFrame([test_texts, y_preds], ["Text", "Prediction"]).T
df["Rounded Prediction"] = df["Prediction"].apply(np.round)

df.to_csv(f'output/{date_str}/{MODEL_NAME[-4:]}-test-{missing_label_approach}-{date_str}.csv')



# Prepare the output files in the required format

In [40]:
input_dir = f'./output/{date_str}/' # directory that stores result files
input_filename = f'4096-test-drop-{date_str}.csv' # file name
output_suffix = 'test_drop' # output file suffix
prediction_type = 'Prediction'

df = test_df[['response_id', 'text_exercise_concat']] # test or dev

In [41]:
results = pd.read_csv(input_dir + input_filename).drop('Unnamed: 0', axis=1)
results = df.merge(results, left_on='text_exercise_concat', right_on='Text').drop('text_exercise_concat', axis=1)

In [42]:
%%capture
df[['rating_chooses_appropriate_action',
    'rating_commits_to_action',
    'rating_gathers_information',
    'rating_identifies_issues_opportunities',
    'rating_interprets_information',
    'rating_involves_others',
    'rating_decision_making_final_score']] = pd.DataFrame(results[prediction_type].apply(lambda x: 
                         
                                                                                         [float(pred) for pred in x[1:-1].split(', ')]).tolist())
df = df.drop('text_exercise_concat', axis=1, inplace=False)

In [43]:
# scale final score back to 1-7
df['rating_decision_making_final_score'] = ((df['rating_decision_making_final_score'] - 1) / 3 * 6) + 1

In [44]:
df_rounded = round(df)

In [45]:
df.to_csv(input_dir + 'processed_predictions_' + output_suffix + '.csv', index=False)
df_rounded.to_csv(input_dir + 'processed_predictions_rounded_' + output_suffix + '.csv', index=False)