In [1]:
%cd ~/RATER-C

/home/daved/RATER-C


In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import torch
import numpy as np
import pandas as pd

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline

# Run on the GPU when CUDA is usable from this process; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [3]:
import random
from transformers import set_seed
seed = 123

# Seed every random number generator this notebook touches with the same value:
# the transformers helper first, then torch, the stdlib `random` module and numpy.
for seeder in (set_seed, torch.manual_seed, random.seed, np.random.seed):
    seeder(seed)

In [4]:
model_checkpoint = 'models/microsoft_deberta-v3-large'
# The local directory name encodes the Hub id as '<org>_<model>'. Only the FIRST
# underscore stands for the '/' separator; replacing every underscore would corrupt
# model names that contain underscores themselves (e.g. 'google_bert_uncased_L-2_H-128_A-2').
# NOTE(review): assumes the organisation part never contains an underscore - confirm
# against the naming used under models/.
pretrained_model_name_or_path = model_checkpoint.split('models/')[1].replace('_', '/', 1)

# NOTE(review): the tokenizer is loaded by Hub id rather than from the local checkpoint
# directory - presumably the checkpoint folder ships without tokenizer files; confirm.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)

# Fine-tuned weights come from the local checkpoint; the head has two output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels = 2
).to(device)

# top_k = None is passed so the pipeline output is a list of label/score entries per
# input; the inference cell picks the 'LABEL_1' entry out of that list.
pipe = TextClassificationPipeline(model = model, tokenizer = tokenizer, top_k = None, device = device)

Device set to use cuda


In [5]:
# sheet 2 for Study 1; sheet 3 for Study 2
# NOTE(review): pandas treats an integer sheet_name as a zero-based position, so
# sheet_name = 2 is the third sheet of the workbook - confirm this is the intended one.
cm_df = pd.read_excel("data/raw/Highest domain-fixed.xlsx", sheet_name = 2)
pred_df = pd.read_excel("data/raw/Sample data sheet for Kai v9.xlsx")

# Right join on 'Match ID': every row of cm_df is kept, with pred_df columns attached
# wherever the key matches.
human_sheet = pd.merge(pred_df, cm_df, how = 'right', on = 'Match ID')
human_sheet = human_sheet.assign(**{
    'Full construct': human_sheet['Construct name'] + ': ' + human_sheet['Construct definition']
})

In [6]:
# One {'text', 'text_pair'} record per row of human_sheet, in row order:
# the construct definition is the first segment and the item text the second.
data_test_dict = [
    {'text': definition, 'text_pair': item_text}
    for definition, item_text in zip(human_sheet['Construct definition'], human_sheet['Item text'])
]

data_test_dict[0]

{'text': 'The extent to which the recruiter and the job applicant share important values, attitudes, and beliefs.',
 'text_pair': 'The job applicant and I are similar in terms of our outlook, perspective, and values.'}

In [7]:
# temporary workaround for XLNet batch size issue
batch_size = 1 if 'XLNetModel' in str(model.base_model) else 128

raw_probs = pipe(data_test_dict, batch_size = batch_size)

# Each element of raw_probs is a list of {'label', 'score'} entries for one input;
# keep the score of the entry whose label is 'LABEL_1'.
probs = np.array([
    label_scores[[entry['label'] for entry in label_scores].index('LABEL_1')]['score']
    for label_scores in raw_probs
])
# Hard 0/1 prediction at a 0.5 probability cut-off.
preds = (probs >= 0.5).astype(int)

In [8]:
# Attach the model's 'LABEL_1' probability and the thresholded prediction to every row.
human_sheet = human_sheet.assign(**{'Model Prob': probs, 'Model Pred': preds})

len(human_sheet)

467

In [9]:
# In the three domain columns a value of 0.0 is replaced by the empty string, which
# is the "no loading" marker conf_matrix compares against.
for domain_col in ('Domain HT', 'Domain v6', 'Domain loading'):
    human_sheet[[domain_col]] = human_sheet[[domain_col]].replace(0.0, '')

In [10]:
def conf_matrix(sheet, ground_truth = 'author', final_loading = None, grouped = False):
    """Print a 2x2 confusion matrix and summary metrics for predicted loadings.

    Parameters
    ----------
    sheet : pandas.DataFrame
        Rows to evaluate. The columns read depend on ``ground_truth``:
        'author' reads 'Target_y' and 'Construct ID'; 'ht' reads 'Domain HT';
        'domain' reads 'Domain loading'. 'Source ID' is read only when
        ``grouped`` is true. The frame is copied, so the caller's object is
        left untouched.
    ground_truth : {'author', 'ht', 'domain'}
        Which reference labelling to score against.

        * 'author': rows with ``Target_y == 1`` are positives and rows with
          ``Target_y == 0`` are negatives; a row is predicted positive when its
          'Final loading' equals its 'Construct ID'.
        * 'ht' / 'domain': rows whose 'Domain HT' / 'Domain loading' is not the
          empty string are positives; a positive row is a hit when
          'Final loading' equals that column, and a negative row is a false
          positive when 'Final loading' differs from the (empty) reference.
          For 'domain', rows with a missing 'Domain loading' are dropped first.
    final_loading : pandas.Series or scalar, optional
        Predicted loading per row; stored as the 'Final loading' column of the
        working copy (a Series is aligned on the index by pandas).
    grouped : bool
        When true, additionally print the first unique 'Source ID' value.

    Returns
    -------
    None
        Everything is reported via ``print``. Metrics whose denominator is zero
        (for example sensitivity when there are no positive rows) are printed
        as ``nan`` instead of raising ``ZeroDivisionError``.

    Raises
    ------
    ValueError
        If ``ground_truth`` is not one of the supported modes.
    """
    if ground_truth not in ('author', 'ht', 'domain'):
        raise ValueError(
            "ground_truth must be 'author', 'ht' or 'domain', got " + repr(ground_truth)
        )

    def _count(mask):
        # Number of True entries in a boolean Series, as a plain Python int.
        return int(mask.sum())

    def _ratio(numerator, denominator):
        # An empty class makes the metric undefined: report NaN rather than crash.
        return numerator / denominator if denominator else float('nan')

    sheet = sheet.copy()

    sheet['Final loading'] = final_loading

    if ground_truth == 'author':
        target_df = sheet[sheet['Target_y'] == 1]
        non_target_df = sheet[sheet['Target_y'] == 0]

        tp = _count(target_df['Final loading'] == target_df['Construct ID'])
        fn = _count(target_df['Final loading'] != target_df['Construct ID'])
        fp = _count(non_target_df['Final loading'] == non_target_df['Construct ID'])
        tn = _count(non_target_df['Final loading'] != non_target_df['Construct ID'])
    else:
        # 'ht' and 'domain' share the same scoring rule; only the reference column
        # differs, and 'domain' ignores rows without a reference value.
        truth_col = 'Domain HT' if ground_truth == 'ht' else 'Domain loading'
        if ground_truth == 'domain':
            sheet = sheet[~sheet['Domain loading'].isna()].copy()

        target_df = sheet[sheet[truth_col] != '']
        non_target_df = sheet[sheet[truth_col] == '']

        tp = _count(target_df['Final loading'] == target_df[truth_col])
        fn = _count(target_df['Final loading'] != target_df[truth_col])
        # For negative rows the reference is '', so "differs" means a loading was predicted.
        fp = _count(non_target_df['Final loading'] != non_target_df[truth_col])
        tn = _count(non_target_df['Final loading'] == non_target_df[truth_col])

    print('Total Data Rows:', len(sheet))

    print(np.array([[tp, fp], [fn, tn]]))

    total = tp + tn + fp + fn

    Accuracy = _ratio(tp + tn, total)
    Sensitivity = _ratio(tp, tp + fn)
    Specificity = _ratio(tn, tn + fp)
    PrecPos = _ratio(tp, tp + fp)
    PrecNeg = _ratio(tn, tn + fn)
    F1 = _ratio(2 * Sensitivity * PrecPos, Sensitivity + PrecPos)

    # Cohen's kappa: observed agreement corrected for the agreement expected by
    # chance from the row and column marginals of the confusion matrix.
    P_o = Accuracy
    P_e = _ratio((tp + fp) * (tp + fn) + (tn + fn) * (tn + fp), total ** 2)
    Kappa = _ratio(P_o - P_e, 1 - P_e)

    if grouped:
        print('Source ID:', np.unique(sheet['Source ID'])[0])
    print('Accuracy:', np.round(Accuracy * 100, 0))
    print('Error Rate:', np.round(100 - Accuracy * 100, 0))
    print('Recall of positive class (sensitivity):', np.round(Sensitivity * 100, 0))
    print('Recall of negative class (specificity):', np.round(Specificity * 100, 0))
    print('Precision of positive class:', np.round(PrecPos * 100, 0))
    print('Precision of negative class:', np.round(PrecNeg * 100, 0))
    print('F-Measure:', np.round(F1, 2))
    print('Cohen\'s Kappa:', np.round(Kappa, 2))
    print('---')

In [11]:
for each in ('H&T ratings', 'Model Prob'):
    print('RESULT:', each)

    # Rank the scores within each item, highest first; with method = 'min' tied top
    # scores all receive rank 1.
    score_rank = human_sheet.groupby('Item ID')[each].rank(method = 'min', ascending = False)
    # Construct ID of the top-ranked row(s) per item; every other row gets ''.
    top_constructs = human_sheet.loc[score_rank == 1, 'Construct ID']
    calc_loading = top_constructs.reindex(human_sheet.index, fill_value = '')
    human_sheet['Final loading - ' + each] = calc_loading

    conf_matrix(human_sheet, ground_truth = 'author', final_loading = calc_loading)

RESULT: H&T ratings
Total Data Rows: 467
[[138  21]
 [ 21 287]]
Accuracy: 91.0
Error Rate: 9.0
Recall of positive class (sensitivity): 87.0
Recall of negative class (specificity): 93.0
Precision of positive class: 87.0
Precision of negative class: 93.0
F-Measure: 0.87
Cohen's Kappa: 0.8
---
RESULT: Model Prob
Total Data Rows: 467
[[131  28]
 [ 28 280]]
Accuracy: 88.0
Error Rate: 12.0
Recall of positive class (sensitivity): 82.0
Recall of negative class (specificity): 91.0
Precision of positive class: 82.0
Precision of negative class: 91.0
F-Measure: 0.82
Cohen's Kappa: 0.73
---


In [12]:
# Mean model probability per item, as a two-column frame ('Item ID', 'Mean Model Prob').
item_mean_prob = (
    human_sheet.groupby('Item ID')['Model Prob']
    .mean()
    .rename('Mean Model Prob')
    .reset_index()
)
# No `on=` is given, so pandas joins on the column names the two frames share.
human_sheet = human_sheet.merge(item_mean_prob, how = 'left')

# Columns left out of the exported CSV.
dropped_columns = [
    'Reversed_x', 'Target_x',
    'P match', 'P match v1', 'P match v2', 'P match v3', 'P match v4',
    'LSACosT2T', 'LSACosD2D', 'Word2VecCos', 'BERTCos',
    'Full construct',
]
# The row count is part of the output file name.
results_path = 'data/analysis/model_results_' + str(len(human_sheet)) + '.csv'
human_sheet.drop(columns = dropped_columns).to_csv(results_path, index = False)

len(human_sheet)

467