In [1]:
import pickle
import pandas as pd
pd.options.display.max_colwidth = 500


def read_file(file_name):
    """Load and return the object pickled in ``file_name``.

    Parameters
    ----------
    file_name : path of a pickle file; it is opened in binary read mode.

    Returns
    -------
    Whatever object ``pickle.load`` reconstructs from the file.
    """
    # NOTE: unpickling can execute arbitrary code -- only open trusted files.
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)

def format_feature_names(x):
    """Build a readable label for the feature set described by one result row.

    ``x`` must support item access for ``'best_params'`` (a mapping of tuned
    hyper-parameters) and ``'features'`` (an iterable of feature descriptors,
    each indexable by ``'name'`` and, depending on the name, ``'comb_name'``
    and ``'feature_selection'``).

    Returns the per-feature labels joined with ``' + '``.

    The exact spacing is part of the contract: word n-grams are rendered as
    ``'ngram  words (a, b) '`` (doubled space after ``ngram``, trailing space
    when not reduced) because the pieces are joined with single spaces and
    one piece already starts with a space.
    """
    best_params = x['best_params']

    def tuned_ngram_range(feature):
        # The tuned range is stored under a key derived from the feature's comb_name.
        key = 'features__' + feature['comb_name'] + '__feature_extraction__ngram_range'
        return best_params[key]

    labels = []
    for feature in x['features']:
        kind = feature['name']
        if kind == 'ngram':
            span = tuned_ngram_range(feature)
            suffix = 'reduced' if feature['feature_selection'] == True else ''
            label = ' '.join((kind, ' words', str(tuple(span)), suffix))
        elif kind == 'type_dependency':
            span = tuned_ngram_range(feature)
            suffix = 'reduced' if feature['feature_selection'] == True else ''
            label = ' '.join(('ngram typed dependency', str(tuple(span)), suffix))
        elif kind == 'bert_doc':
            suffix = ' reduced' if feature['feature_selection'] == True else ''
            label = 'bert_doc' + suffix
        else:
            # Any other feature is shown under its raw name.
            label = kind
        labels.append(label)

    return ' + '.join(labels)

def group_results(df, order=True):
    """Average the macro precision/recall/F1 per experimental configuration.

    A ``features_`` label is computed for every row with
    ``format_feature_names``; rows are then grouped by
    (train_domain, test_domain, model_name, features_) and the three macro
    metrics are averaged and rounded to 3 decimals.

    Parameters
    ----------
    df : DataFrame holding the columns 'macro avg precision',
        'macro avg recall', 'macro avg f1-score', 'model_name',
        'train_domain', 'test_domain', plus whatever
        ``format_feature_names`` reads from each row. It is not modified.
    order : when true, sort descending by mean F1, then precision, then recall.

    Returns
    -------
    DataFrame with the grouping keys as regular columns and two-level metric
    columns of the form ``(metric, 'mean')``.
    """
    metric_columns = ['macro avg precision', 'macro avg recall', 'macro avg f1-score']
    key_columns = ['train_domain', 'test_domain', 'model_name', 'features_']

    # assign() returns a new frame, so the caller's DataFrame (often a filtered
    # slice of the raw results) is not written to.
    labelled = df.assign(
        features_=[format_feature_names(row) for _, row in df.iterrows()]
    )

    group_df = (
        labelled[metric_columns + key_columns]
        .groupby(key_columns)
        .agg(['mean'])
        .round(3)
    )

    if order:
        group_df = group_df.sort_values(
            by=[('macro avg f1-score', 'mean'),
                ('macro avg precision', 'mean'),
                ('macro avg recall', 'mean')],
            ascending=False,
        )

    # Turn the grouping keys back into ordinary columns.
    return group_df.reset_index()

def get_results_by_model(df, model_name, sort_by, drop_features=True, drop_domains=True):
    """Extract one model's rows and rename the metric columns after the model.

    Parameters
    ----------
    df : DataFrame with the columns 'train_domain', 'test_domain',
        'features_', 'macro avg precision', 'macro avg recall',
        'macro avg f1-score' and 'model_name'.
    model_name : value of 'model_name' to keep; also used as the prefix of
        the renamed metric columns.
    sort_by : column used to sort the rows in descending order.
    drop_features : drop the 'features_' column from the result.
    drop_domains : drop 'train_domain' and 'test_domain' from the result.

    Returns
    -------
    DataFrame with a fresh 0..n-1 index whose metric columns are named
    '<model_name> - P', '<model_name> - R' and '<model_name> - F1'.
    """
    kept_columns = ['train_domain', 'test_domain', 'features_',
                    'macro avg precision', 'macro avg recall', 'macro avg f1-score']

    subset = df[kept_columns]
    subset = subset[df['model_name'].isin([model_name])]

    # drop=True discards the old index directly instead of materialising it as
    # an 'index' column and dropping it afterwards; that also keeps this working
    # when the incoming index carries a name.
    subset = subset.sort_values(by=[sort_by], ascending=False).reset_index(drop=True)

    drop_columns = []
    if drop_features:
        drop_columns.append('features_')
    if drop_domains:
        drop_columns.extend(['train_domain', 'test_domain'])
    if drop_columns:
        subset = subset.drop(columns=drop_columns)

    return subset.rename(columns={
        'macro avg f1-score': ' - '.join((model_name, 'F1')),
        'macro avg precision': ' - '.join((model_name, 'P')),
        'macro avg recall': ' - '.join((model_name, 'R')),
    })

### Table 10: Classification results of the models using individual features.

In [192]:
# Load the per-run results and keep the three learned models trained and
# tested on BHOCS. `df` stays a notebook-level name because a later cell
# reads it to build the CNN table.
df=read_file('../experiments/2_rq1/results_rq1.pkl')
df=df[(df['train_domain'] == 'BHOCS') &
      (df['test_domain'] == 'BHOCS') &
      df.model_name.isin(['logistic_regression', 'svm', 'cnn'])]
df=group_results(df, order=True)

# format_feature_names joins combined features with ' + ', so a label without
# '+' is an individual feature. Match the character literally instead of
# relying on the invalid '\+' escape in a non-raw string.
df=df[~df['features_'].str.contains('+', regex=False)]

# Only the first frame keeps the domain/feature columns; the SVM metrics are
# appended side by side, aligned on the positional index.
lr=get_results_by_model(df, 'logistic_regression', 'features_', drop_features=False, drop_domains=False)
svm=get_results_by_model(df, 'svm', 'features_')
pd.concat([lr, svm], axis=1)

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,train_domain,test_domain,features_,logistic_regression - P,logistic_regression - R,logistic_regression - F1,svm - P,svm - R,svm - F1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,mean,mean,mean,mean,mean
0,BHOCS,BHOCS,sentiment,0.532,0.531,0.527,0.554,0.551,0.545
1,BHOCS,BHOCS,"ngram typed dependency (1, 4) reduced",0.682,0.681,0.681,0.689,0.688,0.687
2,BHOCS,BHOCS,"ngram typed dependency (1, 4)",0.685,0.685,0.684,0.688,0.688,0.687
3,BHOCS,BHOCS,"ngram typed dependency (1, 1) reduced",0.688,0.686,0.686,0.677,0.675,0.675
4,BHOCS,BHOCS,"ngram typed dependency (1, 1)",0.688,0.688,0.687,0.686,0.684,0.683
5,BHOCS,BHOCS,"ngram words (1, 4) reduced",0.74,0.73,0.728,0.746,0.738,0.735
6,BHOCS,BHOCS,"ngram words (1, 4)",0.747,0.737,0.734,0.746,0.741,0.74
7,BHOCS,BHOCS,"ngram words (1, 1) reduced",0.737,0.736,0.736,0.737,0.736,0.736
8,BHOCS,BHOCS,"ngram words (1, 1)",0.738,0.738,0.737,0.745,0.745,0.745
9,BHOCS,BHOCS,bert_doc reduced,0.717,0.717,0.717,0.71,0.709,0.709


In [4]:
# Relies on `df` as left by an earlier cell; shows the CNN rows with their
# domain and feature columns kept.
cnn = get_results_by_model(
    df,
    model_name='cnn',
    sort_by='features_',
    drop_features=False,
    drop_domains=False,
)
cnn

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,train_domain,test_domain,features_,cnn - P,cnn - R,cnn - F1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,mean,mean
0,BHOCS,BHOCS,bert_word,0.741,0.733,0.73


### Table 11 - 12: Classification results of the models using combined features.

In [216]:
def _pairs_with_word_ngram(grouped, ngram_range):
    """Side-by-side LR/SVM table of feature sets that include a given word n-gram.

    ``grouped`` is a frame returned by ``group_results``; ``ngram_range`` is
    the text between the parentheses of the label, e.g. ``'1, 1'``.
    Keeps labels with at most one '+' in which the word n-gram label appears
    directly before or directly after the '+'.
    """
    # format_feature_names renders word n-grams as 'ngram  words (a, b) '
    # (doubled space, trailing space when not reduced); the patterns below
    # depend on that exact spacing. Raw strings keep the regex escapes valid.
    word_ngram = rf'ngram  words \({ngram_range}\)'
    labels = grouped['features_']
    selected = grouped[
        (labels.str.count(r'\+') <= 1)
        & (labels.str.contains(word_ngram + r'  \+')
           | labels.str.contains(r'\+ ' + word_ngram))
    ]

    lr = get_results_by_model(selected, 'logistic_regression', 'features_',
                              drop_features=False, drop_domains=False)
    svm = get_results_by_model(selected, 'svm', 'features_')
    return pd.concat([lr, svm], axis=1)


# Load and aggregate once; both tables are filtered views of the same frame.
df = read_file('../experiments/1_rq1/results_rq1.pkl')
df = df[(df['train_domain'] == 'BHOCS') &
        (df['test_domain'] == 'BHOCS') &
        df.model_name.isin(['logistic_regression', 'svm', 'cnn'])]
df = group_results(df, order=True)

ngram_11 = _pairs_with_word_ngram(df, '1, 1')
ngram_14 = _pairs_with_word_ngram(df, '1, 4')

pd.concat([ngram_11, ngram_14], axis=0)

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)
  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,train_domain,test_domain,features_,logistic_regression - P,logistic_regression - R,logistic_regression - F1,svm - P,svm - R,svm - F1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,mean,mean,mean,mean,mean
0,BHOCS,BHOCS,"sentiment + ngram words (1, 1) reduced",0.735,0.735,0.735,0.743,0.742,0.742
1,BHOCS,BHOCS,"sentiment + ngram words (1, 1)",0.736,0.736,0.735,0.74,0.739,0.739
2,BHOCS,BHOCS,"ngram words (1, 1) + ngram typed dependency (1, 4) reduced",0.75,0.749,0.749,0.753,0.752,0.752
3,BHOCS,BHOCS,"ngram words (1, 1) + ngram typed dependency (1, 4)",0.747,0.746,0.746,0.746,0.746,0.746
4,BHOCS,BHOCS,"ngram words (1, 1) + ngram typed dependency (1, 1) reduced",0.75,0.749,0.749,0.749,0.749,0.749
5,BHOCS,BHOCS,"ngram words (1, 1) + ngram typed dependency (1, 1)",0.743,0.742,0.742,0.748,0.748,0.747
6,BHOCS,BHOCS,"ngram words (1, 1) + bert_doc reduced",0.757,0.757,0.757,0.745,0.744,0.744
7,BHOCS,BHOCS,"ngram words (1, 1) + bert_doc",0.758,0.757,0.757,0.75,0.749,0.749
0,BHOCS,BHOCS,"sentiment + ngram words (1, 4) reduced",0.752,0.738,0.734,0.732,0.724,0.722
1,BHOCS,BHOCS,"sentiment + ngram words (1, 4)",0.747,0.73,0.725,0.739,0.727,0.724


### Table 13: Comparison of the results obtained from Logit, SVM, and CNN with the baselines. The models were trained and tested on the BHOCS data.

In [148]:
df = group_results(read_file('../experiments/2_rq1/results_rq1.pkl'), order=True)

is_lr = df['model_name'] == 'logistic_regression'
is_svm = df['model_name'] == 'svm'

# group_results(order=True) sorts by mean F1 descending, so the first row of
# each model is its best-scoring feature set.
lr = df[is_lr].iloc[:1]
svm = df[is_svm].iloc[:1]

# Every other model keeps all of its rows.
df = df[~(is_lr | is_svm)]

pd.concat([df, lr, svm]).sort_values(by=[('macro avg f1-score', 'mean')], ascending=False)

Unnamed: 0_level_0,train_domain,test_domain,model_name,features_,macro avg precision,macro avg recall,macro avg f1-score
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,mean,mean
0,BHOCS,BHOCS,logistic_regression,"ngram words (1, 4) + ngram typed dependency (1, 4) reduced + bert_doc reduced",0.774,0.772,0.771
1,BHOCS,BHOCS,svm,"ngram words (1, 4) + ngram typed dependency (1, 4) + bert_doc reduced",0.774,0.772,0.771
258,BHOCS,BHOCS,cnn,bert_word,0.741,0.733,0.73
297,BHOCS,BHOCS,gender_word,gender_word,0.729,0.649,0.616
298,BHOCS,BHOCS,threshold_classifier,threshold_classifier,0.604,0.604,0.604


### Table 14: The robustness of the classifiers when trained on the BHOCS data and tested across datasets.

In [165]:
# Results of the models trained on BHOCS, for every test domain in the file.
df=read_file('../experiments/3_rq2_across_data_domains/results_rq2.pkl')
df=df[(df['train_domain'] == 'BHOCS') & df.model_name.isin(['logistic_regression', 'svm', 'cnn'])]
df=group_results(df, order=True)

# A stray bare `drop_domains` expression used to sit here; it is not a
# notebook-level name and raised NameError on a fresh kernel.
# Only the first frame keeps the domain columns. The frames are concatenated
# by position, which presumably assumes each model has one row per test
# domain -- verify if the inputs change.
lr=get_results_by_model(df, 'logistic_regression', 'test_domain', drop_domains=False)
svm=get_results_by_model(df, 'svm', 'test_domain')
cnn=get_results_by_model(df, 'cnn', 'test_domain')

pd.concat([lr, svm, cnn], axis=1)

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,train_domain,test_domain,logistic_regression - P,logistic_regression - R,logistic_regression - F1,svm - P,svm - R,svm - F1,cnn - P,cnn - R,cnn - F1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
0,BHOCS,S,0.692,0.69,0.689,0.7,0.697,0.696,0.664,0.657,0.653
1,BHOCS,CM,0.694,0.676,0.668,0.697,0.677,0.668,0.666,0.642,0.628
2,BHOCS,C,0.754,0.747,0.745,0.762,0.755,0.753,0.742,0.729,0.725
3,BHOCS,BHOM,0.702,0.679,0.67,0.686,0.666,0.657,0.639,0.617,0.602
4,BHOCS,BHOCSM,0.692,0.678,0.672,0.689,0.674,0.668,0.652,0.634,0.622
5,BHOCS,BHOCS,0.774,0.772,0.771,0.774,0.772,0.771,0.741,0.733,0.73
6,BHOCS,BHO,0.839,0.839,0.839,0.827,0.826,0.826,0.782,0.774,0.772


### Table 15: The performance of the classifiers when trained across 6 datasets and tested on the scales dataset.

In [164]:
df = read_file('../experiments/3_rq2_across_data_domains/results_rq2.pkl')

# Keep the runs evaluated on the 'S' test domain for the three learned models.
tested_on_s = df['test_domain'] == 'S'
is_compared_model = df.model_name.isin(['logistic_regression', 'svm', 'cnn'])
df = group_results(df[tested_on_s & is_compared_model], order=True)

# Only the logistic-regression frame keeps the domain columns; the other two
# contribute just their metric columns to the side-by-side table.
lr, svm, cnn = [
    get_results_by_model(df, model, 'train_domain',
                         drop_domains=(model != 'logistic_regression'))
    for model in ('logistic_regression', 'svm', 'cnn')
]

pd.concat([lr, svm, cnn], axis=1)

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,train_domain,test_domain,logistic_regression - P,logistic_regression - R,logistic_regression - F1,svm - P,svm - R,svm - F1,cnn - P,cnn - R,cnn - F1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
0,CM,S,0.571,0.532,0.459,0.572,0.528,0.448,0.546,0.533,0.48
1,C,S,0.524,0.516,0.455,0.51,0.504,0.441,0.49,0.487,0.427
2,BHOM,S,0.536,0.53,0.506,0.537,0.533,0.507,0.527,0.52,0.486
3,BHOCSM,S,0.678,0.67,0.666,0.684,0.677,0.673,0.667,0.664,0.662
4,BHOCS,S,0.692,0.69,0.689,0.7,0.697,0.696,0.664,0.657,0.653
5,BHO,S,0.551,0.543,0.525,0.547,0.539,0.513,0.554,0.541,0.502
