# User Input

In [None]:
# Enter the location of the summary_df.csv file for each of the three lexicons
# (nasdaq, fpb, sentfin). Each file is located in the results folder of the
# corresponding lexicon.

nasdaq_res_loc = '/content/drive/MyDrive/finbert process/nasdaq/results/summary_df.csv'
fpb_res_loc = '/content/drive/MyDrive/finbert process/fpb/results/summary_df.csv'
sentfin_res_loc = '/content/drive/MyDrive/finbert process/sentfin/results/summary_df.csv'

# All metrics summary

In [None]:
import pandas as pd

# Load each lexicon's results summary from the CSV paths defined in the
# user-input cell; one DataFrame per lexicon.
nasdaq_res = pd.read_csv(nasdaq_res_loc)
fpb_res = pd.read_csv(fpb_res_loc)
sentfin_res = pd.read_csv(sentfin_res_loc)

In [None]:
# Set the 'Lexicon Source' column to one fixed label on every row of these
# two frames (any existing values in that column are overwritten).
# NOTE(review): nasdaq_res is not relabelled here - presumably its CSV already
# carries the desired 'Lexicon Source' value; confirm.
sentfin_res['Lexicon Source'] = 'fiqa_fpb_sentfin_neutral'
fpb_res['Lexicon Source'] = 'financial_phrase_bank'

In [None]:
def get_metric_values(df, eval_df, normalized, metric):
  """Collect one metric value per word source for a single evaluation setup.

  Args:
    df: results DataFrame containing the columns 'Evaluation Dataset',
      'Lexicon Normalized', 'Words Source' and the column named by `metric`.
    eval_df: value of 'Evaluation Dataset' to select.
    normalized: value of 'Lexicon Normalized' to select.
    metric: name of the column the values are read from.

  Returns:
    A list of six values ordered as: 'LMD', 'OUR', 'OUR + LMD', 'LMD on LMD',
    'OUR on LMD', 'OUR + LMD on LMD'. When several rows match a word source,
    the first matching row is used.

  Raises:
    IndexError: if no row matches one of the word sources for the requested
      evaluation dataset / normalization combination.
  """
  # the different types of evaluation
  word_sources = ['LMD', 'OUR', 'OUR + LMD', 'LMD on LMD', 'OUR on LMD', 'OUR + LMD on LMD']

  # these two filters do not depend on the word source, so build them once
  setup_mask = (df['Evaluation Dataset'] == eval_df) & (df['Lexicon Normalized'] == normalized)

  all_metric_values = []

  for ws in word_sources:
    matches = df.loc[setup_mask & (df['Words Source'] == ws), metric]

    # fail with a message that names the missing combination instead of an
    # opaque "index 0 is out of bounds" error
    if matches.empty:
      raise IndexError(
        f"no row with Evaluation Dataset={eval_df!r}, "
        f"Lexicon Normalized={normalized!r}, Words Source={ws!r}"
      )

    all_metric_values.append(matches.values[0])

  return all_metric_values

def is_coef_irregular(coefs):
  """Tell whether a collection of coefficient values looks inconsistent.

  A collection with exactly one element is regular. Any other length is
  irregular unless the collection contains the single-backslash marker.
  """
  if len(coefs) == 1:
    return False

  return '\\' not in coefs

def get_coefs(df):
  """Return the first distinct value of each of the columns C1, C2, C3, C4.

  A message is printed when is_coef_irregular flags the distinct values of any
  of the four columns; the values are returned regardless.
  """
  distinct_per_column = [df[name].unique() for name in ('C1', 'C2', 'C3', 'C4')]

  if any(is_coef_irregular(values) for values in distinct_per_column):
    print('Missing values for coefficients')

  return [values[0] for values in distinct_per_column]

def create_summary_dataset(df, metric):
  """Build the summary table of one metric for a single source lexicon.

  For every distinct 'Evaluation Dataset' in `df`, two rows are produced: first
  for the normalized lexicon (True), then for the non-normalized one (False).
  Each row holds the lexicon name, the normalization flag, the evaluation
  dataset, the decision maker, the coefficients C1..C4 and the six metric
  values returned by get_metric_values (stored under the LM / XLex column
  names).
  """
  # the source lexicon name is taken from the first distinct value
  lexicon_name = df['Lexicon Source'].unique()[0]
  evaluation_sets = df['Evaluation Dataset'].unique()
  coefficients = get_coefs(df)
  # the decision maker is average_shap_values
  decision_maker = 'average_shap_values'

  rows = [
    [lexicon_name, is_normalized, evaluation_set, decision_maker]
    + coefficients
    + get_metric_values(df, evaluation_set, is_normalized, metric)
    for evaluation_set in evaluation_sets
    for is_normalized in (True, False)
  ]

  column_names = [
    'Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset', 'Decision Maker',
    'C1', 'C2', 'C3', 'C4',
    'LM', 'XLex', 'XLex + LM', 'LM on LM', 'XLex on LM', 'XLex + LM on LM',
  ]

  return pd.DataFrame(rows, columns = column_names)

In [None]:
metrics = ['Accuracy', 'F1', 'MCC']
sources = [nasdaq_res, sentfin_res, fpb_res]
metric_dfs_map = {}

# For each metric, build one summary table per source lexicon and stack them.
# The tables are concatenated in a single call: growing a DataFrame with
# pd.concat inside the loop re-copies the accumulated rows every time, and
# starting from an empty pd.DataFrame() relies on pandas' deprecated handling
# of empty entries in concat.
for metric in metrics:
  summary_datasets = [create_summary_dataset(source, metric) for source in sources]
  df = pd.concat(summary_datasets, ignore_index = True)

  metric_dfs_map[metric] = df

In [None]:
# presenting the results in relation to the Accuracy, sorted by lexicon source
# (descending), normalized flag (descending, i.e. True first) and evaluation
# dataset (ascending)
metric_dfs_map['Accuracy'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
6,nasdaq,True,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.361809,0.756281,0.753769,0.8,0.811111,0.805556
4,nasdaq,True,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.300565,0.667797,0.707345,0.757835,0.65812,0.757835
2,nasdaq,True,fiqa_labeled_df,average_shap_values,0.7,0.2,0.3,0.5,0.274376,0.603175,0.622449,0.722388,0.674627,0.725373
0,nasdaq,True,fpb_fiqa,average_shap_values,0.7,0.2,0.3,0.5,0.284047,0.642023,0.667315,0.742373,0.677966,0.744068
7,nasdaq,False,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.361809,0.736181,0.743719,0.8,0.783333,0.8
5,nasdaq,False,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.300565,0.649718,0.698305,0.757835,0.635328,0.757835
3,nasdaq,False,fiqa_labeled_df,average_shap_values,0.7,0.2,0.3,0.5,0.274376,0.598639,0.624717,0.722388,0.653731,0.722388
1,nasdaq,False,fpb_fiqa,average_shap_values,0.7,0.2,0.3,0.5,0.284047,0.63035,0.66537,0.742373,0.650847,0.742373
14,fiqa_fpb_sentfin_neutral,True,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.361809,0.766332,0.798995,0.8,0.744444,0.816667
12,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.300565,0.847458,0.847458,0.757835,0.757835,0.757835


In [None]:
# presenting the results in relation to the F1 score, sorted by lexicon source
# (descending), normalized flag (descending, i.e. True first) and evaluation
# dataset (ascending)
metric_dfs_map['F1'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
6,nasdaq,True,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.325032,0.749173,0.748043,0.78729,0.795318,0.793787
4,nasdaq,True,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.28261,0.39311,0.413041,0.688065,0.60662,0.688065
2,nasdaq,True,fiqa_labeled_df,average_shap_values,0.7,0.2,0.3,0.5,0.276173,0.402427,0.414955,0.721753,0.450349,0.725077
0,nasdaq,True,fpb_fiqa,average_shap_values,0.7,0.2,0.3,0.5,0.284615,0.406212,0.421964,0.717831,0.438716,0.721192
7,nasdaq,False,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.325032,0.728487,0.738542,0.78729,0.761021,0.78729
5,nasdaq,False,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.28261,0.38163,0.405922,0.688065,0.587968,0.688065
3,nasdaq,False,fiqa_labeled_df,average_shap_values,0.7,0.2,0.3,0.5,0.276173,0.3996,0.416143,0.721753,0.436458,0.721753
1,nasdaq,False,fpb_fiqa,average_shap_values,0.7,0.2,0.3,0.5,0.284615,0.399432,0.419746,0.717831,0.423107,0.717831
14,fiqa_fpb_sentfin_neutral,True,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.325032,0.766153,0.798812,0.78729,0.739819,0.803253
12,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.28261,0.738017,0.745595,0.688065,0.678935,0.692286


In [None]:
# presenting the results in relation to the MCC, sorted by lexicon source
# (descending), normalized flag (descending, i.e. True first) and evaluation
# dataset (ascending)
metric_dfs_map['MCC'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
6,nasdaq,True,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.251666,0.547226,0.534831,0.596553,0.629927,0.607449
4,nasdaq,True,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.185572,0.36775,0.393666,0.487274,0.40838,0.487274
2,nasdaq,True,fiqa_labeled_df,average_shap_values,0.7,0.2,0.3,0.5,0.190023,0.32843,0.363832,0.48071,0.40125,0.49505
0,nasdaq,True,fpb_fiqa,average_shap_values,0.7,0.2,0.3,0.5,0.195488,0.369392,0.407152,0.503581,0.412836,0.516313
7,nasdaq,False,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.251666,0.50445,0.510262,0.596553,0.577695,0.596553
5,nasdaq,False,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.185572,0.33763,0.37061,0.487274,0.388781,0.487274
3,nasdaq,False,fiqa_labeled_df,average_shap_values,0.7,0.2,0.3,0.5,0.190023,0.325049,0.360092,0.48071,0.373605,0.48071
1,nasdaq,False,fpb_fiqa,average_shap_values,0.7,0.2,0.3,0.5,0.195488,0.356036,0.395532,0.503581,0.385878,0.503581
14,fiqa_fpb_sentfin_neutral,True,dev_df,average_shap_values,0.7,0.2,0.3,0.5,0.251666,0.533091,0.599607,0.596553,0.479638,0.63685
12,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.7,0.2,0.3,0.5,0.185572,0.517514,0.543086,0.487274,0.448138,0.506898
