<a href="https://colab.research.google.com/github/hristijanpeshov/SHAP-Explainable-Lexicon-Model/blob/master/notebooks/FinBERT%20notebooks/FinBERT_model_evaluation_all_metrics_results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# User Input

In [None]:
# Locations of the summary_df.csv file for each of the three lexicons
# (nasdaq, fpb, sentfin). Each file is located in the results folder of the
# corresponding lexicon; edit these paths to match your own storage layout.
# NOTE(review): the defaults look like Google Drive paths mounted inside
# Colab -- confirm the drive is mounted before running the cells below.

nasdaq_res_loc = '/content/drive/MyDrive/finbert process/nasdaq/concatenated datasets/results/summary_df.csv'
fpb_res_loc = '/content/drive/MyDrive/finbert process/fpb/concatenated datasets/results/summary_df.csv'
sentfin_res_loc = '/content/drive/MyDrive/finbert process/sentfin/concatenated datasets/results/summary_df.csv'

# All metrics summary

In [None]:
import pandas as pd

# load the per-lexicon summary tables from the CSV locations configured in
# the "User Input" section (one DataFrame per lexicon)
nasdaq_res = pd.read_csv(nasdaq_res_loc)
fpb_res = pd.read_csv(fpb_res_loc)
sentfin_res = pd.read_csv(sentfin_res_loc)

In [None]:
# stamp every row of the sentfin and fpb results with a fixed lexicon source
# label (this creates the column if absent and overwrites it otherwise)
# NOTE(review): nasdaq_res is not relabelled here -- presumably its CSV
# already carries the desired 'Lexicon Source' value; confirm.
sentfin_res['Lexicon Source'] = 'fiqa_fpb_sentfin_neutral'
fpb_res['Lexicon Source'] = 'financial_phrase_bank'

In [None]:
def get_metric_values(df, eval_df, normalized, metric):
  """Collect one metric value per word source for a single evaluation setup.

  Args:
    df: summary DataFrame containing the columns 'Evaluation Dataset',
      'Lexicon Normalized', 'Words Source' and the requested metric column.
    eval_df: value of 'Evaluation Dataset' to select.
    normalized: value of 'Lexicon Normalized' to select.
    metric: name of the metric column to read (e.g. 'Accuracy').

  Returns:
    A list with six entries, one per word source, in the fixed order
    'LMD', 'OUR', 'OUR + LMD', 'LMD on LMD', 'OUR on LMD', 'OUR + LMD on LMD'.
    When several rows match a combination, the first one is used.

  Raises:
    IndexError: if no row matches one of the combinations. The message names
      the missing combination.
    KeyError: if one of the required columns is absent from df.
  """
  # the different types of evaluation
  word_sources = ['LMD', 'OUR', 'OUR + LMD', 'LMD on LMD', 'OUR on LMD', 'OUR + LMD on LMD']

  # these two filters do not depend on the word source, so build them once
  eval_df_mask = df['Evaluation Dataset'] == eval_df
  lexicon_normalized_mask = df['Lexicon Normalized'] == normalized
  setup_mask = eval_df_mask & lexicon_normalized_mask

  all_metric_values = []

  for ws in word_sources:
    word_source_mask = df['Words Source'] == ws

    # single .loc lookup instead of chained df[mask][metric] indexing
    matching_values = df.loc[setup_mask & word_source_mask, metric].values

    if len(matching_values) == 0:
      # same exception type as the bare [0] lookup, but with a usable message
      raise IndexError(
        f"No row in the summary for Evaluation Dataset={eval_df!r}, "
        f"Lexicon Normalized={normalized!r}, Words Source={ws!r}"
      )

    all_metric_values.append(matching_values[0])

  return all_metric_values

def is_coef_irregular(coefs):
  """Tell whether a collection of coefficient values looks inconsistent.

  A collection holding exactly one value is always regular. Any other
  collection is irregular unless it contains a backslash string.
  NOTE(review): the backslash is presumably a placeholder used in the
  results file for "no coefficient" -- confirm against the data producer.
  """
  # exactly one distinct value: nothing to complain about
  if len(coefs) == 1:
    return False

  return '\\' not in coefs

def get_coefs(df):
  """Return the first distinct value of the C1..C4 columns of df as a list.

  Prints 'Missing values for coefficients' when is_coef_irregular flags the
  distinct values of at least one of the four columns; the first values are
  returned regardless.
  """
  distinct_per_column = [df[name].unique() for name in ('C1', 'C2', 'C3', 'C4')]

  # warn only; the caller still gets the first value of every column
  if any(is_coef_irregular(distinct) for distinct in distinct_per_column):
    print('Missing values for coefficients')

  return [distinct[0] for distinct in distinct_per_column]

def create_summary_dataset(df, metric):
  """Build a wide summary table of one metric for a single lexicon's results.

  For every distinct evaluation dataset in df, two rows are produced: first
  for the normalized lexicon (True), then for the non-normalized one (False).
  Each row holds the lexicon source, the normalization flag, the evaluation
  dataset, the decision maker label, the four coefficients from get_coefs,
  and the six metric values returned by get_metric_values.

  Args:
    df: results DataFrame of one lexicon.
    metric: name of the metric column to summarize.

  Returns:
    A new DataFrame with one row per (evaluation dataset, normalized) pair.
  """
  # the first distinct value is taken as the lexicon name
  lexicon_name = df['Lexicon Source'].unique()[0]
  evaluation_names = df['Evaluation Dataset'].unique()
  coefficient_values = get_coefs(df)

  # every row is labelled with the same decision maker
  row_decision_maker = 'average_shap_values'

  table_rows = [
    [lexicon_name, is_normalized, evaluation_name, row_decision_maker]
    + coefficient_values
    + get_metric_values(df, evaluation_name, is_normalized, metric)
    for evaluation_name in evaluation_names
    for is_normalized in (True, False)
  ]

  header = [
    'Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset', 'Decision Maker',
    'C1', 'C2', 'C3', 'C4',
    'LM', 'XLex', 'XLex + LM', 'LM on LM', 'XLex on LM', 'XLex + LM on LM',
  ]

  return pd.DataFrame(table_rows, columns = header)

In [None]:
# metrics to summarize and the per-lexicon result tables to draw them from
metrics = ['Accuracy', 'F1', 'MCC', 'Precision', 'Recall']
sources = [nasdaq_res, sentfin_res, fpb_res]
# maps a metric name to the table stacking the summaries of all lexicons
metric_dfs_map = {}

# for each metric, extract the results for all of the source lexicons
for metric in metrics:
  # build every per-lexicon summary first and concatenate once: growing a
  # DataFrame from an empty pd.DataFrame() inside the loop re-copies the
  # accumulated rows each time and triggers the pandas deprecation warning
  # about concatenating empty entries
  summary_datasets = [create_summary_dataset(source, metric) for source in sources]
  df = pd.concat(summary_datasets, ignore_index = True)

  metric_dfs_map[metric] = df

In [None]:
# show the Accuracy table ordered by lexicon source (descending), then
# normalized lexicons first (True before False), then evaluation dataset (ascending)
metric_dfs_map['Accuracy'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
2,nasdaq,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.302825,0.768362,0.778531,0.752809,0.735955,0.761236
4,nasdaq,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.313433,0.761194,0.771144,0.807692,0.833333,0.858974
0,nasdaq,True,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.29572,0.747082,0.75227,0.747541,0.754098,0.767213
6,nasdaq,True,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.274788,0.725212,0.728045,0.751938,0.775194,0.782946
3,nasdaq,False,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.302825,0.693785,0.722034,0.752809,0.69382,0.764045
5,nasdaq,False,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.313433,0.761194,0.761194,0.807692,0.846154,0.846154
1,nasdaq,False,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.29572,0.70882,0.723735,0.747541,0.729508,0.767213
7,nasdaq,False,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.274788,0.713881,0.72238,0.751938,0.767442,0.790698
10,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.302825,0.830508,0.836158,0.752809,0.755618,0.769663
12,fiqa_fpb_sentfin_neutral,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.313433,0.791045,0.810945,0.807692,0.794872,0.846154


In [None]:
# show the F1 table ordered by lexicon source (descending), then
# normalized lexicons first (True before False), then evaluation dataset (ascending)
metric_dfs_map['F1'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
2,nasdaq,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.286817,0.421435,0.432381,0.688254,0.441095,0.69387
4,nasdaq,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.297491,0.496581,0.505057,0.789379,0.802069,0.836353
0,nasdaq,True,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.295412,0.448329,0.457878,0.725735,0.476969,0.740413
6,nasdaq,True,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.304723,0.683613,0.693177,0.751565,0.774326,0.782933
3,nasdaq,False,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.286817,0.389312,0.407581,0.688254,0.418533,0.696443
5,nasdaq,False,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.297491,0.497711,0.499818,0.789379,0.81502,0.823396
1,nasdaq,False,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.295412,0.427824,0.441595,0.725735,0.463686,0.740413
7,nasdaq,False,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.304723,0.676496,0.691917,0.751565,0.766304,0.790698
10,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.286817,0.708175,0.724109,0.688254,0.672929,0.701623
12,fiqa_fpb_sentfin_neutral,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.297491,0.521668,0.536802,0.789379,0.764528,0.823396


In [None]:
# show the MCC table ordered by lexicon source (descending), then
# normalized lexicons first (True before False), then evaluation dataset (ascending)
metric_dfs_map['MCC'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
2,nasdaq,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.191545,0.314963,0.351917,0.489058,0.413736,0.48948
4,nasdaq,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.215596,0.497095,0.52481,0.593824,0.604496,0.673067
0,nasdaq,True,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.208682,0.374968,0.414537,0.519298,0.451056,0.526439
6,nasdaq,True,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.264968,0.417022,0.452251,0.566951,0.564648,0.612778
3,nasdaq,False,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.191545,0.278537,0.326022,0.489058,0.378072,0.492794
5,nasdaq,False,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.215596,0.503234,0.515987,0.593824,0.631532,0.648181
1,nasdaq,False,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.208682,0.334136,0.38125,0.519298,0.424667,0.526439
7,nasdaq,False,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.264968,0.415274,0.464746,0.566951,0.5461,0.624444
10,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.191545,0.454297,0.494493,0.489058,0.417853,0.49951
12,fiqa_fpb_sentfin_neutral,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.215596,0.565072,0.607936,0.593824,0.53033,0.648181


In [None]:
# show the Precision table ordered by lexicon source (descending), then
# normalized lexicons first (True before False), then evaluation dataset (ascending)
metric_dfs_map['Precision'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
2,nasdaq,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.456199,0.412915,0.421709,0.684298,0.439031,0.685809
4,nasdaq,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.520018,0.49652,0.504848,0.780027,0.805929,0.83283
0,nasdaq,True,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.485867,0.441445,0.450676,0.728801,0.471238,0.734272
6,nasdaq,True,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.525425,0.680328,0.693823,0.788138,0.778767,0.807224
3,nasdaq,False,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.456199,0.397854,0.409574,0.684298,0.427754,0.687362
5,nasdaq,False,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.520018,0.497773,0.500636,0.780027,0.823864,0.817308
1,nasdaq,False,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.485867,0.426595,0.439103,0.728801,0.460954,0.734272
7,nasdaq,False,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.525425,0.678128,0.698065,0.788138,0.769481,0.812222
10,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.456199,0.679563,0.692487,0.684298,0.662076,0.690537
12,fiqa_fpb_sentfin_neutral,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.520018,0.526668,0.541522,0.780027,0.759615,0.817308


In [None]:
# show the Recall table ordered by lexicon source (descending), then
# normalized lexicons first (True before False), then evaluation dataset (ascending)
metric_dfs_map['Recall'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
2,nasdaq,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.251752,0.472915,0.489652,0.824444,0.514542,0.822361
4,nasdaq,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.210644,0.503907,0.514659,0.814815,0.798611,0.840278
0,nasdaq,True,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.239366,0.477886,0.496436,0.794656,0.497436,0.795744
6,nasdaq,True,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.261609,0.741098,0.763811,0.778889,0.785926,0.805556
3,nasdaq,False,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.251752,0.467642,0.488698,0.824444,0.502552,0.824033
5,nasdaq,False,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.210644,0.506885,0.512841,0.814815,0.80787,0.831019
1,nasdaq,False,fpb_fiqa,average_shap_values,0.5,0.1,0.1,0.5,0.239366,0.466072,0.486209,0.794656,0.490422,0.795744
7,nasdaq,False,sem_eval,average_shap_values,0.5,0.1,0.1,0.5,0.261609,0.742035,0.772624,0.778889,0.776667,0.812222
10,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.5,0.1,0.1,0.5,0.251752,0.787345,0.817585,0.824444,0.769319,0.827378
12,fiqa_fpb_sentfin_neutral,True,fiqa_labeled_df,average_shap_values,0.5,0.1,0.1,0.5,0.210644,0.521273,0.536822,0.814815,0.770833,0.831019
