<a href="https://colab.research.google.com/github/hristijanpeshov/SHAP-Explainable-Lexicon-Model/blob/master/Model_Evaluation_All_Metrics_Results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# User Input

In [2]:
# Enter the location of the summary_df.csv file for each of the lexicons.
# Each file is located in the results folder of the corresponding lexicon.
# NOTE(review): the values below point under /content/drive/MyDrive, so they
# presumably require Google Drive to be mounted first (no mount cell is visible
# here) - confirm before running.

nasdaq_res_loc = '/content/drive/MyDrive/nasdaq/concatenated datasets/results/summary_df.csv'
fpb_res_loc = '/content/drive/MyDrive/fpb/concatenated datasets/results/summary_df.csv'
sentfin_res_loc = '/content/drive/MyDrive/sentfin/concatenated datasets/results/summary_df.csv'

# All metrics summary

In [3]:
import pandas as pd

# load the results table of each lexicon from the locations entered above
nasdaq_res = pd.read_csv(filepath_or_buffer=nasdaq_res_loc)
fpb_res = pd.read_csv(filepath_or_buffer=fpb_res_loc)
sentfin_res = pd.read_csv(filepath_or_buffer=sentfin_res_loc)

In [4]:
# Stamp a single 'Lexicon Source' label on every row of these two frames
# (the column is created if absent and overwritten otherwise). nasdaq_res is
# not relabelled here and keeps whatever its CSV provides.
# NOTE(review): presumably the labels stored in these two CSVs differ from the
# names wanted in the summary tables - confirm against the result files.
sentfin_res['Lexicon Source'] = 'fiqa_fpb_sentfin_neutral'
fpb_res['Lexicon Source'] = 'financial_phrase_bank'

In [5]:
def get_metric_values(df, eval_df, normalized, metric):
  """Collect one metric value per word source for a single evaluation setting.

  Parameters
  ----------
  df : pandas.DataFrame
      Results table containing the columns 'Evaluation Dataset',
      'Lexicon Normalized', 'Words Source' and the column named by ``metric``.
  eval_df
      Value matched against the 'Evaluation Dataset' column.
  normalized
      Value matched against the 'Lexicon Normalized' column.
  metric : str
      Name of the column whose values are extracted.

  Returns
  -------
  list
      Six values in the order LMD, OUR, OUR + LMD, LMD on LMD, OUR on LMD,
      OUR + LMD on LMD. When several rows match a word source, the first
      matching row is used.

  Raises
  ------
  IndexError
      If no row matches the requested setting for one of the word sources.
      The message names the missing combination.
  """
  # the different types of evaluation
  word_sources = ['LMD', 'OUR', 'OUR + LMD', 'LMD on LMD', 'OUR on LMD', 'OUR + LMD on LMD']

  # these two filters do not depend on the word source, so build them once
  setting_mask = (df['Evaluation Dataset'] == eval_df) & (df['Lexicon Normalized'] == normalized)

  all_metric_values = []

  for ws in word_sources:
    # extracting the selected metric values for this word source
    matches = df.loc[setting_mask & (df['Words Source'] == ws), metric]

    if matches.empty:
      # same exception type as the bare `.values[0]` lookup, but it says what is missing
      raise IndexError(
        f"No row found for Evaluation Dataset={eval_df!r}, "
        f"Lexicon Normalized={normalized!r}, Words Source={ws!r}"
      )

    all_metric_values.append(matches.values[0])

  return all_metric_values

def is_coef_irregular(coefs):
  """Tell whether a collection of distinct coefficient values looks irregular.

  A collection holding exactly one value is regular. Any other collection is
  irregular unless it contains a single backslash character.
  NOTE(review): the backslash is presumably the placeholder stored when a
  coefficient does not apply to a row - confirm against the code that writes
  summary_df.csv.
  """
  # exactly one distinct value: nothing to complain about
  if len(coefs) == 1:
    return False

  # otherwise only the presence of the backslash entry makes it regular
  return '\\' not in coefs

def get_coefs(df):
  """Return the first distinct value of each coefficient column.

  Reads the columns 'C1', 'C2', 'C3' and 'C4' of ``df`` and returns a list
  with the first unique value of each, in that order. If any column is
  reported as irregular by ``is_coef_irregular``, a warning is printed and
  the values are still returned.
  """
  distinct_per_column = [df[name].unique() for name in ('C1', 'C2', 'C3', 'C4')]

  # warn once, however many columns are affected, and carry on
  if any(is_coef_irregular(values) for values in distinct_per_column):
    print('Missing values for coefficients')

  return [values[0] for values in distinct_per_column]

def create_summary_dataset(df, metric):
  """Build the summary table of one metric for one source lexicon.

  For every distinct value in the 'Evaluation Dataset' column of ``df`` two
  rows are produced: first for the normalized lexicon (True), then for the
  non-normalized one (False). Each row holds the lexicon source (first
  distinct value of 'Lexicon Source'), the normalized flag, the evaluation
  dataset, the fixed decision maker 'average_shap_values', the four
  coefficients from ``get_coefs`` and the six values returned by
  ``get_metric_values``, stored in the columns 'LM', 'XLex', 'XLex + LM',
  'LM on LM', 'XLex on LM', 'XLex + LM on LM' in that order.

  Returns a new pandas DataFrame.
  """
  # values shared by every row of the table
  lexicon_source = df['Lexicon Source'].unique()[0]
  evaluation_datasets = df['Evaluation Dataset'].unique()
  coefficients = get_coefs(df)
  decision_maker = 'average_shap_values'

  # one row per (evaluation dataset, normalized flag), normalized first
  rows = [
    [lexicon_source, is_normalized, evaluation_dataset, decision_maker]
    + coefficients
    + get_metric_values(df, evaluation_dataset, is_normalized, metric)
    for evaluation_dataset in evaluation_datasets
    for is_normalized in (True, False)
  ]

  column_names = [
    'Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset', 'Decision Maker',
    'C1', 'C2', 'C3', 'C4',
    'LM', 'XLex', 'XLex + LM', 'LM on LM', 'XLex on LM', 'XLex + LM on LM',
  ]

  return pd.DataFrame(rows, columns=column_names)

In [6]:
metrics = ['Accuracy', 'F1', 'MCC']
sources = [nasdaq_res, sentfin_res, fpb_res]
metric_dfs_map = {}

# for each metric, extract the results of every source lexicon and stack them
for metric in metrics:
  summary_datasets = []

  for source in sources:
    summary_dataset = create_summary_dataset(source, metric)
    summary_datasets.append(summary_dataset)

  # concatenate once instead of growing a frame inside the loop: this avoids
  # repeated copying and keeps an empty seed frame from influencing the dtypes
  df = pd.concat(summary_datasets, ignore_index = True)

  metric_dfs_map[metric] = df

In [7]:
# Accuracy results, ordered by lexicon source (descending), then normalized
# lexicons before non-normalized ones, then evaluation dataset (ascending)
metric_dfs_map['Accuracy'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
6,nasdaq,True,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.366834,0.688442,0.748744,0.793478,0.679348,0.809783
4,nasdaq,True,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.306215,0.844068,0.824859,0.744505,0.799451,0.752747
2,nasdaq,True,fiqa_labeled_df,average_shap_values,0.8,0.2,0.9,0.5,0.267574,0.668934,0.679138,0.700297,0.673591,0.700297
0,nasdaq,True,fpb_fiqa,average_shap_values,0.8,0.2,0.9,0.5,0.282101,0.763294,0.757458,0.722591,0.740864,0.725914
7,nasdaq,False,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.366834,0.69598,0.746231,0.793478,0.695652,0.804348
5,nasdaq,False,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.306215,0.831638,0.818079,0.744505,0.782967,0.75
3,nasdaq,False,fiqa_labeled_df,average_shap_values,0.8,0.2,0.9,0.5,0.267574,0.657596,0.674603,0.700297,0.655786,0.700297
1,nasdaq,False,fpb_fiqa,average_shap_values,0.8,0.2,0.9,0.5,0.282101,0.747082,0.748379,0.722591,0.72093,0.724252
14,fiqa_fpb_sentfin_neutral,True,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.366834,0.703518,0.746231,0.793478,0.711957,0.804348
12,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.306215,0.80904,0.787571,0.744505,0.804945,0.752747


In [8]:
# F1 results, ordered by lexicon source (descending), then normalized
# lexicons before non-normalized ones, then evaluation dataset (ascending)
metric_dfs_map['F1'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
6,nasdaq,True,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.32673,0.680172,0.748229,0.77971,0.679263,0.798794
4,nasdaq,True,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.284005,0.461195,0.703642,0.676389,0.449023,0.683772
2,nasdaq,True,fiqa_labeled_df,average_shap_values,0.8,0.2,0.9,0.5,0.266569,0.412101,0.435333,0.698769,0.641378,0.698769
0,nasdaq,True,fpb_fiqa,average_shap_values,0.8,0.2,0.9,0.5,0.276376,0.435724,0.453296,0.694347,0.440336,0.69733
7,nasdaq,False,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.32673,0.689614,0.74587,0.77971,0.695616,0.792481
5,nasdaq,False,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.284005,0.452117,0.695142,0.676389,0.440942,0.681301
3,nasdaq,False,fiqa_labeled_df,average_shap_values,0.8,0.2,0.9,0.5,0.266569,0.405818,0.432036,0.698769,0.628714,0.698769
1,nasdaq,False,fpb_fiqa,average_shap_values,0.8,0.2,0.9,0.5,0.276376,0.424205,0.445647,0.694347,0.430688,0.695837
14,fiqa_fpb_sentfin_neutral,True,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.32673,0.700492,0.74623,0.77971,0.71188,0.792481
12,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.284005,0.449606,0.672222,0.676389,0.468134,0.683772


In [9]:
# MCC results, ordered by lexicon source (descending), then normalized
# lexicons before non-normalized ones, then evaluation dataset (ascending)
metric_dfs_map['MCC'].sort_values(by=['Lexicon Source', 'Lexicon Normalized', 'Evaluation Dataset'], ascending=[False, False, True])

Unnamed: 0,Lexicon Source,Lexicon Normalized,Evaluation Dataset,Decision Maker,C1,C2,C3,C4,LM,XLex,XLex + LM,LM on LM,XLex on LM,XLex + LM on LM
6,nasdaq,True,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.250719,0.395649,0.498888,0.584795,0.388434,0.616635
4,nasdaq,True,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.187135,0.390839,0.449872,0.473721,0.358457,0.482897
2,nasdaq,True,fiqa_labeled_df,average_shap_values,0.8,0.2,0.9,0.5,0.173343,0.236421,0.315616,0.445288,0.286082,0.445288
0,nasdaq,True,fpb_fiqa,average_shap_values,0.8,0.2,0.9,0.5,0.183169,0.310065,0.385248,0.470736,0.322288,0.474761
7,nasdaq,False,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.250719,0.406998,0.493337,0.584795,0.411414,0.606022
5,nasdaq,False,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.187135,0.368028,0.434927,0.473721,0.342806,0.479814
3,nasdaq,False,fiqa_labeled_df,average_shap_values,0.8,0.2,0.9,0.5,0.173343,0.217747,0.305715,0.445288,0.257871,0.445288
1,nasdaq,False,fpb_fiqa,average_shap_values,0.8,0.2,0.9,0.5,0.183169,0.277611,0.362327,0.470736,0.297872,0.472746
14,fiqa_fpb_sentfin_neutral,True,dev_df,average_shap_values,0.8,0.2,0.9,0.5,0.250719,0.414085,0.492544,0.584795,0.442558,0.606022
12,fiqa_fpb_sentfin_neutral,True,financial_phrase_bank,average_shap_values,0.8,0.2,0.9,0.5,0.187135,0.38431,0.415549,0.473721,0.431645,0.482897
