# Rank tokens
Process the terms retrieved from the neural network models using `LIME`: for each label, sum the LIME weights of every term and rank the terms by total weight.

In [1]:
import os
import pandas as pd

In [2]:
# Base locations used to build every input/output path in this notebook.
# Both are relative paths, so they resolve against the current working directory.
root_dir = "../.."       # project root
data_dir = "data/terms"  # folder (under root_dir) holding the term files

In [3]:
# Identifiers that are combined into the file names processed below:
#   relevant_terms_{dataset_name}_{model_name}_{test_name}.csv
dataset_name = "alaska"

# Models whose terms are ranked.
model_names = [
    "nn",
    "bert",
]

# Experiments to rank for each model.
test_names = [
    "sampling_terms_test1",
    "sampling_terms_test2",
    "sampling_records_test1",
    "sampling_records_test2",
    "sampling_records_and_terms_test1",
]

In [4]:
def compute_rankings(dataset_name, model_name, test_name):
    """Rank the relevant terms of one experiment and save the ranking to Excel.

    Reads ``relevant_terms_{dataset_name}_{model_name}_{test_name}.csv`` from
    ``root_dir/data_dir``, sums the ``weight`` column for every
    ``(label, term)`` pair, orders the terms of each label by decreasing
    total weight and writes the result to
    ``ranking_{dataset_name}_{model_name}_{test_name}.xlsx`` in the same
    directory.

    The input CSV must provide the columns ``label``, ``term`` and ``weight``.

    Args:
        dataset_name: Dataset identifier used in the input/output file names.
        model_name: Model identifier used in the input/output file names.
        test_name: Test identifier used in the input/output file names.

    Returns:
        None. The ranking is only written to disk.
    """
    filename = f'relevant_terms_{dataset_name}_{model_name}_{test_name}.csv'
    filepath = os.path.join(root_dir, data_dir, filename)
    terms_df = pd.read_csv(filepath)

    # aggregate (sum) LIME weights for each term
    terms_stats_df = terms_df.groupby(['label', 'term']).agg({'weight': 'sum'}).reset_index()

    # sort terms: labels in ascending order, weights in descending order
    # within each label. A single multi-key sort is used instead of
    # groupby('label').apply(sort_values): applying a function to the
    # grouping column is deprecated in recent pandas releases, and once the
    # grouping column is excluded the 'label' column disappears from the output.
    ranking_df = terms_stats_df.sort_values(by=['label', 'weight'],
                                            ascending=[True, False])\
                               .reset_index(drop=True)

    ranking_filename = f'ranking_{dataset_name}_{model_name}_{test_name}.xlsx'
    ranking_filepath = os.path.join(root_dir, data_dir, ranking_filename)
    ranking_df.to_excel(ranking_filepath)

In [5]:
# Produce one ranking spreadsheet per (model, test) combination
# for the configured dataset.
for model_name in model_names:
    for test_name in test_names:
        compute_rankings(
            dataset_name=dataset_name,
            model_name=model_name,
            test_name=test_name,
        )

---