# Process terms 
Process the terms that `LIME` retrieved from the neural network model.

## Load the retrieved terms

In [1]:
import os

In [2]:
# Pieces of the input CSV path; root_dir is relative to the working directory.
root_dir, data_dir = '../..', 'data/terms'
filename = 'relevant_terms_alaska_nn_noisy.csv'
filepath = os.path.join(os.path.join(root_dir, data_dir), filename)

In [3]:
import pandas as pd

In [4]:
# Read the retrieved terms into a DataFrame (all parser options left at pandas defaults).
terms_df = pd.read_csv(filepath_or_buffer=filepath)

In [5]:
terms_df.head()

Unnamed: 0,label,term,weight,data_id
0,ENTITY#44,nikon_d3200_dslr_camera,0.368663,0
1,ENTITY#44,55_200mm_lenses,0.218637,0
2,ENTITY#44,black_ebay,0.171967,0
3,ENTITY#44,18_55mm,0.112081,0
4,ENTITY#44,nikon_d3200,0.515836,1


---

## Rank terms

### Aggregate (sum) LIME weights for each (label, term) pair

In [6]:
# Sum the weight of every (label, term) pair; as_index=False keeps the two
# grouping keys as ordinary columns instead of a MultiIndex.
terms_stats_df = terms_df.groupby(['label', 'term'], as_index=False)['weight'].sum()

In [7]:
terms_stats_df.head()

Unnamed: 0,label,term,weight
0,ENTITY#101,22_3_mp_dslr_kit,0.130257
1,ENTITY#101,5d,1.146376
2,ENTITY#101,5d_mark,9.793319
3,ENTITY#101,5d_mark_iii,37.501637
4,ENTITY#101,5d_mark_iii_black_slr_digital_camera_kit_w/,1.635001


### Sort terms
Within each label, rank terms by total weight (descending)

In [8]:
# Rank the terms of every label by summed weight, highest first.
# Rows are ordered by label ascending (the same key order groupby produces)
# and, inside each label, by weight descending; the index is then renumbered
# from 0 so a row's position reflects its place in the overall ranking.
#
# A single multi-key sort is used instead of groupby('label').apply(sort):
# apply() operating on the grouping column is deprecated in recent pandas,
# and once that column is excluded from the frame handed to apply(), the
# following reset_index(drop=True) would silently discard `label`.
ranking_df = (
    terms_stats_df
    .sort_values(by=['label', 'weight'], ascending=[True, False])
    .reset_index(drop=True)
)

In [9]:
ranking_df.groupby('label').head(3)

Unnamed: 0,label,term,weight
0,ENTITY#101,5d_mark_iii,37.501637
1,ENTITY#101,5d_mark,9.793319
2,ENTITY#101,canon_eos,9.270045
50,ENTITY#16,nikon_d90,59.187
51,ENTITY#16,nikon_d90_camera,1.894752
52,ENTITY#16,nikon_d90_dx_12_3mp_digital_slr_camera,1.866237
84,ENTITY#18,canon_eos_60d,51.488703
85,ENTITY#18,nikon_d300,32.064259
86,ENTITY#18,canon,12.011575
261,ENTITY#23,canon_eos_7d,100.059516


---

## Save ranking

In [10]:
# The ranking spreadsheet is written under the same root_dir/data_dir as the input terms.
ranking_filename = 'ranking_alaska_nn_noisy.xlsx'
ranking_filepath = os.path.join(os.path.join(root_dir, data_dir), ranking_filename)

In [11]:
ranking_filepath

'../../data/terms/ranking_alaska_nn_noisy.xlsx'

In [12]:
# Write the ranking to the Excel file; with pandas defaults the DataFrame
# index is stored as the first column of the sheet.
ranking_df.to_excel(excel_writer=ranking_filepath)

---