# Process terms 
Process the relevant terms that were retrieved from the neural network model using LIME.

---

## Load the retrieved terms

In [1]:
import os

In [2]:
# Components of the (relative) path to the CSV holding the retrieved terms.
root_dir, data_dir = '../..', 'data/terms'
filename = 'relevant_terms_nn.csv'

# Assemble <root_dir>/<data_dir>/<filename> with the platform's separator.
filepath = os.path.join(root_dir, data_dir, filename)

In [3]:
import pandas as pd

In [4]:
# Load the retrieved terms from the CSV at `filepath` into a DataFrame.
# The preview in the next cell shows the columns label, term, weight and
# data_id.
terms_df = pd.read_csv(filepath)

In [5]:
# Preview the first rows to sanity-check the loaded data.
terms_df.head()

Unnamed: 0,label,term,weight,data_id
0,ENTITY#44,nikon_d3200_dslr_camera,0.221665,0
1,ENTITY#44,55_200mm_lenses,0.197231,0
2,ENTITY#44,18_55mm,0.11551,0
3,ENTITY#44,black_ebay,0.10225,0
4,ENTITY#44,nikon_d3200,0.571242,1


---

## Rank terms

### Add ranking column
Add a `rank` column by ranking the terms of each document (each `label`/`data_id` pair) by their LIME weight in descending order, so that the term with the highest weight gets rank 1.

In [6]:
# Rank the terms inside every (label, data_id) group by LIME weight; the
# largest weight receives rank 1.  method='average' is the pandas default and
# is spelled out because tied weights then share a fractional rank.
weights_per_document = terms_df.groupby(['label', 'data_id'])['weight']
terms_df['rank'] = weights_per_document.rank(method='average', ascending=False)

In [7]:
# Preview the frame again to check the newly added `rank` column.
terms_df.head()

Unnamed: 0,label,term,weight,data_id,rank
0,ENTITY#44,nikon_d3200_dslr_camera,0.221665,0,1.0
1,ENTITY#44,55_200mm_lenses,0.197231,0,2.0
2,ENTITY#44,18_55mm,0.11551,0,3.0
3,ENTITY#44,black_ebay,0.10225,0,4.0
4,ENTITY#44,nikon_d3200,0.571242,1,1.0


### Compute stats

In [16]:
# Aggregate per (label, term) pair.  The grouping is built once and reused
# for both aggregations.
by_label_and_term = terms_df.groupby(['label', 'term'])

# Summed LIME weight and mean rank of the term across its occurrences.
terms_stats_df_1 = by_label_and_term.agg({'weight': 'sum', 'rank': 'mean'})

# Number of rows in which the term occurs for the label.
terms_stats_df_2 = by_label_and_term.size().to_frame('count')

# Combine both on the shared (label, term) index and turn the index levels
# back into ordinary columns.
terms_stats_df = terms_stats_df_1.join(terms_stats_df_2)
terms_stats_df = terms_stats_df.reset_index()

In [17]:
# Preview the per-(label, term) statistics: summed weight, mean rank, count.
terms_stats_df.head()

Unnamed: 0,label,term,weight,rank,count
0,ENTITY#101,3mp,0.046001,3.0,1
1,ENTITY#101,5d,1.673307,1.6,5
2,ENTITY#101,5d_mark,10.450083,1.0,14
3,ENTITY#101,5d_mark_iii,37.700795,1.0,40
4,ENTITY#101,5d_mark_iii_black_slr_digital_camera_kit_w/,1.670841,1.0,2


### Ranking

In [29]:
# Show the three best terms of every label.
#
# Within a label, terms are ordered by mean rank (ascending, i.e. best rank
# first) and ties are broken by the summed weight (descending).  A single
# sort with 'label' as the leading key yields the same row order as sorting
# each label group separately, and it avoids `groupby(...).apply(...)` on a
# frame that still contains the grouping column: that pattern is deprecated
# in recent pandas and drops 'label' once grouping columns are excluded from
# `apply`, which would break the second groupby below.
#
# The index is renumbered after sorting, so the index values shown in the
# output are the positions of the rows in the fully sorted table.
(
    terms_stats_df
    .sort_values(by=['label', 'rank', 'weight'], ascending=[True, True, False])
    .reset_index(drop=True)
    .groupby('label')
    .head(3)[['label', 'term']]
)

Unnamed: 0,label,term
0,ENTITY#101,5d_mark_iii
1,ENTITY#101,5d_mark
2,ENTITY#101,canon_eos_5d_mark_iii_22_3_mp_full_frame
49,ENTITY#18,canon_eos_60d
50,ENTITY#18,canon_eos_60d_digital_slr_essentials
51,ENTITY#18,canon_eos_60d_dslr_camera_kit
172,ENTITY#21,nikon_d5100
173,ENTITY#21,nikon_d5100_digital_slr_camera
174,ENTITY#21,2_mp_digital_slr_camera_black_body
316,ENTITY#23,canon_eos_7d


---