In [1]:
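# Plan for this notebook:
# 1. Import the model.
# 2. Load data samples.
# 3. Feed a sample through the model and get the weights at the observation level.
# 4. Get the features from the feature mapper.
# 5. Interpret the results.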

In [65]:
import torch
import numpy as np
import distilled_ctr.model.nam as nam
from distilled_ctr.dataset.avazu import AvazuDataset

In [40]:
# Dataset location and the on-disk cache handed to AvazuDataset.
dataset_name = 'avazu'
dataset_path = '../data/avazu/small'
dataset = AvazuDataset(dataset_path, cache_path='../.avazu', rebuild_cache=False)

# Handles reused by the cells below.
cache = dataset.cache
field_dims = dataset.field_dims
feature_mapper = dataset.feat_mapper

# Display names for the model's input fields.
# NOTE(review): assumed to be listed in the same order as the dataset's fields — confirm.
features = [
    'hour', 'C1', 'banner_pos', 'site_id', 'site_domain', 'site_category',
    'app_id', 'app_domain', 'app_category', 'device_id', 'device_ip', 'device_model',
    'device_type', 'device_conn_type', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21',
]

# A later cell zip()s these names against per-field model outputs, and zip()
# silently truncates to the shorter input, so fail loudly on a count mismatch.
assert len(features) == len(field_dims), (
    f'{len(features)} feature names but {len(field_dims)} fields'
)


In [4]:
# Quick look at the loaded data, one item per line: the first cached record,
# the per-field dimensions, the number of mapped fields, and the mapping
# stored under key 2 of the feature mapper.
print(
    cache[0],
    field_dims,
    len(feature_mapper),
    feature_mapper[2],
    sep='\n',
)

[  0   0   1   1  18  75   2  20   2   5   0  45 215   0   1   5   0   2
  16   1   6  14  16]
[  2   5   3  86  78  11  47  17   9   7  73 216   5   5  98   4   4  73
   5  27  49  24]
22
{'1007': 0, '1005': 1, '1010': 2, '1002': 3}


In [5]:
# Rebuild the NAM and restore its trained weights from the checkpoint.
# NOTE(review): the architecture settings below presumably have to match the
# ones the checkpoint was trained with — confirm against the training config.
MODEL_PATH = '../chkpt/avazu_nam.pt'
embed_dim = 16
model = nam.NeuralAdditiveModel(
    input_size=len(field_dims),
    field_dims=field_dims,
    embed_dim=embed_dim,
    shallow_units=embed_dim,
    hidden_units=[],  # previously list(map(int, [])), which is just an empty list
    shallow_layer=nam.ExULayer,
    hidden_layer=nam.ExULayer,
    hidden_dropout=0,
    feature_dropout=0
)
# This notebook only runs inference, so put the module in evaluation mode.
# Done before the load so load_state_dict(...) stays the cell's last expression.
model.eval()
# The model and every input tensor in this notebook live on the CPU, so map
# the checkpoint there; without map_location, a checkpoint saved from a GPU
# cannot be loaded on a CPU-only machine.
model.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))

<All keys matched successfully>

In [110]:
# Score a single cached record with the model.
index = 1770
record = torch.tensor(cache[index], dtype=torch.long)
print(record)

# The first entry of the record is used as the target; the remaining
# entries are what gets fed to the model.
target = record[0]
x = record[1:]

p = model(x)
p, target

tensor([  0,   0,   0,   1,  64,  33,   6,  46,  16,   7,   6,  72, 215,   0,
          1,  50,   0,   2,   9,   2,  25,  18,  11])


(tensor([0.2461], grad_fn=<SigmoidBackward>), tensor(0))

In [109]:
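# Search used to pick the sample index: the first cached record whose
# column 2 equals 0. Left commented out; the output shown for this cell is
# from when it was last run.
# for i,rec in enumerate(cache):
#     if rec[2] == 0:
#         print(i)
#         print(rec)
#         break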

1770
[  0   0   0   1  64  33   6  46  16   7   6  72 215   0   1  50   0   2
   9   2  25  18  11]


In [111]:
# Per-field weights the model assigns to the single observation `x`,
# converted to a plain Python list and printed next to the field names.
weights = model.get_observation_weights(x).tolist()

# zip() stops at the shorter input, so make sure no field is silently dropped.
assert len(weights) == len(features), (
    f'{len(weights)} weights for {len(features)} feature names'
)

for feature, weight in zip(features, weights):
    print(f'{feature} : {weight}')

hour : 0.36594945192337036
C1 : 0.38024839758872986
banner_pos : 0.4000239670276642
site_id : 0.5
site_domain : 0.48257145285606384
site_category : 0.4371621310710907
app_id : 0.5
app_domain : 0.35416099429130554
app_category : 0.42346879839897156
device_id : 0.6363614797592163
device_ip : 0.2787962555885315
device_model : 0.8083107471466064
device_type : 0.5
device_conn_type : 0.6672606468200684
C14 : 0.0058508808724582195
C15 : 0.5
C16 : 0.3814810514450073
C17 : 0.6049033999443054
C18 : 0.5
C19 : 0.5333901047706604
C20 : 0.815177857875824
C21 : 0.967589795589447


## Feature contributions at the global level

In [44]:
# Key of the field to inspect in the feature mapper; the following cell uses
# `feature_index - 1` when indexing into the model for the same field.
# NOTE(review): presumably this is 'C1', the second entry of `features` — confirm.
feature_index = 2
# Show the mapping stored for that field.
feature_mapper[feature_index]

{'1007': 0, '1005': 1, '1010': 2, '1002': 3}

In [89]:
# Global view of one field: run every mapped index of the selected field
# through the embedding table and that field's feature network, then squash
# the result with a sigmoid.

# The feature mapper key minus one is the position used for the model's
# per-field offsets and feature networks.
model_field = feature_index - 1

# All indices the feature mapper holds for this field.
feature_indices = np.array(list(feature_mapper[feature_index].values()))

# Shift by the field's offset before looking up rows of the embedding table.
embed_indices = torch.tensor(
    feature_indices + model.embedding.offsets[model_field],
    dtype=torch.long,
)

# NOTE(review): this rebinds `x`, which the observation-level cells above use
# for the single-record model input; re-running those cells after this one
# would pick up the embeddings instead.
x = model.embedding.embedding(embed_indices)
fnn_out = model.feature_nns[model_field](x)
feature_weights = fnn_out.sigmoid()
feature_weights

tensor([[[0.3802]],

        [[0.3700]],

        [[0.2536]],

        [[0.2536]]], grad_fn=<SigmoidBackward>)

In [90]:
# Show the mapping stored under key 2 again, for reference next to the
# weights computed in the previous cell.
feature_mapper[2]

{'1007': 0, '1005': 1, '1010': 2, '1002': 3}

torch.Size([4, 16])