In [13]:
import torch
import numpy as np
import distilled_ctr.model.nafm as nafm
from distilled_ctr.dataset.avazu import AvazuDataset

In [28]:
# Where the raw Avazu training data and its preprocessed cache live.
dataset_name = 'avazu'
dataset_path = '../data/avazu/train'
CACHE_PATH = '../cache/avazu'

# rebuild_cache=False: presumably reuses whatever is already stored at
# CACHE_PATH instead of re-parsing the raw file — confirm in AvazuDataset.
dataset = AvazuDataset(
    dataset_path,
    cache_path=CACHE_PATH,
    rebuild_cache=False,
)

# Handles used by the cells below.
cache, field_dims, feature_mapper = (
    dataset.cache,
    dataset.field_dims,
    dataset.feat_mapper,
)

# Human-readable field names, used to label per-field outputs further down.
features = [
    'hour', 'C1', 'banner_pos',
    'site_id', 'site_domain', 'site_category',
    'app_id', 'app_domain', 'app_category',
    'device_id', 'device_ip', 'device_model',
    'device_type', 'device_conn_type',
    'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21',
]


In [29]:
# Quick sanity dump, one item per line: number of cached records, the first
# record, the per-field dimensions, the number of mapper entries, and the
# mapping stored under mapper key 2.
print(
    len(cache),
    cache[0],
    field_dims,
    len(feature_mapper),
    feature_mapper[2],
    sep='\n',
)

40428967
[     0    161      6      6   1870   3835      1   2393     21     22
 108874 802158   3019      0      2   1822      5      5     28      3
     59     94     13]
[    241       8       8    3697    4614      25    5481     329      31
  381763 1611748    6793       6       5    2509       9      10     432
       5      68     169      61]
22
{'1012': 0, '1001': 1, '1008': 2, '1010': 3, '1007': 4, '1002': 5, '1005': 6}


In [30]:
# Rebuild the NAFM architecture and load the saved parameters onto the CPU.
# NOTE(review): `nam` is referenced below, but the notebook's import cell does
# not import it — confirm which module provides ExULayer/ReLULayer and import
# it explicitly, otherwise this cell raises NameError on a fresh kernel.
MODEL_PATH = '../chkpt/avazu_nafm.pt'
embed_dim = 16
model = nafm.NeuralAdditiveFactorizationMachines(
    # input / embedding geometry
    field_dims=field_dims,
    input_size=len(field_dims),
    embed_dim=embed_dim,
    # per-feature sub-network layout
    shallow_units=embed_dim,
    shallow_layer=nam.ExULayer,
    hidden_units=[1],
    hidden_layer=nam.ReLULayer,
    # no dropout for this analysis
    feature_dropout=0,
    hidden_dropout=0,
)
# torch.load unpickles the file, so MODEL_PATH must point at a trusted checkpoint.
# Kept as the last expression so the key-matching report is displayed.
model.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))

<All keys matched successfully>

In [32]:
# Score one cached record and show the prediction next to its label.
index = 100
record = torch.tensor(cache[index], dtype=torch.long)

# The first entry is used as the target; everything after it is the model input.
target = record[0]
x = record[1:]

print(record)
p = model(x)
(p, target)

tensor([     0,    161,      6,      0,   2801,   3267,      2,   2393,     21,
            22, 108874, 120256,    684,      0,      3,   2419,      5,      5,
           207,      3,      2,     99,     37])


(tensor([0.0490], grad_fn=<SigmoidBackward>), tensor(0))

In [109]:
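# NOTE: disabled scan, kept for reference. When enabled it walks the cache,
# stops at the first record whose value at position 2 is 0, and prints that
# record's index followed by the record itself.
# for i,rec in enumerate(cache):
#     if rec[2] == 0:
#         print(i)
#         print(rec)
#         break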

1770
[  0   0   0   1  64  33   6  46  16   7   6  72 215   0   1  50   0   2
   9   2  25  18  11]


In [39]:
# Explain one cached record: print the record, the model's prediction and the
# label, then list the per-field observation weight next to each field name.
index = 10000
record = torch.tensor(cache[index], dtype=torch.long)
# The first entry is used as the target; the remaining entries feed the model.
x,target = record[1:], record[0]
print(record)
p = model(x)
print(f'pred {p}')
print(f'target {target}')

weights = model.get_observation_weights(x).tolist()
# zip() stops at the shorter sequence without complaint, which would silently
# drop or mislabel fields if the model and the `features` list ever disagree.
assert len(weights) == len(features), (
    f'expected {len(features)} observation weights, got {len(weights)}'
)
for feature,weight in zip(features,weights):
    print(f'{feature} : {weight}')

tensor([      0,     161,       6,       6,     480,    2671,      21,     115,
             37,       0,  370198, 1611747,    3146,       0,       1,     443,
              5,       5,      28,       3,      59,      38,      13])
pred tensor([0.1792], grad_fn=<SigmoidBackward>)
target 0
hour : 0.5
C1 : 0.5
banner_pos : 0.5
site_id : 0.5
site_domain : 0.5
site_category : 0.5
app_id : 0.5
app_domain : 0.5
app_category : 0.5
device_id : 0.4764660596847534
device_ip : 0.5
device_model : 0.5
device_type : 0.5
device_conn_type : 0.5
C14 : 0.5
C15 : 0.4316590428352356
C16 : 0.5
C17 : 0.5
C18 : 0.5
C19 : 0.27848759293556213
C20 : 0.5
C21 : 0.3776298463344574


## Feature contributions at the global level

In [43]:
# Mapper key of the field to inspect; the next cell reuses this value.
# NOTE(review): mapper keys look 1-based relative to `features` (key 3 would
# line up with features[2], 'banner_pos') — confirm against the dataset class.
feature_index = 3
# Display that field's raw-value -> encoded-index dictionary.
feature_mapper[feature_index]

{'1': 0, '4': 1, '7': 2, '2': 3, '3': 4, '5': 5, '0': 6}

In [44]:
# Run every known category of the selected field through that field's own
# sub-network and squash the result with a sigmoid.
# The model-side lookups (offsets, feature_nns) are indexed with
# feature_index-1, i.e. one less than the mapper key.

# Encoded indices of all categories recorded for this field.
feature_indices = np.array([*feature_mapper[feature_index].values()])

# Add the field's offset to obtain rows of the shared embedding table.
embed_indices = torch.tensor(
    feature_indices + model.embedding.offsets[feature_index-1],
    dtype=torch.long,
)

x = model.embedding.embedding(embed_indices)
fnn_out = model.feature_nns[feature_index-1](x)
feature_weights = fnn_out.sigmoid()
feature_weights

tensor([[[0.5000]],

        [[0.5000]],

        [[0.5000]],

        [[0.5000]],

        [[0.5000]],

        [[0.5000]],

        [[0.5000]]], grad_fn=<SigmoidBackward>)

In [90]:
# Display the raw-value -> encoded-index dictionary stored under mapper key 2.
# NOTE(review): the same expression printed seven entries in an earlier cell,
# while the output recorded for this cell has four — presumably it was run
# against a different dataset/cache state; re-run from a fresh kernel to confirm.
feature_mapper[2]

{'1007': 0, '1005': 1, '1010': 2, '1002': 3}

torch.Size([4, 16])