In [1]:
import pandas as pd
import numpy as np
import random
import time
import torch 
import torch.nn as nn
from torch.nn import Module
from torch.nn import init
from torch.optim import Adam,Adadelta,RMSprop
from torch.nn import BCELoss,BCEWithLogitsLoss
from torch.utils.data import DataLoader, Dataset

In [2]:
fp = 'C:/Users/w9753/Desktop/counterfactual/pairwiseDebias/datasets/MQ2008/'

train_log = pd.read_json(fp + 'click_log/Train_log.json')
vali_log = pd.read_json(fp + 'click_log/Vali_log.json')
test_dat = pd.read_json(fp + 'json_file/Test.json')

test_dat.rename(columns={'queryID':'qid', 'docID':'did'}, inplace = True)
test_dat = test_dat[['qid','did','label','feature']]

In [3]:
len(train_log),len(vali_log),len(test_dat)

(18769, 4265, 2874)

In [66]:
train_log.head()

Unnamed: 0,qid,did,label,isClick,rankPosition,feature,ips_weight,sid
0,64,1078,0,0,0,"[0.020649, 0.461538, 0.0, 1.0, 0.028515, 0.0, ...",0.0,0
1,64,1083,1,0,1,"[0.086037, 0.384615, 0.0, 0.0, 0.0884959999999...",0.0,0
2,64,1053,0,0,2,"[0.018191, 0.30769199999999997, 0.166666999999...",0.0,0
3,64,1073,0,0,3,"[0.009833, 0.384615, 0.0, 0.4, 0.014258, 0.0, ...",0.0,0
4,64,1056,0,0,4,"[0.021632, 0.30769199999999997, 0.5, 0.0, 0.02...",0.0,0


In [64]:
train_log[train_log['label']==1].head()

Unnamed: 0,qid,did,label,isClick,rankPosition,feature,ips_weight,sid
1,64,1083,1,0,1,"[0.086037, 0.384615, 0.0, 0.0, 0.0884959999999...",0.0,0
5,64,1104,1,0,5,"[0.042281, 0.30769199999999997, 0.0, 0.0, 0.04...",0.0,0
10,64,1096,1,0,10,"[0.07473, 0.07692299999999999, 0.0833329999999...",0.0,0
13,64,1088,1,0,13,"[0.265978, 0.07692299999999999, 0.0, 0.0, 0.26...",0.0,0
15,64,1084,1,0,15,"[0.902655, 0.07692299999999999, 0.0, 0.0, 0.90...",0.0,0


In [65]:
test_dat[test_dat['label']==1].head()

Unnamed: 0,qid,did,label,feature
3,0,3,1,"[0.026445999999999997, 0.75, 0.75, 0.5, 0.0364..."
10,1,10,1,"[0.120928, 0.14285699999999998, 0.333333, 0.0,..."
11,1,11,1,"[0.156819, 0.14285699999999998, 0.333333, 0.0,..."
12,1,12,1,"[0.218112, 0.28571399999999997, 0.333333, 0.0,..."
14,1,14,1,"[0.024295999999999998, 0.14285699999999998, 0...."


In [38]:
class Wrap_Dataset_fullInfo(Dataset):
    """Wrapper, convert <doc_tensor, relevance_label> Tensor into Pytorch Dataset"""
    def __init__(self, doc_tensor, label_tensor):
        self.label_tensor = label_tensor
        self.doc_tensor = doc_tensor

    def __getitem__(self, index):
        return self.doc_tensor[index],self.label_tensor[index]

    def __len__(self):
        return self.doc_tensor.size(0)

    
class Wrap_Dataset_clickLog(Dataset):
    """Wrapper, convert <doc_tensor, relevance_label> Tensor into Pytorch Dataset"""
    def __init__(self, doc_tensor, click_tensor, ips_tensor):
        self.click_tensor = click_tensor
        self.doc_tensor = doc_tensor
        self.ips_tensor = ips_tensor

    def __getitem__(self, index):
        return self.doc_tensor[index],self.click_tensor[index], self.ips_tensor[index]

    def __len__(self):
        return self.doc_tensor.size(0)

    
class rank_model(Module):
    def __init__(self, in_size, hidden_size):
        super(rank_model,self).__init__()
        self.linear_proj = nn.Sequential(
            nn.Linear(in_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False))
        
    def weight_init(self):
        pass
    
    def forward(self, input_vec):
        logit = self.linear_proj(input_vec)
        prob = torch.sigmoid(logit)
        return prob
    
def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True

In [None]:
input_train = Wrap_Dataset_fullInfo(doc_tensor=torch.Tensor(train_log['feature']), label_tensor = torch.Tensor(train_log['label']))
input_train_loader = DataLoader(input_train, batch_size=param['batch_size'], shuffle=True)


In [54]:
# full info training

setup_seed(20)
config = {
    'use_cuda':False,
    'eval_positions':[1,3,5,7,10]
}

param = {
    'hidden_size':16,
    'lr':1e-4,
    'weight_decay':1e-3,
    'epoch': 500,
    'batch_size':128,
    'epoch_strat':0,
    'patience': 20
}

eval_positions = [1,3,5,7,10]

input_train = Wrap_Dataset_fullInfo(doc_tensor=torch.Tensor(train_log['feature']), label_tensor = torch.Tensor(train_log['label']))
input_train_loader = DataLoader(input_train, batch_size=param['batch_size'], shuffle=True)

in_size = len(train_log['feature'][0])
model = rank_model(in_size, param['hidden_size'])
optim = Adam(model.parameters(), lr=param['lr'],weight_decay=param['weight_decay'])
early_stopping = EarlyStopping(param['patience'], verbose=True) 

dur = []

for epoch in range(param['epoch']):
    if epoch >= 3:
        t0 = time.time()
        
    loss_log = []
    model.train() # 设置模型为训练模式
    for _id, batch in enumerate(input_train_loader):
        optim.zero_grad()
        BCE_lossfunc = BCELoss()
        #BCE_lossfunc = BCELoss(weight = batch[2])
        output = model(batch[0])
        output = output.view(batch[0].size(0))
        train_loss = BCE_lossfunc(output, batch[1])
        train_loss.backward()
        optim.step()
        loss_log.append(train_loss.item())
        
    val_loss, eval_result = evaluate_for_vali(model, loss_type = BCELoss, eval_log = vali_log, eval_positions = eval_positions, 
                                              with_weight = False, use_cuda = False)
    
    ndcg_val = eval_result['NDCG'][5]
    arp_val = eval_result['ARP'][5]
    map_val = eval_result['MAP'][5]
    
    if epoch > param['epoch_strat']:
        early_stopping(ndcg_val*(-1), model)
    
    if early_stopping.early_stop:
        print("Early stopping")
        break 
    
    if epoch >= 3:
        dur.append(time.time() - t0)
        
    print("Epoch {:05d} | Time(s) {:.4f} | Train_Loss {:.4f} | Val_Loss {:.4f} | Val_NDCG@5 {:.4f} | "
            "Val_ARP@5 {:.4f}| Val_MAP@5 {:.4f} |". format(epoch, np.mean(dur), np.mean(loss_log),val_loss,
                                             ndcg_val, arp_val, map_val))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 00000 | Time(s) nan | Train_Loss 0.6652 | Val_Loss 0.1596 | Val_NDCG@5 0.1004 | Val_ARP@5 0.2164| Val_MAP@5 0.0388 |
Validation metric Increased (inf --> -0.074800).  Saving model ...
Epoch 00001 | Time(s) nan | Train_Loss 0.5978 | Val_Loss 0.2014 | Val_NDCG@5 0.0748 | Val_ARP@5 0.1637| Val_MAP@5 0.0283 |
EarlyStopping counter: 1 out of 20
Epoch 00002 | Time(s) nan | Train_Loss 0.5578 | Val_Loss 0.2376 | Val_NDCG@5 0.0690 | Val_ARP@5 0.1495| Val_MAP@5 0.0262 |
EarlyStopping counter: 2 out of 20
Epoch 00003 | Time(s) 0.6123 | Train_Loss 0.5355 | Val_Loss 0.2661 | Val_NDCG@5 0.0711 | Val_ARP@5 0.1637| Val_MAP@5 0.0267 |
EarlyStopping counter: 3 out of 20
Epoch 00004 | Time(s) 0.5978 | Train_Loss 0.5227 | Val_Loss 0.2859 | Val_NDCG@5 0.0743 | Val_ARP@5 0.1665| Val_MAP@5 0.0282 |
Validation metric Increased (-0.074800 --> -0.077933).  Saving model ...
Epoch 00005 | Time(s) 0.5959 | Train_Loss 0.5147 | Val_Loss 0.2988 | Val_NDCG@5 0.0779 | Val_ARP@5 0.1786| Val_MAP@5 0.0296 |
Validati

EarlyStopping counter: 1 out of 20
Epoch 00045 | Time(s) 0.6189 | Train_Loss 0.3897 | Val_Loss 0.2841 | Val_NDCG@5 0.3908 | Val_ARP@5 0.3829| Val_MAP@5 0.2175 |
EarlyStopping counter: 2 out of 20
Epoch 00046 | Time(s) 0.6181 | Train_Loss 0.3888 | Val_Loss 0.2851 | Val_NDCG@5 0.3923 | Val_ARP@5 0.3758| Val_MAP@5 0.2198 |
Validation metric Increased (-0.392591 --> -0.393670).  Saving model ...
Epoch 00047 | Time(s) 0.6187 | Train_Loss 0.3882 | Val_Loss 0.2852 | Val_NDCG@5 0.3937 | Val_ARP@5 0.3836| Val_MAP@5 0.2199 |
EarlyStopping counter: 1 out of 20
Epoch 00048 | Time(s) 0.6197 | Train_Loss 0.3879 | Val_Loss 0.2797 | Val_NDCG@5 0.3883 | Val_ARP@5 0.3751| Val_MAP@5 0.2174 |
EarlyStopping counter: 2 out of 20
Epoch 00049 | Time(s) 0.6210 | Train_Loss 0.3878 | Val_Loss 0.2855 | Val_NDCG@5 0.3882 | Val_ARP@5 0.3751| Val_MAP@5 0.2172 |
EarlyStopping counter: 3 out of 20
Epoch 00050 | Time(s) 0.6227 | Train_Loss 0.3870 | Val_Loss 0.2836 | Val_NDCG@5 0.3882 | Val_ARP@5 0.3751| Val_MAP@5 0.217

In [51]:
# click log training
setup_seed(20)
config = {
    'use_cuda':False,
    'eval_positions':[1,3,5,7,10]
}

param = {
    'hidden_size':16,
    'lr':1e-4,
    'weight_decay':1e-3,
    'epoch': 500,
    'batch_size':128,
    'epoch_strat':0,
    'patience': 20
}

eval_positions = [1,3,5,7,10]

input_train = Wrap_Dataset_clickLog(doc_tensor=torch.Tensor(train_log['feature']), click_tensor = torch.Tensor(train_log['isClick']), 
                                   ips_tensor = torch.Tensor(train_log['ips_weight']))
input_train_loader = DataLoader(input_train, batch_size=param['batch_size'], shuffle=True)

in_size = len(train_log['feature'][0])
model = rank_model(in_size, param['hidden_size'])
optim = Adam(model.parameters(), lr=param['lr'],weight_decay=param['weight_decay'])
early_stopping = EarlyStopping(param['patience'], verbose=True) 

dur = []

for epoch in range(param['epoch']):
    if epoch >= 3:
        t0 = time.time()
        
    loss_log = []
    model.train() # 设置模型为训练模式
    for _id, batch in enumerate(input_train_loader):
        optim.zero_grad()
        #BCE_lossfunc = BCELoss()
        BCE_lossfunc = BCELoss(weight = batch[2])
        output = model(batch[0])
        output = output.view(batch[0].size(0))
        train_loss = BCE_lossfunc(output, batch[1])
        train_loss.backward()
        optim.step()
        loss_log.append(train_loss.item())
        
    val_loss, eval_result = evaluate_for_vali(model, loss_type = BCELoss, eval_log = vali_log, eval_positions = eval_positions, 
                                              with_weight = False, use_cuda = False)
    
    ndcg_val = eval_result['NDCG'][5]
    arp_val = eval_result['ARP'][5]
    map_val = eval_result['MAP'][5]
    
    if epoch > param['epoch_strat']:
        early_stopping(ndcg_val*(-1), model)
    
    if early_stopping.early_stop:
        print("Early stopping")
        break 
    
    if epoch >= 3:
        dur.append(time.time() - t0)
        
    print("Epoch {:05d} | Time(s) {:.4f} | Train_Loss {:.4f} | Val_Loss {:.4f} | Val_NDCG@5 {:.4f} | "
            "Val_ARP@5 {:.4f}| Val_MAP@5 {:.4f} |". format(epoch, np.mean(dur), np.mean(loss_log),val_loss,
                                             ndcg_val, arp_val, map_val))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 00000 | Time(s) nan | Train_Loss 0.6526 | Val_Loss 0.1611 | Val_NDCG@5 0.0988 | Val_ARP@5 0.2135| Val_MAP@5 0.0385 |
Validation metric Increased (inf --> -0.070871).  Saving model ...
Epoch 00001 | Time(s) nan | Train_Loss 0.5354 | Val_Loss 0.2094 | Val_NDCG@5 0.0709 | Val_ARP@5 0.1559| Val_MAP@5 0.0273 |
EarlyStopping counter: 1 out of 20
Epoch 00002 | Time(s) nan | Train_Loss 0.4488 | Val_Loss 0.2593 | Val_NDCG@5 0.0681 | Val_ARP@5 0.1530| Val_MAP@5 0.0256 |
EarlyStopping counter: 2 out of 20
Epoch 00003 | Time(s) 0.6587 | Train_Loss 0.3860 | Val_Loss 0.3074 | Val_NDCG@5 0.0698 | Val_ARP@5 0.1587| Val_MAP@5 0.0263 |
EarlyStopping counter: 3 out of 20
Epoch 00004 | Time(s) 0.6504 | Train_Loss 0.3403 | Val_Loss 0.3513 | Val_NDCG@5 0.0697 | Val_ARP@5 0.1594| Val_MAP@5 0.0261 |
EarlyStopping counter: 4 out of 20
Epoch 00005 | Time(s) 0.6498 | Train_Loss 0.3084 | Val_Loss 0.3905 | Val_NDCG@5 0.0706 | Val_ARP@5 0.1630| Val_MAP@5 0.0263 |
EarlyStopping counter: 5 out of 20
Epoch 00006

Validation metric Increased (-0.445520 --> -0.448894).  Saving model ...
Epoch 00043 | Time(s) 0.6781 | Train_Loss 0.1621 | Val_Loss 0.5168 | Val_NDCG@5 0.4489 | Val_ARP@5 0.3829| Val_MAP@5 0.2565 |
Validation metric Increased (-0.448894 --> -0.453929).  Saving model ...
Epoch 00044 | Time(s) 0.6809 | Train_Loss 0.1615 | Val_Loss 0.5190 | Val_NDCG@5 0.4539 | Val_ARP@5 0.3964| Val_MAP@5 0.2572 |
Validation metric Increased (-0.453929 --> -0.454868).  Saving model ...
Epoch 00045 | Time(s) 0.6818 | Train_Loss 0.1612 | Val_Loss 0.5185 | Val_NDCG@5 0.4549 | Val_ARP@5 0.3943| Val_MAP@5 0.2583 |
EarlyStopping counter: 1 out of 20
Epoch 00046 | Time(s) 0.6836 | Train_Loss 0.1607 | Val_Loss 0.5154 | Val_NDCG@5 0.4531 | Val_ARP@5 0.3979| Val_MAP@5 0.2571 |
Validation metric Increased (-0.454868 --> -0.459937).  Saving model ...
Epoch 00047 | Time(s) 0.6849 | Train_Loss 0.1602 | Val_Loss 0.5156 | Val_NDCG@5 0.4599 | Val_ARP@5 0.4057| Val_MAP@5 0.2604 |
Validation metric Increased (-0.459937 --> 

EarlyStopping counter: 1 out of 20
Epoch 00090 | Time(s) 0.7226 | Train_Loss 0.1489 | Val_Loss 0.4930 | Val_NDCG@5 0.4846 | Val_ARP@5 0.4192| Val_MAP@5 0.2753 |
EarlyStopping counter: 2 out of 20
Epoch 00091 | Time(s) 0.7233 | Train_Loss 0.1484 | Val_Loss 0.4961 | Val_NDCG@5 0.4844 | Val_ARP@5 0.4206| Val_MAP@5 0.2750 |
EarlyStopping counter: 3 out of 20
Epoch 00092 | Time(s) 0.7243 | Train_Loss 0.1483 | Val_Loss 0.4984 | Val_NDCG@5 0.4847 | Val_ARP@5 0.4199| Val_MAP@5 0.2753 |
EarlyStopping counter: 4 out of 20
Epoch 00093 | Time(s) 0.7252 | Train_Loss 0.1487 | Val_Loss 0.4965 | Val_NDCG@5 0.4815 | Val_ARP@5 0.4228| Val_MAP@5 0.2725 |
EarlyStopping counter: 5 out of 20
Epoch 00094 | Time(s) 0.7250 | Train_Loss 0.1481 | Val_Loss 0.4925 | Val_NDCG@5 0.4815 | Val_ARP@5 0.4228| Val_MAP@5 0.2725 |
EarlyStopping counter: 6 out of 20
Epoch 00095 | Time(s) 0.7256 | Train_Loss 0.1488 | Val_Loss 0.4984 | Val_NDCG@5 0.4817 | Val_ARP@5 0.4221| Val_MAP@5 0.2728 |
EarlyStopping counter: 7 out of 20

In [55]:
model.load_state_dict(torch.load('checkpoint.pt'))
test_loss, eval_result = evaluate_for_test(model, loss_type = BCELoss, eval_log = test_dat, eval_positions = eval_positions, 
                                            use_cuda = False)

train by IPS and eval by IPS

In [47]:
eval_result

Unnamed: 0,NDCG,ARP,MAP
1,0.352564,0.352564,0.352564
3,0.402188,0.705128,0.360399
5,0.452152,0.967949,0.354829
7,0.469205,0.987179,0.338658
10,0.484419,1.087271,0.32647


train by IPS

In [50]:
eval_result

Unnamed: 0,NDCG,ARP,MAP
1,0.365385,0.365385,0.365385
3,0.405939,0.730769,0.361467
5,0.439625,0.930769,0.351752
7,0.467513,0.991758,0.336482
10,0.481833,1.092605,0.324636


train by naive

In [53]:
eval_result

Unnamed: 0,NDCG,ARP,MAP
1,0.365385,0.365385,0.365385
3,0.381872,0.632479,0.347934
5,0.423689,0.866667,0.333825
7,0.450333,0.948718,0.3184
10,0.466296,1.053434,0.307367


train by full info

In [56]:
eval_result

Unnamed: 0,NDCG,ARP,MAP
1,0.384615,0.384615,0.384615
3,0.417596,0.722222,0.372863
5,0.450248,0.903846,0.359038
7,0.48066,1.0,0.341337
10,0.489631,1.039446,0.329482


In [263]:
for parameters in model.parameters():
    print(parameters.shape)

torch.Size([16, 46])
torch.Size([16])
torch.Size([1, 16])


Evaluation for Validation log

In [233]:
loss_type = BCELoss
#is_vali = True
with_weight = False
eval_log = vali_log
eval_positions = [1,3,5,7,10]


if config['use_cuda']:
    eval_targets = torch.Tensor(eval_log['isClick']).cuda()
    eval_feature = torch.Tensor(eval_log['feature']).cuda()
    eval_weights = torch.Tensor(eval_log['ips_weight']).cuda()
else:
    eval_targets = torch.Tensor(eval_log['isClick']).cpu()
    eval_feature = torch.Tensor(eval_log['feature']).cpu()
    eval_weights = torch.Tensor(eval_log['ips_weight']).cpu()  

eval_predicts = model(eval_feature)
eval_predicts = eval_predicts.view(eval_predicts.size(0))
loss_func = loss_type(weight = eval_weights)
val_loss = loss_func(eval_predicts, eval_targets)

# trans to cpu data for using pandas
eval_predicts = eval_predicts.cpu()

# build evaluation dataframe
df_eval = pd.DataFrame([], columns=['sid', 'qid', 'did','isClick', 'ips_weight','predict'])
df_eval['sid'] = eval_log['sid']
df_eval['qid'] = eval_log['qid']
df_eval['did'] = eval_log['did']
df_eval['isClick'] = eval_log['isClick']
df_eval['ips_weight'] = eval_log['ips_weight']
df_eval['predict'] = eval_predicts.view(-1).tolist()

# give corresponding rank position for each query-doc pair
df_eval['rank'] = df_eval.groupby('sid')['predict'].rank(method='first', ascending=False)
# sort for each group
temp = df_eval.groupby('sid').apply(lambda x: x.sort_values('rank', ascending=True))
temp.reset_index(drop=True, inplace=True)
# output click_list/ips_list/rank_list for each session
temp_group = temp.groupby(['qid','sid'])
df_static = temp_group['isClick'].apply(list).reset_index()
df_static.rename(columns={'isClick':'click_list'},inplace = True)
df_static['ips_list'] = temp_group['ips_weight'].apply(list).reset_index()['ips_weight']
df_static['rank_list'] = temp_group['rank'].apply(list).reset_index()['rank']

# init evaluators
eval_sessionNum = vali_log['sid'].unique().shape[0]
NDCG_evals = []
ARP_evals = []
MAP_evals = []
for p in eval_positions:
    NDCG_evals.append(NDCG(p))
    ARP_evals.append(ARP(p))
    MAP_evals.append(AP(p))
    
if with_weight:
    # return metrics' values averaged on session num
    NDCGs = [df_static.apply(lambda row: NDCG_eval.evaluate(targets = row['click_list'], weights = row['ips_list']), axis = 1).mean() 
             for NDCG_eval in NDCG_evals]
    ARPs = [df_static.apply(lambda row: ARP_eval.evaluate(targets = row['click_list'], weights = row['ips_list']), axis = 1).mean() 
             for ARP_eval in ARP_evals]
    MAPs = [df_static.apply(lambda row: MAP_eval.evaluate(targets = row['click_list'], weights = row['ips_list']), axis = 1).mean() 
             for MAP_eval in MAP_evals]
else:
    NDCGs = [df_static['click_list'].apply(NDCG_eval.evaluate).mean() 
             for NDCG_eval in NDCG_evals]
    ARPs = [df_static['click_list'].apply(ARP_eval.evaluate).mean() 
             for ARP_eval in ARP_evals]
    MAPs = [df_static['click_list'].apply(MAP_eval.evaluate).mean() 
             for MAP_eval in MAP_evals]

In [181]:
temp[temp['ips_weight']!=0].head()

Unnamed: 0,sid,qid,did,isClick,ips_weight,predict,rank
8,1,81,1307,1,1.0,0.998957,1.0
40,3,111,1809,1,1.0,0.997896,2.0
116,6,43,606,1,5.005793,0.995614,3.0
122,7,74,1124,1,3.016006,0.998437,1.0
211,9,104,1715,1,1.0,0.998528,1.0


In [190]:
df_static.head()

Unnamed: 0,qid,sid,click_list,ips_list,rank_list
0,0,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ..."
1,0,59,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0057932255234...","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ..."
2,0,62,"[0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[0.0, 7.000607679387309, 0.0, 0.0, 0.0, 0.0, 5...","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ..."
3,0,93,"[0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[0.0, 7.000607679387309, 0.0, 1.99573598636091...","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ..."
4,0,116,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]","[0.0, 7.000607679387309, 0.0, 0.0, 0.0, 0.0, 0...","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ..."


Evaluation for Test Dataset

In [270]:
loss_type = BCELoss
#is_vali = True
with_weight = False
use_cuda = False
eval_log = test_dat
eval_positions = [1,3,5,7,10]


if use_cuda:
    eval_targets = torch.Tensor(eval_log['label']).cuda()
    eval_feature = torch.Tensor(eval_log['feature']).cuda()
else:
    eval_targets = torch.Tensor(eval_log['label']).cpu()
    eval_feature = torch.Tensor(eval_log['feature']).cpu()

eval_predicts = model(eval_feature)
eval_predicts = eval_predicts.view(eval_predicts.size(0))
loss_func = loss_type()
test_loss = loss_func(eval_predicts, eval_targets)

# trans to cpu data for using pandas
eval_predicts = eval_predicts.cpu()

# build evaluation dataframe
df_eval = pd.DataFrame([], columns=['qid', 'did', 'label', 'predict'])
df_eval['qid'] = eval_log['qid']
df_eval['did'] = eval_log['did']
df_eval['label'] = eval_log['label']
df_eval['predict'] = eval_predicts.view(-1).tolist()

# give corresponding rank position for each query-doc pair
df_eval['rank'] = df_eval.groupby('qid')['predict'].rank(method='first', ascending=False)
# sort for each group
temp = df_eval.groupby('qid').apply(lambda x: x.sort_values('rank', ascending=True))
temp.reset_index(drop=True, inplace=True)
# output click_list/ips_list/rank_list for each session
temp_group = temp.groupby('qid')
df_static = temp_group['label'].apply(list).reset_index()
df_static.rename(columns={'label':'label_list'},inplace = True)
df_static['rank_list'] = temp_group['rank'].apply(list).reset_index()['rank']

# init evaluators
#eval_sessionNum = vali_log['sid'].unique().shape[0]
NDCG_evals = []
ARP_evals = []
MAP_evals = []
for p in eval_positions:
    NDCG_evals.append(NDCG(p))
    ARP_evals.append(ARP(p))
    MAP_evals.append(AP(p))
    

NDCGs = [df_static['label_list'].apply(NDCG_eval.evaluate).mean() 
             for NDCG_eval in NDCG_evals]
ARPs = [df_static['label_list'].apply(ARP_eval.evaluate).mean() 
             for ARP_eval in ARP_evals]
MAPs = [df_static['label_list'].apply(MAP_eval.evaluate).mean() 
             for MAP_eval in MAP_evals]

# wrap eval result into pandas dataframe
eval_result = pd.DataFrame([],columns=['NDCG','ARP','MAP'], index= eval_positions)
eval_result['NDCG'] = NDCGs
eval_result['ARP'] = ARPs
eval_result['MAP'] = MAPs



In [266]:
temp.head()

Unnamed: 0,qid,did,label,predict,rank
0,0,0,0,0.998905,1.0
1,0,2,0,0.998741,2.0
2,0,4,0,0.998104,3.0
3,0,3,1,0.99804,4.0
4,0,5,0,0.996761,5.0


In [268]:
df_static.tail()

Unnamed: 0,qid,label_list,rank_list
151,151,"[1, 0, 1, 0, 0, 0, 0, 0]","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]"
152,152,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ..."
153,153,"[0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ..."
154,154,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ..."
155,155,"[1, 1, 0, 0, 1, 0, 0]","[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]"


In [5]:
train_log.head(5)

Unnamed: 0,qid,did,label,isClick,rankPosition,feature,ips_weight,sid
0,64,1078,0,0,0,"[0.020649, 0.461538, 0.0, 1.0, 0.028515, 0.0, ...",0.0,0
1,64,1083,1,0,1,"[0.086037, 0.384615, 0.0, 0.0, 0.0884959999999...",0.0,0
2,64,1053,0,0,2,"[0.018191, 0.30769199999999997, 0.166666999999...",0.0,0
3,64,1073,0,0,3,"[0.009833, 0.384615, 0.0, 0.4, 0.014258, 0.0, ...",0.0,0
4,64,1056,0,0,4,"[0.021632, 0.30769199999999997, 0.5, 0.0, 0.02...",0.0,0


In [33]:
sorted_train = train_log.groupby('sid').apply(lambda x: x.sort_values(by = ['isClick','ips_weight'], ascending=[False, False]))
sorted_train.reset_index(drop=True, inplace=True)
sorted_train[sorted_train['sid']==106]

Unnamed: 0,qid,did,label,isClick,rankPosition,feature,ips_weight,sid
2332,59,946,1,1,4,"[0.170648, 0.5, 0.0, 0.0, 0.171786, 0.0, 0.0, ...",5.018654,106
2333,59,948,1,1,3,"[0.31399299999999997, 0.0, 0.0, 0.0, 0.3139929...",3.991579,106
2334,59,944,1,1,2,"[0.127418, 0.5, 0.0, 0.0, 0.128555, 0.0, 0.0, ...",3.006889,106
2335,59,945,1,1,0,"[0.189989, 1.0, 0.0, 0.0, 0.192264, 0.0, 0.0, ...",1.0,106
2336,59,942,0,0,1,"[0.047782, 1.0, 0.0, 0.0, 0.050057, 0.0, 0.0, ...",0.0,106
2337,59,949,0,0,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.0,106
2338,59,947,0,0,6,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",0.0,106
2339,59,943,1,0,7,"[0.100114, 0.0, 0.0, 0.0, 0.100114, 0.0, 0.0, ...",0.0,106


In [35]:
train_dat = pd.read_json(fp + 'json_file/Train.json')

train_dat.rename(columns={'queryID':'qid', 'docID':'did'}, inplace = True)
train_dat = train_dat[['qid','did','label','feature']]

In [37]:
def get_pair_fullinfo(df):
    df_pair = pd.DataFrame([],columns=['qid', 'rel_diff', 'pos_did', 'pos_feature', 'neg_did', 'neg_feature'])
    df_sort = df.groupby('qid').apply(lambda x: x.sort_values(by = ['label'], ascending=[False]))
    df_sort.reset_index(drop=True, inplace=True)    
    index = 0
    for _, row_out in tqdm(df_sort.iterrows(), total=df_sort.shape[0]):
        if row_out['label'] > 0: 
            for _, row_in in df_sort[df_sort['qid']==row_out['qid']].iterrows():
                if row_out['label'] > row_in['label']:
                    df_pair.loc[index] = [ row_out['qid'], row_out['label'] - row_in['label'], 
                                          row_out['did'], row_out['feature'], row_in['did'], row_in['feature']]
                    index += 1
        else:
            continue
    return df_pair

train_pair_fullinfo = get_pair_fullinfo(train_dat)

100%|██████████████████████████████████████████████████████████████████████████████| 9630/9630 [07:23<00:00, 21.71it/s]


In [38]:
train_pair_fullinfo

Unnamed: 0,qid,rel_diff,pos_did,pos_feature,neg_did,neg_feature
0,1,1,14,"[0.5936400000000001, 1.0, 0.0, 0.0, 0.600707, ...",15,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
1,1,1,14,"[0.5936400000000001, 1.0, 0.0, 0.0, 0.600707, ...",10,"[0.0070669999999999995, 0.0, 0.666667, 0.0, 0...."
2,1,1,14,"[0.5936400000000001, 1.0, 0.0, 0.0, 0.600707, ...",8,"[0.021200999999999998, 0.0, 1.0, 0.0, 0.031802..."
3,1,1,14,"[0.5936400000000001, 1.0, 0.0, 0.0, 0.600707, ...",9,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,1,1,14,"[0.5936400000000001, 1.0, 0.0, 0.0, 0.600707, ...",12,"[0.279152, 0.0, 0.0, 0.0, 0.279152, 0.0, 0.0, ..."
...,...,...,...,...,...,...
48081,470,1,9624,"[0.271599, 0.0, 0.0, 0.0, 0.271599, 0.0, 0.0, ...",9628,"[0.259641, 0.6000000000000001, 0.0, 0.0, 0.260..."
48082,470,1,9624,"[0.271599, 0.0, 0.0, 0.0, 0.271599, 0.0, 0.0, ...",9627,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
48083,470,1,9624,"[0.271599, 0.0, 0.0, 0.0, 0.271599, 0.0, 0.0, ...",9623,"[0.034828, 0.2, 1.0, 0.0, 0.035127, 0.0, 0.0, ..."
48084,470,1,9624,"[0.271599, 0.0, 0.0, 0.0, 0.271599, 0.0, 0.0, ...",9629,"[0.7910309999999999, 0.0, 0.0, 0.0, 0.79103099..."


In [25]:
from tqdm import tqdm

def ipw(row):
    return row['isClick'] * row['ips_weight']

def get_pair(df):
    df_pair = pd.DataFrame([],columns=['sid', 'qid', 'rel_diff', 'pos_did', 'pos_feature', 'neg_did', 'neg_feature'])
    df_sort = df.groupby('sid').apply(lambda x: x.sort_values(by = ['isClick','ips_weight'], ascending=[False, False]))
    df_sort.reset_index(drop=True, inplace=True)    
    index = 0
    for _, row_out in tqdm(df_sort.iterrows(), total=df_sort.shape[0]):
        if row_out['isClick'] > 0:
            ipw_row_out = ipw(row_out) 
            for _, row_in in df_sort[df_sort['sid']==row_out['sid']].iterrows():
                if ipw_row_out > ipw(row_in):
                    df_pair.loc[index] = [row_out['sid'], row_out['qid'], ipw_row_out - ipw(row_in), row_out['did'], row_out['feature'], 
                                         row_in['did'], row_in['feature']]
                    index += 1
        else:
            continue
    return df_pair
        
train_pair_log = get_pair(train_log)

18865it [05:45, 54.55it/s] 


In [19]:
train_pair_log

Unnamed: 0,sid,qid,rel_diff,pos_did,pos_feature,neg_did,neg_feature
0,1,93,5.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1684,"[0.006725, 1.0, 0.0, 1.0, 0.008272999999999999..."
1,1,93,6.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1688,"[0.0, 1.0, 1.0, 1.0, 0.002585, 0.0, 0.0, 0.0, ..."
2,1,93,6.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1689,"[1.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
3,1,93,6.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1687,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,1,93,6.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1685,"[0.020176, 0.0, 0.0, 0.0, 0.020165, 0.0, 0.0, ..."
...,...,...,...,...,...,...,...
17795,997,85,2.009006,1491,"[0.115756, 1.0, 1.0, 0.75, 0.144695, 0.0, 0.0,...",1500,"[0.11254000000000002, 0.0, 0.0, 0.0, 0.1125400..."
17796,997,85,2.009006,1491,"[0.115756, 1.0, 1.0, 0.75, 0.144695, 0.0, 0.0,...",1501,"[0.9614149999999999, 0.0, 0.0, 0.0, 0.96141499..."
17797,997,85,2.009006,1491,"[0.115756, 1.0, 1.0, 0.75, 0.144695, 0.0, 0.0,...",1494,"[0.11254000000000002, 0.0, 0.0, 1.0, 0.1254019..."
17798,997,85,2.009006,1491,"[0.115756, 1.0, 1.0, 0.75, 0.144695, 0.0, 0.0,...",1499,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."


In [22]:
import copy
import time

def ipw(row):
    return row['isClick'] * row['ips_weight']

def get_pair2(df):
    df_pair = pd.DataFrame([],columns=['sid', 'qid', 'rel_diff', 'pos_did', 'pos_feature', 'neg_did', 'neg_feature'])
    df_sort = df.groupby('sid').apply(lambda x: x.sort_values(by = ['isClick','ips_weight'], ascending=[False, False]))
    df_sort.reset_index(drop=True, inplace=True)
    sid_max = df_sort['sid'].max()
    index = 0
    
    cur_sid = 0
    while cur_sid <= sid_max:
        df_cur_out = df_sort[df_sort['sid'] == cur_sid] 
        df_cur_in = copy.deepcopy(df_cur_out)
        for _, row_out in df_cur_out.iterrows():
            if row_out['isClick'] > 0:
                ipw_row_out = ipw(row_out) 
                for _, row_in in df_cur_in.iterrows():
                    if ipw_row_out > ipw(row_in):
                        df_pair.loc[index] = [row_out['sid'], row_out['qid'], ipw_row_out - ipw(row_in), row_out['did'], row_out['feature'], 
                                         row_in['did'], row_in['feature']]
                        index += 1
                        
            else:
                # 在当前session当中遇到isClick==0就换到下一个sid
                break
        cur_sid += 1
    
    return df_pair
                
            
    
st = time.time()        
train_pair_log2 = get_pair2(train_log)
ed = time.time()
print('total time: ', ed - st)

total time:  346.3671541213989


In [24]:
train_pair_log2

Unnamed: 0,sid,qid,rel_diff,pos_did,pos_feature,neg_did,neg_feature
0,1,93,5.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1684,"[0.006725, 1.0, 0.0, 1.0, 0.008272999999999999..."
1,1,93,6.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1688,"[0.0, 1.0, 1.0, 1.0, 0.002585, 0.0, 0.0, 0.0, ..."
2,1,93,6.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1689,"[1.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
3,1,93,6.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1687,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,1,93,6.990714,1686,"[0.018623999999999998, 0.0, 0.0, 0.0, 0.018614...",1685,"[0.020176, 0.0, 0.0, 0.0, 0.020165, 0.0, 0.0, ..."
...,...,...,...,...,...,...,...
17795,997,85,2.009006,1491,"[0.115756, 1.0, 1.0, 0.75, 0.144695, 0.0, 0.0,...",1500,"[0.11254000000000002, 0.0, 0.0, 0.0, 0.1125400..."
17796,997,85,2.009006,1491,"[0.115756, 1.0, 1.0, 0.75, 0.144695, 0.0, 0.0,...",1501,"[0.9614149999999999, 0.0, 0.0, 0.0, 0.96141499..."
17797,997,85,2.009006,1491,"[0.115756, 1.0, 1.0, 0.75, 0.144695, 0.0, 0.0,...",1494,"[0.11254000000000002, 0.0, 0.0, 1.0, 0.1254019..."
17798,997,85,2.009006,1491,"[0.115756, 1.0, 1.0, 0.75, 0.144695, 0.0, 0.0,...",1499,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."


In [31]:
fp = 'C:/Users/w9753/Desktop/counterfactual/pairwiseDebias/datasets/MQ2008/'
df_1 = pd.read_json(fp + 'click_log/Train_log_pair.json')

In [32]:
df_1

Unnamed: 0,sid,qid,rel_diff,pos_did,pos_feature,neg_did,neg_feature
0,1,56,0.997202,909,"[0.015038, 0.454545, 0.0, 0.0, 0.017544, 0.0, ...",905,"[0.021053, 1.0, 0.25, 0.5, 0.027569, 0.0, 0.0,..."
1,1,56,1.997202,909,"[0.015038, 0.454545, 0.0, 0.0, 0.017544, 0.0, ...",919,"[0.030074999999999998, 0.18181799999999998, 0...."
2,1,56,1.997202,909,"[0.015038, 0.454545, 0.0, 0.0, 0.017544, 0.0, ...",915,"[0.043608999999999995, 0.18181799999999998, 0...."
3,1,56,1.997202,909,"[0.015038, 0.454545, 0.0, 0.0, 0.017544, 0.0, ...",901,"[0.101754, 0.0, 0.0, 0.0, 0.101754, 0.0, 0.0, ..."
4,1,56,1.997202,909,"[0.015038, 0.454545, 0.0, 0.0, 0.017544, 0.0, ...",900,"[0.007018, 0.09090899999999999, 0.5, 1.0, 0.00..."
...,...,...,...,...,...,...,...
17106,999,117,1.000000,2113,"[0.05618000000000001, 1.0, 1.0, 1.0, 0.0909089...",2117,"[0.022472, 0.0, 0.0, 0.0, 0.011363999999999999..."
17107,999,117,1.000000,2113,"[0.05618000000000001, 1.0, 1.0, 1.0, 0.0909089...",2119,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
17108,999,117,1.000000,2113,"[0.05618000000000001, 1.0, 1.0, 1.0, 0.0909089...",2118,"[0.011236, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
17109,999,117,1.000000,2113,"[0.05618000000000001, 1.0, 1.0, 1.0, 0.0909089...",2115,"[0.011236, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [6]:
class DCG(object):

    def __init__(self, k=10, gain_type='exp2'):
        self.k = k
        self.discount = self._make_discount(256)
        if gain_type in ['exp2', 'identity']:
            self.gain_type = gain_type
        else:
            raise ValueError('gain type not equal to exp2 or identity')

    def evaluate(self, targets, weights=None):
        gain = self._get_gain(targets)
        discount = self._get_discount(min(self.k, len(gain)))
        if weights:
            ipw = self._get_weights(weights, min(self.k, len(gain)))
            return np.sum(np.divide(np.multiply(ipw, gain), discount))
        else:
            return np.sum(np.divide(gain, discount))

    def _get_gain(self, targets):
        t = targets[:self.k]
        if self.gain_type == 'exp2':
            return np.power(2.0, t) - 1.0
        else:
            return t

    def _get_discount(self, k):
        if k > len(self.discount):
            self.discount = self._make_discount(2 * len(self.discount))
        return self.discount[:k]

    def _get_weights(self, weights, k):
        return weights[:k]
    
    @staticmethod
    def _make_discount(n):
        x = np.arange(1, n+1, 1)
        discount = np.log2(x + 1)
        return discount


class NDCG(DCG):

    def __init__(self, k=10, gain_type='exp2'):
        super(NDCG, self).__init__(k, gain_type)

    def evaluate(self, targets, weights=None):
        dcg = super(NDCG, self).evaluate(targets, weights = weights)
        ideal = np.sort(targets)[::-1]
        idcg = super(NDCG, self).evaluate(ideal, weights = weights)
        if idcg == 0:
            return .0
        else:
            return dcg / idcg


class ARP(object):

    def __init__(self, k=10):
        self.k = k

    def evaluate(self, targets, weights=None):
        gain = self._get_gain(targets)
        discount = self._get_discount(min(self.k, len(gain)))
        if weights:
            ipw = self._get_weights(weights, min(self.k, len(gain)))
            return np.sum(np.multiply(np.multiply(gain, ipw), discount))
        else:
            return np.sum(np.multiply(gain, discount))

    def _get_gain(self, targets):
        t = targets[:self.k]
        t_binary = [1 if (score > 0) else 0 for score in t]
        return t_binary

    def _get_discount(self, k):
        self.discount = np.array([p + 1 for p in range(k)])
        return self.discount
    
    def _get_weights(self, weights, k):
        return weights[:k]

    
class Precision(object):

    def __init__(self, k=10):
        self.k = k

    def evaluate(self, targets, weights=None):
        gain = self._get_gain(targets)
        discount = self._get_discount(min(self.k, len(gain)))
        if weights:
            ipw = self._get_weights(weights, min(self.k, len(gain)))
            return np.sum(np.multiply(np.multiply(ipw, gain), discount))
        else:
            return np.sum(np.multiply(gain, discount))

    def _get_gain(self, targets):
        t = targets[:self.k]
        t_binary = [1 if (score > 0) else 0 for score in t]
        #self.gain_len = len(t_binary)
        return t_binary

    def _get_discount(self, k):
        self.discount = [1/k for _ in range(k)]
        return self.discount
    
    def _get_weights(self, weights, k):
        return weights[:k]
    

class AP(Precision):
    def __init__(self, k=10):
        super(AP, self).__init__(k)

    def evaluate(self, targets, weights=None):
        gain = super(AP, self)._get_gain(targets)
        Precision_list = [super(AP, self).evaluate(targets[:position + 1], weights = weights) for position in range(min(self.k, len(gain)))]
        Precision_list_mul_gain = np.multiply(gain, Precision_list)
        Denominator = min(sum(self._get_binary_targets(targets)),self.k)
        if Denominator == 0:
            return 0
        else:
            return sum(Precision_list_mul_gain)/Denominator

    def _get_binary_targets(self, targets):
        t_binary = [1 if (score > 0) else 0 for score in targets]
        return t_binary

In [7]:
ndcg_at_6 = NDCG(6)
arp_at_6 = ARP(6)
p_at_6 = Precision(6)
ap_at_6 = AP(6)

weights = [0.999, 1.995, 3.016, 4.001, 5.005, 6.009, 135193]

#targets = [3, 2, 0, 1, 0, 0, 0]
targets = [1, 1, 1, 0, 0, 0, 0]
#targets = [1, 1, 0, 1]
#targets = [0, 0, 0, 0, 0, 0, 0]

print(ndcg_at_6.evaluate(targets))
print(arp_at_6.evaluate(targets))
print(p_at_6.evaluate(targets))
print([p_at_6.evaluate(targets[:position + 1]) for position in range(min(len(targets), 6))])
print(ap_at_6.evaluate(targets))
print('')
print('test with weight:')
print(ndcg_at_6.evaluate(targets, weights = weights))
print(arp_at_6.evaluate(targets, weights = weights))
print(p_at_6.evaluate(targets, weights = weights))
print([p_at_6.evaluate(targets[:position + 1], weights = weights) for position in range(min(len(targets), 6))])
print(ap_at_6.evaluate(targets, weights = weights))

1.0
6
0.5
[1.0, 1.0, 1.0, 0.75, 0.6000000000000001, 0.5]
1.0

test with weight:
1.0
14.036999999999999
1.0016666666666665
[0.999, 1.497, 2.003333333333333, 1.5025, 1.202, 1.0016666666666665]
1.4997777777777774


In [8]:
config = {
    'use_cuda':False
}

loss_type = BCELoss
#is_vali = True
with_weight = False
eval_log = vali_log
eval_positions = [1,3,5,7,10]

def evaluate_for_vali(model, loss_type, eval_log, eval_positions, with_weight = False, use_cuda = False):
    """
    output NDCG, ARP, MAP values for given validation click log data
    """
    if use_cuda:
        eval_targets = torch.Tensor(eval_log['isClick']).cuda()
        eval_feature = torch.Tensor(eval_log['feature']).cuda()
        eval_weights = torch.Tensor(eval_log['ips_weight']).cuda()
    else:
        eval_targets = torch.Tensor(eval_log['isClick']).cpu()
        eval_feature = torch.Tensor(eval_log['feature']).cpu()
        eval_weights = torch.Tensor(eval_log['ips_weight']).cpu()  

    # given predicts and loss on validation click log data
    eval_predicts = model(eval_feature)
    eval_predicts = eval_predicts.view(eval_predicts.size(0))
    loss_func = loss_type(weight = eval_weights)
    val_loss = loss_func(eval_predicts, eval_targets)

    # transform predicts to cpu data for using pandas
    eval_predicts = eval_predicts.cpu()

    # build evaluation dataframe
    df_eval = pd.DataFrame([], columns=['sid', 'qid', 'did','isClick', 'ips_weight','predict'])
    df_eval['sid'] = eval_log['sid']
    df_eval['qid'] = eval_log['qid']
    df_eval['did'] = eval_log['did']
    df_eval['isClick'] = eval_log['isClick']
    df_eval['ips_weight'] = eval_log['ips_weight']
    df_eval['predict'] = eval_predicts.view(-1).tolist()

    # give corresponding rank position for each query-doc pair
    df_eval['rank'] = df_eval.groupby('sid')['predict'].rank(method='first', ascending=False)
    # sort for each group
    temp = df_eval.groupby('sid').apply(lambda x: x.sort_values('rank', ascending=True))
    temp.reset_index(drop=True, inplace=True)
    # output click_list/ips_list/rank_list for each session
    temp_group = temp.groupby(['qid','sid'])
    df_static = temp_group['isClick'].apply(list).reset_index()
    df_static.rename(columns={'isClick':'click_list'},inplace = True)
    df_static['ips_list'] = temp_group['ips_weight'].apply(list).reset_index()['ips_weight']
    df_static['rank_list'] = temp_group['rank'].apply(list).reset_index()['rank']

    # init evaluators
    #eval_sessionNum = eval_log['sid'].unique().shape[0]
    NDCG_evals = []
    ARP_evals = []
    MAP_evals = []
    for p in eval_positions:
        NDCG_evals.append(NDCG(p))
        ARP_evals.append(ARP(p))
        MAP_evals.append(AP(p))

    if with_weight:
        # return metrics' values averaged on session num with ips weights
        NDCGs = [df_static.apply(lambda row: NDCG_eval.evaluate(targets = row['click_list'], weights = row['ips_list']), axis = 1).mean() 
                 for NDCG_eval in NDCG_evals]
        ARPs = [df_static.apply(lambda row: ARP_eval.evaluate(targets = row['click_list'], weights = row['ips_list']), axis = 1).mean() 
                 for ARP_eval in ARP_evals]
        MAPs = [df_static.apply(lambda row: MAP_eval.evaluate(targets = row['click_list'], weights = row['ips_list']), axis = 1).mean() 
                 for MAP_eval in MAP_evals]
    else:
        # return metrics' values averaged on session num without ips weights
        NDCGs = [df_static['click_list'].apply(NDCG_eval.evaluate).mean() 
                 for NDCG_eval in NDCG_evals]
        ARPs = [df_static['click_list'].apply(ARP_eval.evaluate).mean() 
                 for ARP_eval in ARP_evals]
        MAPs = [df_static['click_list'].apply(MAP_eval.evaluate).mean() 
                 for MAP_eval in MAP_evals]
    
    # wrap eval result into pandas dataframe
    eval_result = pd.DataFrame([],columns=['NDCG','ARP','MAP'], index= eval_positions)
    eval_result['NDCG'] = NDCGs
    eval_result['ARP'] = ARPs
    eval_result['MAP'] = MAPs

    return val_loss, eval_result

NameError: name 'vali_log' is not defined

In [9]:
def evaluate_for_test(model, loss_type, eval_log, eval_positions, use_cuda = False):
    if use_cuda:
        eval_targets = torch.Tensor(eval_log['label']).cuda()
        eval_feature = torch.Tensor(eval_log['feature']).cuda()
    else:
        eval_targets = torch.Tensor(eval_log['label']).cpu()
        eval_feature = torch.Tensor(eval_log['feature']).cpu()

    eval_predicts = model(eval_feature)
    eval_predicts = eval_predicts.view(eval_predicts.size(0))
    loss_func = loss_type()
    test_loss = loss_func(eval_predicts, eval_targets)

    # trans to cpu data for using pandas
    eval_predicts = eval_predicts.cpu()

    # build evaluation dataframe
    df_eval = pd.DataFrame([], columns=['qid', 'did', 'label', 'predict'])
    df_eval['qid'] = eval_log['qid']
    df_eval['did'] = eval_log['did']
    df_eval['label'] = eval_log['label']
    df_eval['predict'] = eval_predicts.view(-1).tolist()

    # give corresponding rank position for each query-doc pair
    df_eval['rank'] = df_eval.groupby('qid')['predict'].rank(method='first', ascending=False)
    # sort for each group
    temp = df_eval.groupby('qid').apply(lambda x: x.sort_values('rank', ascending=True))
    temp.reset_index(drop=True, inplace=True)
    # output click_list/ips_list/rank_list for each session
    temp_group = temp.groupby('qid')
    df_static = temp_group['label'].apply(list).reset_index()
    df_static.rename(columns={'label':'label_list'},inplace = True)
    df_static['rank_list'] = temp_group['rank'].apply(list).reset_index()['rank']

    # init evaluators
    #eval_sessionNum = vali_log['sid'].unique().shape[0]
    NDCG_evals = []
    ARP_evals = []
    MAP_evals = []
    for p in eval_positions:
        NDCG_evals.append(NDCG(p))
        ARP_evals.append(ARP(p))
        MAP_evals.append(AP(p))


    NDCGs = [df_static['label_list'].apply(NDCG_eval.evaluate).mean() 
                 for NDCG_eval in NDCG_evals]
    ARPs = [df_static['label_list'].apply(ARP_eval.evaluate).mean() 
                 for ARP_eval in ARP_evals]
    MAPs = [df_static['label_list'].apply(MAP_eval.evaluate).mean() 
                 for MAP_eval in MAP_evals]

    # wrap eval result into pandas dataframe
    eval_result = pd.DataFrame([],columns=['NDCG','ARP','MAP'], index= eval_positions)
    eval_result['NDCG'] = NDCGs
    eval_result['ARP'] = ARPs
    eval_result['MAP'] = MAPs
    
    return test_loss, eval_result

In [10]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation metric Increased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt') # 这里会存储迄今最优模型的参数
        self.val_loss_min = val_loss

In [11]:
fp1 = '../datasets/MQ2008/predict/test_predict.txt'
fp2 = '../datasets/MQ2008/json_file/test.json'
df_predict = pd.read_table(fp1, header=None)
df_test = pd.read_json(fp2)
df_test.rename(columns={'queryID':'qid', 'docID':'did'}, inplace = True)
df_test = df_test[['qid','did','label','feature']]

In [12]:
type(df_test) == pd.core.frame.DataFrame

True

In [13]:
evaluate_for_init_ranker(df_predict, df_test, [1,3,5,7,10])

Unnamed: 0,NDCG,ARP,MAP
1,0.339744,0.339744,0.339744
3,0.361099,1.846154,0.315527
5,0.400088,4.153846,0.332516
7,0.425679,6.339744,0.349672
10,0.450907,9.801282,0.365019


In [2]:
def evaluate_for_init_ranker(predict_log, eval_log, eval_positions):
    """
    output NDCG, ARP, MAP values for initialized product rank model
    """
    eval_predicts = predict_log[0].values.tolist()

    # build evaluation dataframe
    df_eval = pd.DataFrame([], columns=['qid', 'did', 'label', 'predict'])
    df_eval['qid'] = eval_log['qid']
    df_eval['did'] = eval_log['did']
    df_eval['label'] = eval_log['label']
    df_eval['predict'] = eval_predicts

    # give corresponding rank position for each query-doc pair
    df_eval['rank'] = df_eval.groupby('qid')['predict'].rank(method='first', ascending=False)
    # sort for each group
    temp = df_eval.groupby('qid').apply(lambda x: x.sort_values('rank', ascending=True))
    temp.reset_index(drop=True, inplace=True)
    # output click_list/ips_list/rank_list for each session
    temp_group = temp.groupby('qid')
    df_static = temp_group['label'].apply(list).reset_index()
    df_static.rename(columns={'label':'label_list'},inplace = True)
    df_static['rank_list'] = temp_group['rank'].apply(list).reset_index()['rank']

    # init evaluators
    #eval_sessionNum = vali_log['sid'].unique().shape[0]
    NDCG_evals = []
    ARP_evals = []
    MAP_evals = []
    for p in eval_positions:
        NDCG_evals.append(NDCG(p))
        ARP_evals.append(ARP(p))
        MAP_evals.append(AP(p))

    NDCGs = [df_static['label_list'].apply(NDCG_eval.evaluate).mean() 
                 for NDCG_eval in NDCG_evals]
    ARPs = [df_static['label_list'].apply(ARP_eval.evaluate).mean() 
                 for ARP_eval in ARP_evals]
    MAPs = [df_static['label_list'].apply(MAP_eval.evaluate).mean() 
                 for MAP_eval in MAP_evals]

    # wrap eval result into pandas dataframe
    eval_result = pd.DataFrame([],columns=['NDCG','ARP','MAP'], index= eval_positions)
    eval_result['NDCG'] = NDCGs
    eval_result['ARP'] = ARPs
    eval_result['MAP'] = MAPs
    
    return eval_result

In [8]:
fp = '../datasets/MQ2008/json_file/Train_estimate.json'
df_estimate = pd.read_json(fp)

In [16]:
df_estimate[df_estimate['queryID']==105]

Unnamed: 0,feature,label,docID,oriQueryID,queryID,rankScore,rankPosition,estimate_label
1913,"[1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",1,1938,11208,105,1.036354,0,0.583333
1914,"[0.45965399999999995, 1.0, 0.0, 0.0, 0.4604319...",1,1933,11208,105,0.762939,1,0.5
1915,"[0.002882, 1.0, 1.0, 1.0, 0.007194, 0.0, 0.0, ...",1,1920,11208,105,0.313575,2,0.75
1916,"[0.04755, 0.0, 0.0, 0.0, 0.047481999999999996,...",1,1924,11208,105,0.213323,3,1.0
1917,"[0.072046, 0.0, 0.0, 0.0, 0.07194199999999999,...",1,1935,11208,105,0.087715,4,0.666667
1918,"[0.073487, 0.0, 0.0, 0.0, 0.073381, 0.0, 0.0, ...",1,1936,11208,105,0.080569,5,0.666667
1919,"[0.15417899999999998, 0.0, 0.0, 0.0, 0.1539569...",1,1931,11208,105,0.070447,6,0.5
1920,"[0.164265, 0.0, 0.0, 0.0, 0.16402899999999998,...",1,1937,11208,105,0.06465,7,0.5
1921,"[0.069164, 0.0, 0.0, 0.0, 0.069065, 0.0, 0.0, ...",1,1916,11208,105,0.061095,8,0.583333
1922,"[0.09366000000000001, 0.0, 0.0, 0.0, 0.093525,...",1,1930,11208,105,0.060879,9,0.666667


In [13]:
fp = 'C:/Users/w9753/Desktop/counterfactual/pairwiseDebias/datasets/MQ2008/json_file/Train.json'
df_train = pd.read_json(fp)

In [14]:
df_train

Unnamed: 0,feature,label,docID,oriQueryID,queryID,rankScore,rankPosition
0,"[0.25981299999999996, 1.0, 0.0, 0.0, 0.260504,...",0,7,10002,0,1.101494,0
1,"[0.005607, 0.5, 1.0, 0.0, 0.006535999999999999...",0,6,10002,0,0.829384,1
2,"[1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",0,4,10002,0,0.410193,2
3,"[0.21495299999999998, 0.0, 0.0, 0.0, 0.2138189...",0,2,10002,0,0.265893,3
4,"[0.008411, 0.0, 0.0, 0.0, 0.007470000000000001...",0,5,10002,0,0.211872,4
...,...,...,...,...,...,...,...
9625,"[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",0,9627,15925,470,0.098985,3
9626,"[0.034828, 0.2, 1.0, 0.0, 0.035127, 0.0, 0.0, ...",0,9623,15925,470,0.075282,4
9627,"[0.271599, 0.0, 0.0, 0.0, 0.271599, 0.0, 0.0, ...",1,9624,15925,470,0.056196,5
9628,"[0.7910309999999999, 0.0, 0.0, 0.0, 0.79103099...",0,9629,15925,470,0.026664,6
