In [9]:
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import torch.nn.functional as F

from utils_wei.pytorchtools import EarlyStopping

### 1  Actor,Critic,Baseline network¶

In [10]:
activation_dict = {"relu": nn.ReLU(),"sigmoid": nn.Sigmoid(),"softmax": nn.Softmax(),"selu": nn.SELU()}

#Use feature as the input and output selection probability
class Actor(nn.Module):
    
    def __init__(self, input_dim, h_dim, output_dim, layer_num, activation):
        super(Actor, self).__init__()
        #add regularization term in loss in pytroch, not every layer in keras
        layer_list = []
        layer_list.append(nn.Linear(input_dim, h_dim))
        layer_list.append(activation_dict[activation])
        for _ in range(layer_num - 2):
            layer_list.append(nn.Linear(h_dim, h_dim))
            layer_list.append(activation_dict[activation])
        layer_list.append(nn.Linear(h_dim, output_dim))
        layer_list.append(activation_dict["sigmoid"])
        
        self.linears = nn.Sequential(*layer_list)
        
    def forward(self, x):
        return self.linears(x)
        
#Use selected feature as the input and predict labels    
class Critic_RankNet(nn.Module):
    def __init__(self, inputs, hidden_size, outputs):
        super(Critic_RankNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(inputs, hidden_size),
            #nn.Dropout(0.5),
            #nn.ReLU(inplace=True),
            nn.LeakyReLU(0.2,  inplace=True),#inplace为True，将会改变输入的数据 ，否则不会改变原输入，只会产生新的输出
            #nn.SELU(inplace=True),
            nn.Linear(hidden_size, hidden_size),
            nn.LeakyReLU(0.2,  inplace=True),
            nn.Linear(hidden_size, hidden_size),
            nn.LeakyReLU(0.2,  inplace=True),
            nn.Linear(hidden_size, outputs),
            nn.Sigmoid()
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_1, selection_1, input_2, selection_2):
        
        input_1 = input_1 * selection_1
        result_1 = self.model(input_1) #预测input_1得分
        
        input_2 = input_2 * selection_2
        result_2 = self.model(input_2) #预测input_2得分
        
        pred = self.sigmoid(result_1 - result_2) #input_1比input_2更相关概率
        return pred

    def predict(self, input, selection):
        
        input = input * selection
        result = self.model(input)
        return result   

#Use the original feature as the input and predict labels
class Baseline_RankNet(nn.Module):
    
    def __init__(self, inputs, hidden_size, outputs):
        super(Baseline_RankNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(inputs, hidden_size),
            #nn.Dropout(0.5),
            #nn.ReLU(inplace=True),
            nn.LeakyReLU(0.2,  inplace=True),#inplace为True，将会改变输入的数据 ，否则不会改变原输入，只会产生新的输出
            #nn.SELU(inplace=True),
            nn.Linear(hidden_size, hidden_size),
            nn.LeakyReLU(0.2,  inplace=True),
            nn.Linear(hidden_size, hidden_size),
            nn.LeakyReLU(0.2,  inplace=True),
            nn.Linear(hidden_size, outputs),
            nn.Sigmoid()
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_1, input_2):
        
        result_1 = self.model(input_1) #预测input_1得分
        result_2 = self.model(input_2) #预测input_2得分
        pred = self.sigmoid(result_1 - result_2) #input_1比input_2更相关概率
        return pred

    def predict(self, input):
        result = self.model(input)
        return result   


### 2  data preparation

#### Dstaset

In [11]:
class Dataset(data.Dataset):

    def __init__(self, data_path):
        # 解析训练数据
        read_numpy = np.load(data_path)
        # pair组合
        self.array_train_x0, \
        self.array_train_y0, \
        self.array_train_x1, \
        self.array_train_y1, \
        self.array_within_query_signal = read_numpy['arr_0'],read_numpy['arr_1'],read_numpy['arr_2'],read_numpy['arr_3'],read_numpy['arr_4']

    def __getitem__(self, index):
        
        data1 = torch.from_numpy(self.array_train_x0[index]).float()
        y1 = torch.tensor(self.array_train_y0[index]).float()
        
        data2 = torch.from_numpy(self.array_train_x1[index]).float()
        y2 = torch.tensor(self.array_train_y1[index]).float()
        
        signal = torch.tensor(self.array_within_query_signal[index]).float()
        
        return data1, y1, data2, y2, signal

    def __len__(self):
        return self.array_train_x0.shape[0]

def get_loader(data_path, batch_size, shuffle, drop_last):
    
    dataset = Dataset(data_path)
    
    data_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size = batch_size,
        shuffle = shuffle,
        drop_last=drop_last
    )
    return data_loader

### Definition of Loss and Training process

In [12]:
def pair_actor_loss(actor_output_1, actor_output_2, selection_1, selection_2, critic_loss_1, critic_loss_2, baseline_loss_1, baseline_loss_2, signal, lamda, beta, gamma):

    m = torch.nn.Softmax(dim=1)
    
    Reward_1 = critic_loss_1.detach() - baseline_loss_1.detach()
    Pi_1 = (selection_1 * torch.log(actor_output_1 + 1e-8) + (1-selection_1) * torch.log(1-actor_output_1 + 1e-8)).sum(1)
    L0_1 = actor_output_1.detach().mean(1)
#     custom_actor_loss_1 = Pi_1 * Reward_1 + lamda * L0_1
#     L0_1 = selection_1.mean(1)
    custom_actor_loss_1 = Pi_1 * Reward_1 + lamda * L0_1
    #*************************************************************************
    Reward_2 = critic_loss_2.detach() - baseline_loss_2.detach()
    Pi_2 = (selection_2 * torch.log(actor_output_2 + 1e-8) + (1-selection_2) * torch.log(1-actor_output_2 + 1e-8)).sum(1)
    L0_2 = actor_output_2.detach().mean(1)
#     custom_actor_loss_2 = Pi_2 * Reward_2 + lamda * L0_2
#     L0_2 = selection_2.mean(1)
    custom_actor_loss_2 = Pi_2 * Reward_2 + lamda * L0_2
    #***************************************************************************
    
    actor_output_1 = m(actor_output_1)
    actor_output_2 = m(actor_output_2)
    
    selection_loss = -((actor_output_1 * torch.log(actor_output_2 + 1e-8) + (1-actor_output_1) * torch.log(1-actor_output_2 + 1e-8))).sum(1)
    
    signal_beta = signal.type(torch.bool)
    signal_gamma = (1 - signal).type(torch.bool)
    final_selection_loss = torch.FloatTensor(selection_loss.size()).type_as(selection_loss)
    
    final_selection_loss[signal_beta] = beta * selection_loss[signal_beta]
    final_selection_loss[signal_gamma] = -gamma * selection_loss[signal_gamma]
    
    
    return ((custom_actor_loss_1.mean() + custom_actor_loss_2.mean())/2) + final_selection_loss.mean()


In [13]:
def train_model(actor_model, critic_model, baseline_model, patience, saved_path, epochs, lamda, beta, gamma):
        
    actor_optimizer = torch.optim.Adam(actor_model.parameters(),lr = 1e-5, weight_decay=1e-5)
    critic_optimizer = torch.optim.Adam(critic_model.parameters(),lr = 1e-4, weight_decay=1e-5)
    baseline_optimizer = torch.optim.Adam(baseline_model.parameters(),lr = 1e-4, weight_decay=1e-5)
    
    critic_criterion = nn.BCELoss()
    baseline_criterion = nn.BCELoss()
    loss_criterion = nn.BCELoss()
    
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, path=saved_path,verbose=True)
    
    for epoch in range(epochs):
        
        epoch_train_actor_loss_output = []
        epoch_train_critic_acc = []
        
        actor_model.train()
        critic_model.train()
        baseline_model.train()

        for batch, (data1, y1, data2, y2, signal) in enumerate(train_loader):
            
            # get selections of data1 and data2
            actor_output_1 = actor_model(data1.float())
#             selection_1 = torch.bernoulli(torch.tensor(actor_output_1))
            selection_1 = torch.bernoulli(actor_output_1)
            
            actor_output_2 = actor_model(data2.float())
#             selection_2 = torch.bernoulli(torch.tensor(actor_output_2))
            selection_2 = torch.bernoulli(actor_output_2)

            # train critic model
            critic_output = critic_model(data1.float(), selection_1, data2.float(), selection_2)
            
            label_difference = y1.ge(y2).double()
            critic_loss_output = critic_criterion(critic_output.double(), label_difference)

            critic_optimizer.zero_grad()
            critic_loss_output.backward(retain_graph = True)
            critic_optimizer.step()
            
            critic_output_1 = critic_model.predict(data1.float(), selection_1)
            critic_output_2 = critic_model.predict(data2.float(), selection_2)
            
            #--------Performance of predictor------------------------------------------------------
            critic_acc_1 = torch.eq(critic_output_1.ge(0.5).view(1,-1), y1.ge(1).view(1,-1)).sum().item() / batch_size
            critic_acc_2 = torch.eq(critic_output_2.ge(0.5).view(1,-1), y2.ge(1).view(1,-1)).sum().item() / batch_size
            
            epoch_train_critic_acc.append((critic_acc_1 + critic_acc_2) / 2)
                
            #--------------------------------------------------------------------------------------
            
            # train basseline model
            baseline_output = baseline_model(data1.float(), data2.float())
            baseline_loss_output = baseline_criterion(baseline_output.double(), label_difference)

            baseline_optimizer.zero_grad()
            baseline_loss_output.backward(retain_graph = True)
            baseline_optimizer.step()
            
            baseline_output_1 = baseline_model.predict(data1.float())
            baseline_output_2 = baseline_model.predict(data2.float())

            critic_loss_1 = -((y1.ge(1).float().view(-1,1) * torch.log(critic_output_1 + 1e-8)) + (1-y1.ge(1).float().view(-1,1)) * torch.log(1 - critic_output_1 + 1e-8))
            critic_loss_2 = -((y2.ge(1).float().view(-1,1) * torch.log(critic_output_2 + 1e-8)) + (1-y2.ge(1).float().view(-1,1)) * torch.log(1 - critic_output_2 + 1e-8))
                        
            baseline_loss_1 = -((y1.ge(1).float().view(-1,1) * torch.log(baseline_output_1 + 1e-8)) + (1-y1.ge(1).float().view(-1,1)) * torch.log(1 - baseline_output_1 + 1e-8))
            baseline_loss_2 = -((y2.ge(1).float().view(-1,1) * torch.log(baseline_output_2 + 1e-8)) + (1-y2.ge(1).float().view(-1,1)) * torch.log(1 - baseline_output_2 + 1e-8))
        
            # update selector network
            actor_loss_output = pair_actor_loss(actor_output_1, actor_output_2, selection_1, selection_2, critic_loss_1, critic_loss_2, baseline_loss_1, baseline_loss_2, signal, lamda, beta, gamma)
                        
            actor_optimizer.zero_grad()
            actor_loss_output.backward()
            actor_optimizer.step()
                        
            epoch_train_actor_loss_output.append(actor_loss_output.item())
            
        print(epoch+1,"***********************************************************************")
        print("---------------train actor loss-------------", np.mean(epoch_train_actor_loss_output))
        print("---------------train critic acc-------------", np.mean(epoch_train_critic_acc))
            
        epoch_vali_actor_loss_output = []
        epoch_vali_critic_acc = []
        
        actor_model.eval()
        critic_model.eval()
        baseline_model.eval()  
        
        with torch.no_grad():   
            for batch, (data1, y1, data2, y2, signal) in enumerate(vali_loader):
                                
                vali_actor_output_1 = actor_model(data1.float())
#                 vali_selection_1 = torch.bernoulli(vali_actor_output_1)
                vali_selection_1 = vali_actor_output_1.ge(0.5).type(torch.int)
                                          
                vali_actor_output_2 = actor_model(data2.float())
#                 vali_selection_2 = torch.bernoulli(vali_actor_output_2)
                vali_selection_2 = vali_actor_output_2.ge(0.5).type(torch.int)
                                          
                vali_critic_output_1 = critic_model.predict(data1.float(), vali_selection_1)
                vali_critic_output_2 = critic_model.predict(data2.float(), vali_selection_2)
                vali_baseline_output_1 = baseline_model.predict(data1.float())
                vali_baseline_output_2 = baseline_model.predict(data2.float())
                
                
                vali_critic_loss_1 = -((y1.ge(1).float().view(-1,1) * torch.log(vali_critic_output_1 + 1e-8)) + (1-y1.ge(1).float().view(-1,1)) * torch.log(1 - vali_critic_output_1 + 1e-8))
                vali_critic_loss_2 = -((y2.ge(1).float().view(-1,1) * torch.log(vali_critic_output_2 + 1e-8)) + (1-y2.ge(1).float().view(-1,1)) * torch.log(1 - vali_critic_output_2 + 1e-8))
                vali_baseline_loss_1 = -((y1.ge(1).float().view(-1,1) * torch.log(vali_baseline_output_1 + 1e-8)) + (1-y1.ge(1).float().view(-1,1)) * torch.log(1 - vali_baseline_output_1 + 1e-8))
                vali_baseline_loss_2 = -((y2.ge(1).float().view(-1,1) * torch.log(vali_baseline_output_2 + 1e-8)) + (1-y2.ge(1).float().view(-1,1)) * torch.log(1 - vali_baseline_output_2 + 1e-8))
        
                vali_actor_loss_output = pair_actor_loss(vali_actor_output_1, vali_actor_output_2, vali_selection_1, vali_selection_2, vali_critic_loss_1, vali_critic_loss_2, vali_baseline_loss_1, vali_baseline_loss_2, signal, lamda, beta, gamma)

                epoch_vali_actor_loss_output.append(vali_actor_loss_output.item())
                
        print("---------------Vali actor loss-------------", np.mean(epoch_vali_actor_loss_output))
        
        # early_stopping needs the validation loss to check if it has decresed, 
        # and if it has, it will make a checkpoint of the current model
        
        if epoch > 50:
            valid_loss = np.mean(epoch_vali_actor_loss_output)
            early_stopping(valid_loss, actor_model, critic_model, baseline_model)
            if early_stopping.early_stop:
                print("Early stopping")
                break
        
    checkpoint = torch.load(saved_path)

    actor_model.load_state_dict(checkpoint['actor_model'])
    critic_model.load_state_dict(checkpoint['critic_model'])
    baseline_model.load_state_dict(checkpoint['baseline_model'])
        
    return actor_model,critic_model,baseline_model

In [14]:
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

### train

In [15]:
model_para = {'lambda':0.3,
              'actor_h_dim':300,
              'critic_h_dim':200,
              'baseline_h_dim':200,
              'actor_output' :46,
              'critic_output':1,
              'baseline_output':1,
              'n_layer':3,
              'activation':'selu',
              'learning_rate':0.0001}
batch_size = 32

In [16]:
actor_list = []
critic_list = []
baseline_list = []

beta = 0.1
gamma = 0.5
samples_portion_of_all = 0.0001

for k in range(1):

    y_train = []
    x_train = []
    query_id = []
    array_train_x1 = []
    array_train_x0 = []

    path = "./MQ2008_paired/fold_{}/".format(k+1)

    train_path = path + 'train.npz'
    train_loader = get_loader(train_path, batch_size, shuffle=True, drop_last=True)

    vali_path = path + 'vali.npz'
    vali_loader = get_loader(vali_path, batch_size, shuffle=True, drop_last=True)

    test_path = path + 'test.npz'
    test_loader = get_loader(test_path, batch_size, shuffle=True, drop_last=True)

    actor = Actor(46, model_para['actor_h_dim'], model_para['actor_output'], model_para['n_layer'], model_para['activation'])
    critic = Critic_RankNet(46, model_para['critic_h_dim'], model_para['critic_output'])
    baseline = Baseline_RankNet(46, model_para['baseline_h_dim'], model_para['baseline_output'])

    actor.apply(init_weights)
    critic.apply(init_weights)
    baseline.apply(init_weights)
    
    saved_path = 'fold_{}_saved_model_dict.pt'.format(k)
    
    trained_model_list = train_model(actor, critic, baseline, 2, saved_path, 10000, model_para['lambda'], beta, gamma)

    actor_list.append(trained_model_list[0])
    critic_list.append(trained_model_list[1])
    baseline_list.append(trained_model_list[2])



  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


1 ***********************************************************************
---------------train actor loss------------- 3.308849323540926
---------------train critic acc------------- 0.3019748263888889
---------------Vali actor loss------------- 1.6139035008170388
2 ***********************************************************************
---------------train actor loss------------- 0.45527330144411987
---------------train critic acc------------- 0.3430989583333333
---------------Vali actor loss------------- -2.050597694787112
3 ***********************************************************************
---------------train actor loss------------- 2.193481574041976
---------------train critic acc------------- 0.5061848958333334
---------------Vali actor loss------------- -4.0314240943301805
4 ***********************************************************************
---------------train actor loss------------- 4.8270498683883085
---------------train critic acc------------- 0.5525173611111112
---

33 ***********************************************************************
---------------train actor loss------------- 40.90463304519653
---------------train critic acc------------- 0.5717230902777778
---------------Vali actor loss------------- 38.09905936501243
34 ***********************************************************************
---------------train actor loss------------- 44.41955828666687
---------------train critic acc------------- 0.5849609375
---------------Vali actor loss------------- 37.59408378601074
35 ***********************************************************************
---------------train actor loss------------- 40.040684440069725
---------------train critic acc------------- 0.5475260416666666
---------------Vali actor loss------------- 38.33156273581765
36 ***********************************************************************
---------------train actor loss------------- 43.580667205982735
---------------train critic acc------------- 0.5767144097222222
----------

### save

In [28]:
for k in range(1):    
    torch.save(actor_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight_observe_predictor/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32)_actor_{}.pth'.format(k))
    torch.save(critic_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight_observe_predictor/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32)_critic_{}.pth'.format(k))
    torch.save(baseline_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight_observe_predictor/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32)_baseline_{}.pth'.format(k))

In [10]:
model_para = {'lambda':0.3,
              'actor_h_dim':300,
              'critic_h_dim':200,
              'baseline_h_dim':200,
              'actor_output' :46,
              'critic_output':1,
              'baseline_output':1,
              'n_layer':10,
              'activation':'selu',
              'learning_rate':0.0001}
batch_size = 32

In [11]:
actor_list = []
critic_list = []
baseline_list = []

beta = 0.1
gamma = 0.5
samples_portion_of_all = 0.0001

for k in range(1):

    y_train = []
    x_train = []
    query_id = []
    array_train_x1 = []
    array_train_x0 = []

    path = "./MQ2008/Fold{}/".format(k+1)

    train_path = path + 'train.txt'
    train_loader = get_loader(train_path, batch_size, shuffle=True, drop_last=True)

    vali_path = path + 'vali.txt'
    vali_loader = get_loader(vali_path, batch_size, shuffle=True, drop_last=True)

    test_path = path + 'test.txt'
    test_loader = get_loader(test_path, batch_size, shuffle=True, drop_last=True)

    actor = Actor(46, model_para['actor_h_dim'], model_para['actor_output'], model_para['n_layer'], model_para['activation'])
    critic = Critic_RankNet(46, model_para['critic_h_dim'], model_para['critic_output'])
    baseline = Baseline_RankNet(46, model_para['baseline_h_dim'], model_para['baseline_output'])

    actor.apply(init_weights)
    critic.apply(init_weights)
    baseline.apply(init_weights)
    
    trained_model_list = train_model(baseline, actor, critic, 300, model_para['lambda'], beta, gamma)

    actor_list.append(trained_model_list[0])
    critic_list.append(trained_model_list[1])
    baseline_list.append(trained_model_list[2])



  This is separate from the ipykernel package so we can avoid doing imports until
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


1 ***********************************************************************
---------------train actor loss------------- -0.2944671261227793




---------------Vali actor loss------------- -1.6837014
2 ***********************************************************************
---------------train actor loss------------- 0.6855324502620432
---------------Vali actor loss------------- 1.1541094
3 ***********************************************************************
---------------train actor loss------------- 2.2708988895432816
---------------Vali actor loss------------- 2.292619
4 ***********************************************************************
---------------train actor loss------------- 4.426977068185806
---------------Vali actor loss------------- 6.359755
5 ***********************************************************************
---------------train actor loss------------- 3.8663474689755173
---------------Vali actor loss------------- 3.423857
6 ***********************************************************************
---------------train actor loss------------- 4.444383292148511
---------------Vali actor loss------------- 

---------------Vali actor loss------------- 7.9583626
45 ***********************************************************************
---------------train actor loss------------- 7.308023197783364
---------------Vali actor loss------------- 9.032999
46 ***********************************************************************
---------------train actor loss------------- 7.492747350285451
---------------Vali actor loss------------- 5.2896647
47 ***********************************************************************
---------------train actor loss------------- 9.485047501822313
---------------Vali actor loss------------- 4.3924375
48 ***********************************************************************
---------------train actor loss------------- 7.132873858428663
---------------Vali actor loss------------- 10.183114
49 ***********************************************************************
---------------train actor loss------------- 8.582740074230564
---------------Vali actor loss-----------

---------------Vali actor loss------------- 10.497383
88 ***********************************************************************
---------------train actor loss------------- 12.592995827396711
---------------Vali actor loss------------- 3.8765829
89 ***********************************************************************
---------------train actor loss------------- 13.90849576310979
---------------Vali actor loss------------- 17.31951
90 ***********************************************************************
---------------train actor loss------------- 14.120286302847994
---------------Vali actor loss------------- 22.672045
91 ***********************************************************************
---------------train actor loss------------- 13.581466493507227
---------------Vali actor loss------------- 19.135857
92 ***********************************************************************
---------------train actor loss------------- 13.399105671379301
---------------Vali actor loss-------

---------------Vali actor loss------------- 19.041655
131 ***********************************************************************
---------------train actor loss------------- 16.416267152875662
---------------Vali actor loss------------- 17.645506
132 ***********************************************************************
---------------train actor loss------------- 16.7701943922374
---------------Vali actor loss------------- 15.221892
133 ***********************************************************************
---------------train actor loss------------- 16.063477335704697
---------------Vali actor loss------------- 13.132903
134 ***********************************************************************
---------------train actor loss------------- 14.898485499951574
---------------Vali actor loss------------- 9.510934
135 ***********************************************************************
---------------train actor loss------------- 15.03222688494457
---------------Vali actor loss----

---------------Vali actor loss------------- 15.5191765
174 ***********************************************************************
---------------train actor loss------------- 14.158835588230026
---------------Vali actor loss------------- 17.69267
175 ***********************************************************************
---------------train actor loss------------- 15.449658847517437
---------------Vali actor loss------------- 8.785049
176 ***********************************************************************
---------------train actor loss------------- 15.263107187218136
---------------Vali actor loss------------- 12.399906
177 ***********************************************************************
---------------train actor loss------------- 14.873797891040644
---------------Vali actor loss------------- 16.848085
178 ***********************************************************************
---------------train actor loss------------- 16.36348231219583
---------------Vali actor loss--

---------------Vali actor loss------------- 15.748225
217 ***********************************************************************
---------------train actor loss------------- 14.73068258828587
---------------Vali actor loss------------- 15.496613
218 ***********************************************************************
---------------train actor loss------------- 15.341890498995781
---------------Vali actor loss------------- 14.083677
219 ***********************************************************************
---------------train actor loss------------- 15.454007865654098
---------------Vali actor loss------------- 14.275512
220 ***********************************************************************
---------------train actor loss------------- 17.073863488104607
---------------Vali actor loss------------- 15.911139
221 ***********************************************************************
---------------train actor loss------------- 15.863547167844242
---------------Vali actor loss-

---------------Vali actor loss------------- 21.241096
260 ***********************************************************************
---------------train actor loss------------- 15.653392303321096
---------------Vali actor loss------------- 15.680472
261 ***********************************************************************
---------------train actor loss------------- 14.443356598416964
---------------Vali actor loss------------- 19.999512
262 ***********************************************************************
---------------train actor loss------------- 15.131787217325634
---------------Vali actor loss------------- 15.432413
263 ***********************************************************************
---------------train actor loss------------- 16.908271750642193
---------------Vali actor loss------------- 15.151612
264 ***********************************************************************
---------------train actor loss------------- 17.411523471275967
---------------Vali actor loss

In [12]:
for k in range(1):    
    torch.save(actor_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32_layer10)_actor_{}.pth'.format(k))
    torch.save(critic_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32_layer10)_critic_{}.pth'.format(k))
    torch.save(baseline_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32_layer10)_baseline_{}.pth'.format(k))

In [13]:
model_para = {'lambda':0.3,
              'actor_h_dim':300,
              'critic_h_dim':200,
              'baseline_h_dim':200,
              'actor_output' :46,
              'critic_output':1,
              'baseline_output':1,
              'n_layer':20,
              'activation':'selu',
              'learning_rate':0.0001}
batch_size = 32

In [14]:
actor_list = []
critic_list = []
baseline_list = []

beta = 0.1
gamma = 0.5
samples_portion_of_all = 0.0001

for k in range(1):

    y_train = []
    x_train = []
    query_id = []
    array_train_x1 = []
    array_train_x0 = []

    path = "./MQ2008/Fold{}/".format(k+1)

    train_path = path + 'train.txt'
    train_loader = get_loader(train_path, batch_size, shuffle=True, drop_last=True)

    vali_path = path + 'vali.txt'
    vali_loader = get_loader(vali_path, batch_size, shuffle=True, drop_last=True)

    test_path = path + 'test.txt'
    test_loader = get_loader(test_path, batch_size, shuffle=True, drop_last=True)

    actor = Actor(46, model_para['actor_h_dim'], model_para['actor_output'], model_para['n_layer'], model_para['activation'])
    critic = Critic_RankNet(46, model_para['critic_h_dim'], model_para['critic_output'])
    baseline = Baseline_RankNet(46, model_para['baseline_h_dim'], model_para['baseline_output'])

    actor.apply(init_weights)
    critic.apply(init_weights)
    baseline.apply(init_weights)
    
    trained_model_list = train_model(baseline, actor, critic, 300, model_para['lambda'], beta, gamma)

    actor_list.append(trained_model_list[0])
    critic_list.append(trained_model_list[1])
    baseline_list.append(trained_model_list[2])



  This is separate from the ipykernel package so we can avoid doing imports until
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


1 ***********************************************************************
---------------train actor loss------------- 1.0831521583928003




---------------Vali actor loss------------- 0.27952582
2 ***********************************************************************
---------------train actor loss------------- 0.6429348710096545
---------------Vali actor loss------------- 2.2088313
3 ***********************************************************************
---------------train actor loss------------- 2.9739159698494606
---------------Vali actor loss------------- 1.8433192
4 ***********************************************************************
---------------train actor loss------------- 5.1942235388689575
---------------Vali actor loss------------- 5.856215
5 ***********************************************************************
---------------train actor loss------------- 5.717119622147745
---------------Vali actor loss------------- 6.053752
6 ***********************************************************************
---------------train actor loss------------- 5.608241540276342
---------------Vali actor loss-------------

---------------Vali actor loss------------- 0.42864984
45 ***********************************************************************
---------------train actor loss------------- 2.188724322244525
---------------Vali actor loss------------- 3.0659423
46 ***********************************************************************
---------------train actor loss------------- 2.2782285606695547
---------------Vali actor loss------------- 3.0208695
47 ***********************************************************************
---------------train actor loss------------- 2.6569428510136075
---------------Vali actor loss------------- 2.9318845
48 ***********************************************************************
---------------train actor loss------------- 1.6592638649874263
---------------Vali actor loss------------- 1.2161523
49 ***********************************************************************
---------------train actor loss------------- 1.8709017481240962
---------------Vali actor loss-----

---------------Vali actor loss------------- 2.4546392
88 ***********************************************************************
---------------train actor loss------------- 4.610740127662818
---------------Vali actor loss------------- 5.2894497
89 ***********************************************************************
---------------train actor loss------------- 4.016903690786825
---------------Vali actor loss------------- 5.852193
90 ***********************************************************************
---------------train actor loss------------- 7.378683193276326
---------------Vali actor loss------------- 8.288901
91 ***********************************************************************
---------------train actor loss------------- 8.46491646890839
---------------Vali actor loss------------- 10.170243
92 ***********************************************************************
---------------train actor loss------------- 9.097498758385578
---------------Vali actor loss-------------

---------------Vali actor loss------------- 5.407619
131 ***********************************************************************
---------------train actor loss------------- 6.232252522061269
---------------Vali actor loss------------- 4.8466015
132 ***********************************************************************
---------------train actor loss------------- 6.98876033268041
---------------Vali actor loss------------- 6.528626
133 ***********************************************************************
---------------train actor loss------------- 7.326400867352883
---------------Vali actor loss------------- 8.582485
134 ***********************************************************************
---------------train actor loss------------- 7.833018287395437
---------------Vali actor loss------------- 9.228755
135 ***********************************************************************
---------------train actor loss------------- 8.114577373696697
---------------Vali actor loss----------

---------------Vali actor loss------------- 3.3492882
174 ***********************************************************************
---------------train actor loss------------- 3.6907995512915983
---------------Vali actor loss------------- 4.0989137
175 ***********************************************************************
---------------train actor loss------------- 3.560016553848982
---------------Vali actor loss------------- 2.887011
176 ***********************************************************************
---------------train actor loss------------- 3.3029358059995704
---------------Vali actor loss------------- 3.4610863
177 ***********************************************************************
---------------train actor loss------------- 3.6565154045820236
---------------Vali actor loss------------- 4.156652
178 ***********************************************************************
---------------train actor loss------------- 3.4030851216779814
---------------Vali actor loss---

216 ***********************************************************************
---------------train actor loss------------- 0.5559858822574218
---------------Vali actor loss------------- 0.859534
217 ***********************************************************************
---------------train actor loss------------- 0.4195757493790653
---------------Vali actor loss------------- 0.883564
218 ***********************************************************************
---------------train actor loss------------- 0.9583769579314523
---------------Vali actor loss------------- 0.55279064
219 ***********************************************************************
---------------train actor loss------------- 1.5005431117282972
---------------Vali actor loss------------- 1.1635033
220 ***********************************************************************
---------------train actor loss------------- 0.9769663028419018
---------------Vali actor loss------------- 1.2107862
221 ***************************

---------------Vali actor loss------------- 2.8966308
259 ***********************************************************************
---------------train actor loss------------- 1.5383173034836848
---------------Vali actor loss------------- 0.4348917
260 ***********************************************************************
---------------train actor loss------------- 1.7558702052467399
---------------Vali actor loss------------- 2.0393794
261 ***********************************************************************
---------------train actor loss------------- 2.0216005365881653
---------------Vali actor loss------------- 3.3279262
262 ***********************************************************************
---------------train actor loss------------- 2.54212244020568
---------------Vali actor loss------------- 1.9495829
263 ***********************************************************************
---------------train actor loss------------- 2.1648675687611103
---------------Vali actor loss--

---------------Vali actor loss------------- -0.4940973


In [15]:
for k in range(1):    
    torch.save(actor_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32_layer20)_actor_{}.pth'.format(k))
    torch.save(critic_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32_layer20)_critic_{}.pth'.format(k))
    torch.save(baseline_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_epoch_1000_batch_32_layer20)_baseline_{}.pth'.format(k))

In [25]:
model_para = {'lambda':0.5,
              'actor_h_dim':300,
              'critic_h_dim':200,
              'baseline_h_dim':200,
              'actor_output' :46,
              'critic_output':1,
              'baseline_output':1,
              'n_layer':20,
              'activation':'selu',
              'learning_rate':0.0001}
batch_size = 32

In [26]:
actor_list = []
critic_list = []
baseline_list = []

beta = 0.1
gamma = 0.5
samples_portion_of_all = 0.0001

for k in range(1):

    y_train = []
    x_train = []
    query_id = []
    array_train_x1 = []
    array_train_x0 = []

    path = "./MQ2008/Fold{}/".format(k+1)

    train_path = path + 'train.txt'
    train_loader = get_loader(train_path, batch_size, shuffle=True, drop_last=True)

    vali_path = path + 'vali.txt'
    vali_loader = get_loader(vali_path, batch_size, shuffle=True, drop_last=True)

    test_path = path + 'test.txt'
    test_loader = get_loader(test_path, batch_size, shuffle=True, drop_last=True)

    actor = Actor(46, model_para['actor_h_dim'], model_para['actor_output'], model_para['n_layer'], model_para['activation'])
    critic = Critic_RankNet(46, model_para['critic_h_dim'], model_para['critic_output'])
    baseline = Baseline_RankNet(46, model_para['baseline_h_dim'], model_para['baseline_output'])

    actor.apply(init_weights)
    critic.apply(init_weights)
    baseline.apply(init_weights)
    
    trained_model_list = train_model(baseline, actor, critic, 300, model_para['lambda'], beta, gamma)

    actor_list.append(trained_model_list[0])
    critic_list.append(trained_model_list[1])
    baseline_list.append(trained_model_list[2])



  This is separate from the ipykernel package so we can avoid doing imports until
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


1 ***********************************************************************
---------------train actor loss------------- -1.0741773711310492




---------------Vali actor loss------------- -0.33604097
2 ***********************************************************************
---------------train actor loss------------- 1.0844473613219128
---------------Vali actor loss------------- 2.5757499
3 ***********************************************************************
---------------train actor loss------------- 3.0126346237957478
---------------Vali actor loss------------- 5.727892
4 ***********************************************************************
---------------train actor loss------------- 4.0984119065105915
---------------Vali actor loss------------- 3.6823015
5 ***********************************************************************
---------------train actor loss------------- 3.9960087798535824
---------------Vali actor loss------------- 5.072079
6 ***********************************************************************
---------------train actor loss------------- 5.61038205648462
---------------Vali actor loss------------

---------------Vali actor loss------------- 6.5243945
45 ***********************************************************************
---------------train actor loss------------- 5.245532418704695
---------------Vali actor loss------------- 6.0348754
46 ***********************************************************************
---------------train actor loss------------- 3.1004888295299478
---------------Vali actor loss------------- 1.1847433
47 ***********************************************************************
---------------train actor loss------------- 3.2805483088725143
---------------Vali actor loss------------- 7.5519805
48 ***********************************************************************
---------------train actor loss------------- 3.7659431803557606
---------------Vali actor loss------------- 2.0782504
49 ***********************************************************************
---------------train actor loss------------- 1.4073933760325115
---------------Vali actor loss------

---------------Vali actor loss------------- 3.7934995
88 ***********************************************************************
---------------train actor loss------------- 4.1043096867700415
---------------Vali actor loss------------- 6.011018
89 ***********************************************************************
---------------train actor loss------------- 4.105979457911518
---------------Vali actor loss------------- 3.219089
90 ***********************************************************************
---------------train actor loss------------- 3.989572920319107
---------------Vali actor loss------------- 5.0916867
91 ***********************************************************************
---------------train actor loss------------- 4.943462603415052
---------------Vali actor loss------------- 2.7787008
92 ***********************************************************************
---------------train actor loss------------- 4.945711199194193
---------------Vali actor loss-----------

---------------Vali actor loss------------- 0.30698946
131 ***********************************************************************
---------------train actor loss------------- 1.6864531762484047
---------------Vali actor loss------------- 1.7238628
132 ***********************************************************************
---------------train actor loss------------- 1.4498717426839802
---------------Vali actor loss------------- 2.4834569
133 ***********************************************************************
---------------train actor loss------------- 1.480203001656466
---------------Vali actor loss------------- 1.784334
134 ***********************************************************************
---------------train actor loss------------- 0.8880835763282247
---------------Vali actor loss------------- 1.5959532
135 ***********************************************************************
---------------train actor loss------------- 0.7979561537504196
---------------Vali actor loss-

---------------Vali actor loss------------- 2.0145776
174 ***********************************************************************
---------------train actor loss------------- 1.5046491937504873
---------------Vali actor loss------------- 1.798772
175 ***********************************************************************
---------------train actor loss------------- 1.8748620920297172
---------------Vali actor loss------------- 2.4384582
176 ***********************************************************************
---------------train actor loss------------- 2.2858989807880588
---------------Vali actor loss------------- 1.168397
177 ***********************************************************************
---------------train actor loss------------- 1.343345961223046
---------------Vali actor loss------------- 1.2006849
178 ***********************************************************************
---------------train actor loss------------- 1.2019820641726255
---------------Vali actor loss---

---------------Vali actor loss------------- 5.6721387
217 ***********************************************************************
---------------train actor loss------------- 4.791170747950673
---------------Vali actor loss------------- 5.690504
218 ***********************************************************************
---------------train actor loss------------- 4.899924480252796
---------------Vali actor loss------------- 3.8109632
219 ***********************************************************************
---------------train actor loss------------- 4.599868938326836
---------------Vali actor loss------------- 7.6109257
220 ***********************************************************************
---------------train actor loss------------- 3.8609434889836445
---------------Vali actor loss------------- 4.6532636
221 ***********************************************************************
---------------train actor loss------------- 4.227141659706831
---------------Vali actor loss-----

---------------Vali actor loss------------- 2.1635172
260 ***********************************************************************
---------------train actor loss------------- 3.918706821070777
---------------Vali actor loss------------- 3.732599
261 ***********************************************************************
---------------train actor loss------------- 4.141849621302551
---------------Vali actor loss------------- 5.5061684
262 ***********************************************************************
---------------train actor loss------------- 4.206177874985668
---------------Vali actor loss------------- 6.4318976
263 ***********************************************************************
---------------train actor loss------------- 3.4792751057280435
---------------Vali actor loss------------- 5.419855
264 ***********************************************************************
---------------train actor loss------------- 3.6965058454208903
---------------Vali actor loss-----

In [27]:
for k in range(1):    
    torch.save(actor_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.5_epoch_1000_batch_32_layer20)_actor_{}.pth'.format(k))
    torch.save(critic_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.5_epoch_1000_batch_32_layer20)_critic_{}.pth'.format(k))
    torch.save(baseline_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.5_epoch_1000_batch_32_layer20)_baseline_{}.pth'.format(k))

In [38]:
model_para = {'lambda':0.8,
              'actor_h_dim':300,
              'critic_h_dim':200,
              'baseline_h_dim':200,
              'actor_output' :46,
              'critic_output':1,
              'baseline_output':1,
              'n_layer':20,
              'activation':'selu',
              'learning_rate':0.0001}
batch_size = 32

In [29]:
actor_list = []
critic_list = []
baseline_list = []

beta = 0.1
gamma = 0.5
samples_portion_of_all = 0.0001

for k in range(1):

    y_train = []
    x_train = []
    query_id = []
    array_train_x1 = []
    array_train_x0 = []

    path = "./MQ2008/Fold{}/".format(k+1)

    train_path = path + 'train.txt'
    train_loader = get_loader(train_path, batch_size, shuffle=True, drop_last=True)

    vali_path = path + 'vali.txt'
    vali_loader = get_loader(vali_path, batch_size, shuffle=True, drop_last=True)

    test_path = path + 'test.txt'
    test_loader = get_loader(test_path, batch_size, shuffle=True, drop_last=True)

    actor = Actor(46, model_para['actor_h_dim'], model_para['actor_output'], model_para['n_layer'], model_para['activation'])
    critic = Critic_RankNet(46, model_para['critic_h_dim'], model_para['critic_output'])
    baseline = Baseline_RankNet(46, model_para['baseline_h_dim'], model_para['baseline_output'])

    actor.apply(init_weights)
    critic.apply(init_weights)
    baseline.apply(init_weights)
    
    trained_model_list = train_model(baseline, actor, critic, 300, model_para['lambda'], beta, gamma)

    actor_list.append(trained_model_list[0])
    critic_list.append(trained_model_list[1])
    baseline_list.append(trained_model_list[2])



  This is separate from the ipykernel package so we can avoid doing imports until
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


1 ***********************************************************************
---------------train actor loss------------- 1.0191793087869883




---------------Vali actor loss------------- -1.3093151
2 ***********************************************************************
---------------train actor loss------------- 0.9105949605711632
---------------Vali actor loss------------- 1.5810789
3 ***********************************************************************
---------------train actor loss------------- 3.0984570417139263
---------------Vali actor loss------------- 7.3628035
4 ***********************************************************************
---------------train actor loss------------- 7.095878501319223
---------------Vali actor loss------------- 8.323466
5 ***********************************************************************
---------------train actor loss------------- 10.99039406577746
---------------Vali actor loss------------- 13.71819
6 ***********************************************************************
---------------train actor loss------------- 8.481222115457058
---------------Vali actor loss------------- 

---------------Vali actor loss------------- 17.593643
45 ***********************************************************************
---------------train actor loss------------- 17.92374293671714
---------------Vali actor loss------------- 15.75848
46 ***********************************************************************
---------------train actor loss------------- 16.73723229765892
---------------Vali actor loss------------- 16.283148
47 ***********************************************************************
---------------train actor loss------------- 15.114232785171932
---------------Vali actor loss------------- 18.67452
48 ***********************************************************************
---------------train actor loss------------- 15.77262057736516
---------------Vali actor loss------------- 15.835968
49 ***********************************************************************
---------------train actor loss------------- 17.4995742191871
---------------Vali actor loss------------

---------------Vali actor loss------------- 11.173768
88 ***********************************************************************
---------------train actor loss------------- 11.462314408272505
---------------Vali actor loss------------- 10.523157
89 ***********************************************************************
---------------train actor loss------------- 11.92140099654595
---------------Vali actor loss------------- 16.77257
90 ***********************************************************************
---------------train actor loss------------- 13.354075108965239
---------------Vali actor loss------------- 15.092188
91 ***********************************************************************
---------------train actor loss------------- 14.796048941711584
---------------Vali actor loss------------- 13.683862
92 ***********************************************************************
---------------train actor loss------------- 14.646945375121302
---------------Vali actor loss-------

---------------Vali actor loss------------- 8.765713
131 ***********************************************************************
---------------train actor loss------------- 9.775077104568481
---------------Vali actor loss------------- 9.352547
132 ***********************************************************************
---------------train actor loss------------- 10.489577849706015
---------------Vali actor loss------------- 9.659034
133 ***********************************************************************
---------------train actor loss------------- 11.41112764386667
---------------Vali actor loss------------- 10.080816
134 ***********************************************************************
---------------train actor loss------------- 11.6789903326167
---------------Vali actor loss------------- 13.334429
135 ***********************************************************************
---------------train actor loss------------- 12.563112442692121
---------------Vali actor loss-------

---------------Vali actor loss------------- 5.5429187
174 ***********************************************************************
---------------train actor loss------------- 5.324426757792632
---------------Vali actor loss------------- 6.674801
175 ***********************************************************************
---------------train actor loss------------- 5.380384258511993
---------------Vali actor loss------------- 6.0755095
176 ***********************************************************************
---------------train actor loss------------- 4.520746871829033
---------------Vali actor loss------------- 4.540888
177 ***********************************************************************
---------------train actor loss------------- 4.545377309951517
---------------Vali actor loss------------- 3.240717
178 ***********************************************************************
---------------train actor loss------------- 3.198405335760779
---------------Vali actor loss--------

---------------Vali actor loss------------- 16.229607
217 ***********************************************************************
---------------train actor loss------------- 10.296750091844135
---------------Vali actor loss------------- 11.9688225
218 ***********************************************************************
---------------train actor loss------------- 8.66235272337993
---------------Vali actor loss------------- 4.8813467
219 ***********************************************************************
---------------train actor loss------------- 7.471509302655856
---------------Vali actor loss------------- 6.8494444
220 ***********************************************************************
---------------train actor loss------------- 8.19471194098393
---------------Vali actor loss------------- 8.261973
221 ***********************************************************************
---------------train actor loss------------- 7.855408549308777
---------------Vali actor loss------

---------------Vali actor loss------------- 15.295985
260 ***********************************************************************
---------------train actor loss------------- 10.779181553257835
---------------Vali actor loss------------- 13.040385
261 ***********************************************************************
---------------train actor loss------------- 11.394768728978104
---------------Vali actor loss------------- 12.149069
262 ***********************************************************************
---------------train actor loss------------- 8.603662283056313
---------------Vali actor loss------------- 5.1497974
263 ***********************************************************************
---------------train actor loss------------- 6.265905201021168
---------------Vali actor loss------------- 6.4951215
264 ***********************************************************************
---------------train actor loss------------- 7.824322389231788
---------------Vali actor loss---

In [30]:
for k in range(1):    
    torch.save(actor_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.8_epoch_1000_batch_32_layer20)_actor_{}.pth'.format(k))
    torch.save(critic_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.8_epoch_1000_batch_32_layer20)_critic_{}.pth'.format(k))
    torch.save(baseline_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.8_epoch_1000_batch_32_layer20)_baseline_{}.pth'.format(k))

In [None]:
#---------------add detach--------------------------

In [39]:
actor_list = []
critic_list = []
baseline_list = []

beta = 0.1
gamma = 0.5
samples_portion_of_all = 0.0001

for k in range(1):

    y_train = []
    x_train = []
    query_id = []
    array_train_x1 = []
    array_train_x0 = []

    path = "./MQ2008/Fold{}/".format(k+1)

    train_path = path + 'train.txt'
    train_loader = get_loader(train_path, batch_size, shuffle=True, drop_last=True)

    vali_path = path + 'vali.txt'
    vali_loader = get_loader(vali_path, batch_size, shuffle=True, drop_last=True)

    test_path = path + 'test.txt'
    test_loader = get_loader(test_path, batch_size, shuffle=True, drop_last=True)

    actor = Actor(46, model_para['actor_h_dim'], model_para['actor_output'], model_para['n_layer'], model_para['activation'])
    critic = Critic_RankNet(46, model_para['critic_h_dim'], model_para['critic_output'])
    baseline = Baseline_RankNet(46, model_para['baseline_h_dim'], model_para['baseline_output'])

    actor.apply(init_weights)
    critic.apply(init_weights)
    baseline.apply(init_weights)
    
    trained_model_list = train_model(baseline, actor, critic, 300, model_para['lambda'], beta, gamma)

    actor_list.append(trained_model_list[0])
    critic_list.append(trained_model_list[1])
    baseline_list.append(trained_model_list[2])



  This is separate from the ipykernel package so we can avoid doing imports until
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


1 ***********************************************************************
---------------train actor loss------------- -1.784274232470327




---------------Vali actor loss------------- -2.6020882
2 ***********************************************************************
---------------train actor loss------------- -0.9925500758820109
---------------Vali actor loss------------- -0.23587959
3 ***********************************************************************
---------------train actor loss------------- 1.6518917394181092
---------------Vali actor loss------------- 3.5070264
4 ***********************************************************************
---------------train actor loss------------- 3.9100172287887998
---------------Vali actor loss------------- 4.3121815
5 ***********************************************************************
---------------train actor loss------------- 5.071371147202121
---------------Vali actor loss------------- 7.099902
6 ***********************************************************************
---------------train actor loss------------- 6.116591579384274
---------------Vali actor loss---------

---------------Vali actor loss------------- 7.6351333
45 ***********************************************************************
---------------train actor loss------------- 5.820213028126293
---------------Vali actor loss------------- 7.796458
46 ***********************************************************************
---------------train actor loss------------- 4.978836338760124
---------------Vali actor loss------------- 9.290235
47 ***********************************************************************
---------------train actor loss------------- 6.864234559651878
---------------Vali actor loss------------- 9.38106
48 ***********************************************************************
---------------train actor loss------------- 5.059230749391848
---------------Vali actor loss------------- 5.477357
49 ***********************************************************************
---------------train actor loss------------- 5.928599544697338
---------------Vali actor loss------------- 8

---------------Vali actor loss------------- 5.7948713
88 ***********************************************************************
---------------train actor loss------------- 5.626085559113158
---------------Vali actor loss------------- 7.957537
89 ***********************************************************************
---------------train actor loss------------- 5.869146042399937
---------------Vali actor loss------------- 6.245036
90 ***********************************************************************
---------------train actor loss------------- 5.714750263012117
---------------Vali actor loss------------- 6.5280433
91 ***********************************************************************
---------------train actor loss------------- 5.634143017232418
---------------Vali actor loss------------- 8.541059
92 ***********************************************************************
---------------train actor loss------------- 6.134966002156337
---------------Vali actor loss-------------

---------------Vali actor loss------------- 3.255199
131 ***********************************************************************
---------------train actor loss------------- 3.6557953535682626
---------------Vali actor loss------------- 4.4393525
132 ***********************************************************************
---------------train actor loss------------- 3.0640422287914486
---------------Vali actor loss------------- 3.8653374
133 ***********************************************************************
---------------train actor loss------------- 3.7240491956472397
---------------Vali actor loss------------- 3.7096791
134 ***********************************************************************
---------------train actor loss------------- 4.312792466746436
---------------Vali actor loss------------- 5.6181216
135 ***********************************************************************
---------------train actor loss------------- 3.901825881873568
---------------Vali actor loss---

---------------Vali actor loss------------- 0.9869052
174 ***********************************************************************
---------------train actor loss------------- 1.2348264135006402
---------------Vali actor loss------------- 1.110896
175 ***********************************************************************
---------------train actor loss------------- 0.3849238796780507
---------------Vali actor loss------------- 0.5234468
176 ***********************************************************************
---------------train actor loss------------- 0.13282320317294863
---------------Vali actor loss------------- 0.32086337
177 ***********************************************************************
---------------train actor loss------------- 0.28236118973129326
---------------Vali actor loss------------- 0.97850215
178 ***********************************************************************
---------------train actor loss------------- 0.720592842126886
---------------Vali actor lo

---------------Vali actor loss------------- -0.28548196
216 ***********************************************************************
---------------train actor loss------------- 0.5406338278618124
---------------Vali actor loss------------- 0.3767526
217 ***********************************************************************
---------------train actor loss------------- 0.5873882693962919
---------------Vali actor loss------------- 0.22195724
218 ***********************************************************************
---------------train actor loss------------- -0.011117949667904112
---------------Vali actor loss------------- 0.45594028
219 ***********************************************************************
---------------train actor loss------------- 0.33509531058371067
---------------Vali actor loss------------- -0.24463792
220 ***********************************************************************
---------------train actor loss------------- -0.09495904006891781
---------------Val

258 ***********************************************************************
---------------train actor loss------------- 2.271189805534151
---------------Vali actor loss------------- 2.3603058
259 ***********************************************************************
---------------train actor loss------------- 1.318896246039205
---------------Vali actor loss------------- 3.0267875
260 ***********************************************************************
---------------train actor loss------------- 2.549973025918007
---------------Vali actor loss------------- 11.189128
261 ***********************************************************************
---------------train actor loss------------- 3.4581763247648873
---------------Vali actor loss------------- 3.9742432
262 ***********************************************************************
---------------train actor loss------------- 2.926117561964525
---------------Vali actor loss------------- 2.596776
263 *******************************

---------------Vali actor loss------------- 0.4364274


In [40]:
for k in range(1):    
    torch.save(actor_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.8_epoch_1000_batch_32_layer20_add_detach)_actor_{}.pth'.format(k))
    torch.save(critic_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.8_epoch_1000_batch_32_layer20_add_detach)_critic_{}.pth'.format(k))
    torch.save(baseline_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_0.5_lambda_0.8_epoch_1000_batch_32_layer20_add_detach)_baseline_{}.pth'.format(k))

In [41]:
model_para = {'lambda':1.5,
              'actor_h_dim':300,
              'critic_h_dim':200,
              'baseline_h_dim':200,
              'actor_output' :46,
              'critic_output':1,
              'baseline_output':1,
              'n_layer':20,
              'activation':'selu',
              'learning_rate':0.0001}
batch_size = 32

In [42]:
actor_list = []
critic_list = []
baseline_list = []

beta = 0.1
gamma = 5
samples_portion_of_all = 0.0001

for k in range(1):

    y_train = []
    x_train = []
    query_id = []
    array_train_x1 = []
    array_train_x0 = []

    path = "./MQ2008/Fold{}/".format(k+1)

    train_path = path + 'train.txt'
    train_loader = get_loader(train_path, batch_size, shuffle=True, drop_last=True)

    vali_path = path + 'vali.txt'
    vali_loader = get_loader(vali_path, batch_size, shuffle=True, drop_last=True)

    test_path = path + 'test.txt'
    test_loader = get_loader(test_path, batch_size, shuffle=True, drop_last=True)

    actor = Actor(46, model_para['actor_h_dim'], model_para['actor_output'], model_para['n_layer'], model_para['activation'])
    critic = Critic_RankNet(46, model_para['critic_h_dim'], model_para['critic_output'])
    baseline = Baseline_RankNet(46, model_para['baseline_h_dim'], model_para['baseline_output'])

    actor.apply(init_weights)
    critic.apply(init_weights)
    baseline.apply(init_weights)
    
    trained_model_list = train_model(baseline, actor, critic, 300, model_para['lambda'], beta, gamma)

    actor_list.append(trained_model_list[0])
    critic_list.append(trained_model_list[1])
    baseline_list.append(trained_model_list[2])



  This is separate from the ipykernel package so we can avoid doing imports until
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


1 ***********************************************************************
---------------train actor loss------------- -7.620888464980656




---------------Vali actor loss------------- -9.847595
2 ***********************************************************************
---------------train actor loss------------- -10.92538352807363
---------------Vali actor loss------------- -11.301282
3 ***********************************************************************
---------------train actor loss------------- -11.419057097699907
---------------Vali actor loss------------- -11.819247
4 ***********************************************************************
---------------train actor loss------------- -11.566725667979982
---------------Vali actor loss------------- -10.684356
5 ***********************************************************************
---------------train actor loss------------- -11.53250235815843
---------------Vali actor loss------------- -11.109984
6 ***********************************************************************
---------------train actor loss------------- -11.377495888206694
---------------Vali actor loss---

44 ***********************************************************************
---------------train actor loss------------- -11.78432430823644
---------------Vali actor loss------------- -11.574453
45 ***********************************************************************
---------------train actor loss------------- -11.470593157741758
---------------Vali actor loss------------- -11.380567
46 ***********************************************************************
---------------train actor loss------------- -11.465870234701368
---------------Vali actor loss------------- -11.360294
47 ***********************************************************************
---------------train actor loss------------- -11.531713508897358
---------------Vali actor loss------------- -11.394204
48 ***********************************************************************
---------------train actor loss------------- -11.338340878486633
---------------Vali actor loss------------- -11.389845
49 ***********************

---------------Vali actor loss------------- -11.630676
87 ***********************************************************************
---------------train actor loss------------- -11.58058794008361
---------------Vali actor loss------------- -11.496456
88 ***********************************************************************
---------------train actor loss------------- -11.530660218662685
---------------Vali actor loss------------- -11.60633
89 ***********************************************************************
---------------train actor loss------------- -11.521475046873093
---------------Vali actor loss------------- -11.60521
90 ***********************************************************************
---------------train actor loss------------- -11.56960521803962
---------------Vali actor loss------------- -11.701071
91 ***********************************************************************
---------------train actor loss------------- -11.553418613142437
---------------Vali actor los

---------------Vali actor loss------------- -11.341103
129 ***********************************************************************
---------------train actor loss------------- -11.271058294508192
---------------Vali actor loss------------- -11.315319
130 ***********************************************************************
---------------train actor loss------------- -11.29919891887241
---------------Vali actor loss------------- -11.410333
131 ***********************************************************************
---------------train actor loss------------- -11.266840368509293
---------------Vali actor loss------------- -11.261603
132 ***********************************************************************
---------------train actor loss------------- -11.17858644326528
---------------Vali actor loss------------- -11.018267
133 ***********************************************************************
---------------train actor loss------------- -11.22868557771047
---------------Vali act

---------------Vali actor loss------------- -11.387607
171 ***********************************************************************
---------------train actor loss------------- -11.431996994548374
---------------Vali actor loss------------- -11.439499
172 ***********************************************************************
---------------train actor loss------------- -11.393443690405952
---------------Vali actor loss------------- -11.459121
173 ***********************************************************************
---------------train actor loss------------- -11.367374605602688
---------------Vali actor loss------------- -11.467897
174 ***********************************************************************
---------------train actor loss------------- -11.387365420659384
---------------Vali actor loss------------- -11.12573
175 ***********************************************************************
---------------train actor loss------------- -11.339082896709442
---------------Vali a

---------------Vali actor loss------------- -11.173437
213 ***********************************************************************
---------------train actor loss------------- -11.129021873076757
---------------Vali actor loss------------- -11.126671
214 ***********************************************************************
---------------train actor loss------------- -11.131785482168198
---------------Vali actor loss------------- -11.170309
215 ***********************************************************************
---------------train actor loss------------- -11.084812465641233
---------------Vali actor loss------------- -11.132984
216 ***********************************************************************
---------------train actor loss------------- -11.068495432535807
---------------Vali actor loss------------- -11.104132
217 ***********************************************************************
---------------train actor loss------------- -11.102071212397682
---------------Vali 

---------------Vali actor loss------------- -11.3798
255 ***********************************************************************
---------------train actor loss------------- -11.417353361845016
---------------Vali actor loss------------- -11.403469
256 ***********************************************************************
---------------train actor loss------------- -11.33827426036199
---------------Vali actor loss------------- -11.241073
257 ***********************************************************************
---------------train actor loss------------- -11.305957615375519
---------------Vali actor loss------------- -11.422159
258 ***********************************************************************
---------------train actor loss------------- -11.374932785828909
---------------Vali actor loss------------- -11.409514
259 ***********************************************************************
---------------train actor loss------------- -11.359028942055172
---------------Vali act

---------------Vali actor loss------------- -11.315918
297 ***********************************************************************
---------------train actor loss------------- -11.301089803377787
---------------Vali actor loss------------- -11.316115
298 ***********************************************************************
---------------train actor loss------------- -11.293803350792992
---------------Vali actor loss------------- -11.261006
299 ***********************************************************************
---------------train actor loss------------- -11.231086840232214
---------------Vali actor loss------------- -11.25166
300 ***********************************************************************
---------------train actor loss------------- -11.292102528942955
---------------Vali actor loss------------- -11.2690325


In [43]:
for k in range(1):    
    torch.save(actor_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_5_lambda_1.5_epoch_1000_batch_32_layer20)_actor_{}.pth'.format(k))
    torch.save(critic_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_5_lambda_1.5_epoch_1000_batch_32_layer20)_critic_{}.pth'.format(k))
    torch.save(baseline_list[k].state_dict(), './tmp_model_saved/sample_0.0001_of_all_balance_model.train_initialize_weight/***_(beta_0.1_gamma_5_lambda_1.5_epoch_1000_batch_32_layer20)_baseline_{}.pth'.format(k))