In [1]:
import argparse
import inspect
import math
import os
import os.path as osp
import pdb
import pickle
import random
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorly
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
from sklearn.metrics import r2_score as r2
from torch.nn.init import xavier_normal_
from torch_geometric.data import Data

from models.prediction_model import MLPNet
from utils.utils import auto_select_gpu
from utils.utils import get_known_mask, mask_edge

In [2]:
# Load the saved list of per-month snapshots and drop its first entry.
data_list = torch.load('./dataset/data_list.pt')[1:]

In [5]:
# Number of snapshots remaining in data_list (displayed as the cell output).
len(data_list)

173

In [3]:
# Sanity check on a single snapshot: for each target series print its
# NaN-aware min / max, the number of missing entries and the series length.
i = 1
print('in the month', i)
snapshot = data_list[i]
for target_name in ('price', 'yield', 'return'):
    series = getattr(snapshot, 'df_' + target_name)
    print(target_name + ' min', np.nanmin(series))
    print(target_name + ' max', np.nanmax(series))
    print('missing number:', torch.sum(torch.isnan(series)), 'total observation', series.shape[0])

in the month 1
price min 58.0
price max 120.75
missing number: tensor(26) total observation 54
yield min 4.719013
yield max 14.286286
missing number: tensor(26) total observation 54
return min -0.20732018
return max 0.11293959
missing number: tensor(26) total observation 54


In [6]:
# Split the snapshots into train / validation / test as contiguous slices,
# in list order: the first 120, the next 20, and everything after index 140.
train_data_list, val_data_list, test_data_list = (
    data_list[:120],
    data_list[120:140],
    data_list[140:],
)

print(f'train_month: {len(train_data_list)}')
print(f'val_month: {len(val_data_list)}')
print(f'test_month: {len(test_data_list)}')

train_month: 120
val_month: 20
test_month: 33


In [7]:
#utilities

def mape(y_true, y_pred, threshold=0.00001):
    """Mean absolute percentage error (in percent) over non-zero targets.

    Entries whose true value is exactly 0 are excluded from both arrays
    before the relative error is computed.

    Args:
        y_true: array-like of reference values.
        y_pred: array-like of predictions, same shape as ``y_true``.
        threshold: accepted for interface compatibility; the computation
            does not use it.

    Returns:
        ``100 * mean(|y_true - y_pred| / |y_true|)`` over the kept entries.
    """
    truth = np.array(y_true)
    guess = np.array(y_pred)
    keep = truth != 0
    relative_error = np.abs((truth[keep] - guess[keep]) / truth[keep])
    return 100.0 * np.mean(relative_error, axis=-1)
    
def Sequencial_list(data_list, lag):
    """Build sliding windows of length ``lag`` and their next-step targets.

    For every position ``t`` in ``lag .. len(data_list) - 1`` the window is
    ``data_list[t - lag:t]`` and the target is ``data_list[t]``.

    Args:
        data_list: any sliceable sequence.
        lag: window length.

    Returns:
        ``(windows, targets)`` as two Python lists of equal length; both are
        empty when ``lag >= len(data_list)``.
    """
    target_positions = range(lag, len(data_list))
    seq_data_x = [data_list[t - lag:t] for t in target_positions]
    seq_data_y = [data_list[t] for t in target_positions]
    return seq_data_x, seq_data_y

def plot_loss(dic):
    """Draw a three-panel training summary figure.

    Panels, left to right: training vs. validation loss, validation MAE,
    validation R2 (all against epoch).

    Args:
        dic: mapping with the keys ``'train_loss'``, ``'val_loss'``,
            ``'val_mae'`` and ``'val_r2'``. Each ``'val_loss'`` entry must
            support ``.detach().cpu().numpy()`` (e.g. a torch tensor); the
            other series are passed to ``plt.plot`` unchanged.
    """
    plt.figure(figsize=(12,5))

    # Panel 1: loss curves. Validation losses are converted to NumPy first.
    plt.subplot(1, 3, 1)
    val_loss_list = [loss.detach().cpu().numpy() for loss in dic['val_loss']]
    plt.plot(dic['train_loss'], label='Training loss ')
    plt.plot(val_loss_list, label='Validation loss ')
    plt.legend(frameon=False)
    plt.title('train and validation loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')

    # Panel 2: validation MAE.
    plt.subplot(1, 3, 2)
    plt.plot(dic['val_mae'])
    plt.title('validation MAE')
    plt.xlabel('epoch')
    plt.ylabel('MAE')

    # Panel 3: validation R2 (the title previously read 'R@', a typo).
    plt.subplot(1, 3, 3)
    plt.plot(dic['val_r2'])
    plt.title('validation R2')
    plt.xlabel('epoch')
    plt.ylabel('R2')
    plt.show()

from models.gnn_model import get_gnn
from utils.utils import build_optimizer, objectview, get_known_mask, mask_edge

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience.

    Call the instance once per epoch with the current validation loss. On
    every improvement the model's ``state_dict`` is written to ``file_path``;
    after ``patience`` consecutive non-improving calls ``early_stop`` becomes True.
    """
    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best (negated) validation loss seen so far
        self.early_stop = False
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf
        self.best_hx = None       # kept for compatibility; not used in this class
        self.delta = delta

    def __call__(self, val_loss, model, file_path):
        """Record one validation result and update the stopping state.

        Args:
            val_loss: current validation loss (lower is better).
            model: object exposing ``state_dict()``; checkpointed on improvement.
            file_path: destination passed to ``torch.save``.
        """
        # Work with the negated loss so that "larger is better".
        score = -val_loss

        if self.best_score is None:
            # First call: always counts as an improvement.
            self.best_score = score
            self.save_checkpoint(val_loss, model, file_path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, file_path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, file_path):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), file_path)
        self.val_loss_min = val_loss
     

In [8]:
# Positions 0..len-1 inside each split; the windows below are built over
# these positions rather than over the snapshots themselves.
train_index, val_index, test_index = (
    np.array(range(len(split)))
    for split in (train_data_list, val_data_list, test_data_list)
)

# 12-step look-back windows and the position of the step that follows each.
train_index_x, train_index_y = Sequencial_list(train_index, 12)
val_index_x, val_index_y = Sequencial_list(val_index, 12)
test_index_x, test_index_y = Sequencial_list(test_index, 12)

print(f'train seq: {len(train_index_x)} {len(train_index_y)}')
print(f'test seq: {len(test_index_x)} {len(test_index_y)}')

train seq: 108 108
test seq: 21 21


In [9]:
class GNNLayer(torch.nn.Module):
    """GNN over one bipartite bond/fund snapshot; returns node embeddings.

    Bond nodes use the snapshot's ``bond_x`` features; fund nodes use a
    learnable, randomly initialised feature matrix. Both are projected to
    ``args.node_dim`` by separate MLPs, concatenated (bonds first, then
    funds) and passed through the GNN returned by ``get_gnn(args)``.

    Relies on the notebook-level globals ``device`` and ``bond_dim``.
    """
    def __init__(self,args):
        super().__init__()
        self.gnnmodel = get_gnn(args).to(device)

        # Random initial fund features, one row per fund (args.shape[1] rows).
        # The tensor is created directly on `device`: wrapping a CPU tensor in
        # nn.Parameter and then calling .to(device) returns a plain non-leaf
        # tensor on GPU, which is not registered in model.parameters() and is
        # therefore never updated by the optimiser.
        self.x_fund = torch.nn.Parameter(
            torch.tensor(np.random.uniform(-1, 1, (args.shape[1], bond_dim)),
                         dtype=torch.float,
                         device=device))

        # bond node mlp: bond input dimension -> GNN node dimension
        bond_input_dim = bond_dim
        bond_mlp_hiddens = []
        self.bond_mlp = MLPNet(bond_input_dim, args.node_dim,
                               hidden_layer_sizes=bond_mlp_hiddens,
                               dropout=args.dropout).to(device)

        # fund node mlp: fund input dimension -> GNN node dimension
        fund_input_dim = bond_dim
        fund_mlp_hiddens = []
        self.fund_mlp = MLPNet(fund_input_dim, args.node_dim,
                               hidden_layer_sizes=fund_mlp_hiddens,
                               dropout=args.dropout).to(device)

        self.args = args

    def forward(self,data,edge_index,edge_attr):
        """Embed all nodes of one snapshot.

        Args:
            data: snapshot exposing ``bond_x`` (bond feature tensor).
            edge_index: edge index tensor; indexed as ``[2, n_edges]`` by the
                callers in this file, with each edge stored in both directions.
            edge_attr: edge attributes aligned with ``edge_index``.

        Returns:
            Node embeddings produced by the GNN, bonds first then funds.
        """
        x_bond = data.bond_x.clone().detach().to(device)

        # Edge dropout: sample one mask over the first half of the edges and
        # apply the same mask to the second half so both directions of an
        # edge are kept or dropped together.
        # NOTE(review): the mask is sampled on every call, including under
        # model.eval() — confirm that stochastic evaluation is intended.
        known_mask = get_known_mask(self.args.known, int(edge_attr.shape[0] / 2))
        double_known_mask = torch.cat((known_mask, known_mask), dim=0)
        known_edge_index, known_edge_attr = mask_edge(edge_index, edge_attr, double_known_mask, True)
        del edge_index, edge_attr, known_mask, double_known_mask

        # Project raw features to the GNN node dimension and run the GNN.
        bond_emb = self.bond_mlp(x_bond)
        fund_emb = self.fund_mlp(self.x_fund)
        del x_bond
        x = torch.cat((bond_emb, fund_emb), axis=0)
        x_embd = self.gnnmodel(x, known_edge_attr, known_edge_index)
        del bond_emb, fund_emb, x
        return x_embd

In [10]:
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added along the first (sequence) axis.

    Even feature columns hold ``sin(pos * w_k)`` and odd columns hold
    ``cos(pos * w_k)`` with ``w_k = exp(-2k * ln(10000) / d_model)``.
    """

    def __init__(self, d_model, max_len=200):
        """
        Args:
            d_model: feature dimension of the inputs (even or odd).
            max_len: largest sequence length that can be encoded.
        """
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even columns, so
        # trim div_term; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Shape (max_len, 1, d_model) so it broadcasts over the batch axis.
        pe = pe.unsqueeze(0).transpose(0, 1)
        # Buffer: moves with the module across devices but is not trained.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encoding for its first ``x.size(0)`` positions.

        Args:
            x: tensor of shape (seq_len, batch, d_model) with seq_len <= max_len.
        """
        return x + self.pe[:x.size(0), :]

In [34]:
class Transformer(torch.nn.Module):
    """GNN + temporal transformer for next-step bond target prediction.

    Each snapshot in the look-back window is embedded by a shared GNNLayer.
    The per-node embedding sequences are then run through causal transformer
    encoders (one for bond nodes, one for fund nodes). The last-step bond
    states feed ``predict_model`` (target regression); optionally, the state
    at step ``i-1`` is used to regress the edge attributes of step ``i``
    (auxiliary network loss).

    Relies on the notebook-level global ``device``.
    """
    def __init__(self,args_):
        super().__init__()
        self.gnn = GNNLayer(args_).to(device)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=args_.node_dim, nhead=args_.nhead, dropout=args_.dropout).to(device)
        # Separate encoder stacks for bond nodes and for fund nodes.
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=args_.trans_num_layers).to(device)
        self.transformer_encoder_fund = nn.TransformerEncoder(self.encoder_layer, num_layers=args_.trans_num_layers).to(device)
        self.pos_encoder = PositionalEncoding(args_.node_dim).to(device)

        # Target head: temporal bond state -> scalar prediction.
        predict_hiddens = []
        self.predict_model = MLPNet(args_.node_dim, 1,
                                    hidden_layer_sizes=predict_hiddens,
                                    dropout=args_.dropout).to(device)

        # Edge head: pair of end-point states -> scalar edge attribute.
        input_dim = args_.node_dim * 2
        impute_hiddens = []
        self.impute_model = MLPNet(input_dim, 1,
                                   hidden_layer_sizes=impute_hiddens,
                                   hidden_activation=args_.impute_activation,
                                   dropout=args_.dropout).to(device)

        self.args_ = args_

    def impute_network(self,x_embd,edge_index,edge_attr):
        """Predict edge attributes from the states of each edge's end points.

        Only the first half of the edges is used (the second half is treated
        as the reversed duplicates).

        Returns:
            ``(pred_edge_attr, tar_edge_attr)`` for the first half of the edges.
        """
        half = int(edge_attr.shape[0]/2)
        pred_edge_attr = self.impute_model([x_embd[edge_index[0, :half]], x_embd[edge_index[1, :half]]])
        tar_edge_attr = edge_attr[:half]
        return (pred_edge_attr, tar_edge_attr)

    def _generate_square_subsequent_mask(self,sz):
        """Additive causal attention mask of shape (sz, sz).

        Entry (i, j) is 0.0 when step i may attend to step j (j <= i) and
        -inf when j lies in the future. nn.TransformerEncoder *adds* a float
        mask to the attention scores, so blocked positions must be -inf; a
        0/1 float mask does not block anything.
        """
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

    def forward(self,seq_x,bi,net):
        """Run the model over one look-back window.

        Args:
            seq_x: list of snapshots (each exposing ``edge_index``,
                ``edge_attr`` and whatever GNNLayer reads), oldest first.
            bi: tensor of bond row indices whose targets are predicted.
            net: if True, also compute the auxiliary edge-regression loss.

        Returns:
            ``(pred_y, net_loss)`` where ``pred_y`` is the target-head output
            for the bonds in ``bi`` and ``net_loss`` is the MSE of the edge
            regression (``0`` when ``net`` is False).
        """
        bi = bi.long()

        # GNN embedding of every node, for every step in the window.
        embd_list = []
        for snapshot in seq_x:
            edge_index = snapshot.edge_index.clone().detach().to(device)
            edge_attr = snapshot.edge_attr.clone().detach().to(device)
            embd_list.append(self.gnn(snapshot, edge_index, edge_attr))

        # (p, n_nodes, d): time is the sequence axis, nodes act as the batch.
        embd_tensor = torch.stack(embd_list, axis=0)
        del embd_list
        mask = self._generate_square_subsequent_mask(embd_tensor.shape[0]).to(device)

        # Bonds occupy the first args_.shape[0] rows, funds the remainder.
        bond_tensor = embd_tensor[:, :self.args_.shape[0], :]
        fund_tensor = embd_tensor[:, self.args_.shape[0]:, :]

        bond_tensor = self.pos_encoder(bond_tensor)
        bond_h_tensor = self.transformer_encoder(bond_tensor, mask)
        del bond_tensor

        fund_tensor = self.pos_encoder(fund_tensor)
        fund_h_tensor = self.transformer_encoder_fund(fund_tensor, mask)
        del fund_tensor

        embd_h_tensor = torch.cat((bond_h_tensor, fund_h_tensor), axis=1)

        # Auxiliary task: state at step i-1 predicts the edges observed at step i.
        net_loss = 0
        if net:
            pred_edge_attr = []
            tar_edge_attr = []
            for i in range(1, len(seq_x)):
                edge_index = seq_x[i].edge_index.clone().detach().to(device)
                edge_attr = seq_x[i].edge_attr.clone().detach().to(device)
                pred_i, tar_i = self.impute_network(embd_h_tensor[i-1, :, :], edge_index, edge_attr)
                pred_edge_attr.append(pred_i)
                tar_edge_attr.append(tar_i)
            pred_n = torch.cat(pred_edge_attr, axis=0)
            tar_n = torch.cat(tar_edge_attr, axis=0)
            net_loss = F.mse_loss(pred_n, tar_n)
            del pred_edge_attr, tar_edge_attr, pred_n, tar_n
        del embd_h_tensor, fund_h_tensor

        # Target prediction from the last-step state of the requested bonds.
        obs_bond_h_tensor = bond_h_tensor[-1, bi, :]
        del bond_h_tensor
        pred_y = self.predict_model(obs_bond_h_tensor)

        return (pred_y, net_loss)


In [35]:
def batch_function(iterable, n):
    """Yield consecutive slices of ``iterable`` holding at most ``n`` items.

    ``iterable`` must support ``len()`` and slicing; the final slice may be
    shorter than ``n``.
    """
    total = len(iterable)
    for start in range(0, total, n):
        stop = start + n
        yield iterable[start:stop if stop < total else total]

In [36]:
class Experiment_d:
    """Training / evaluation driver for the GNN + transformer model.

    Relies on the notebook-level globals ``device`` and ``log_path`` and on
    the sibling definitions ``Transformer``, ``EarlyStopping``,
    ``batch_function``, ``mape`` and ``r2``.
    """
    def __init__(self,args=0,train_lstm_x=0,train_lstm_y=0,test_lstm_x=0,test_lstm_y=0,
                 train_data_list=0,test_data_list=0,train_index_x=0,train_index_y=0,test_index_x=0,test_index_y=0
                 ):
        """Store the configuration and data splits.

        Args:
            args: namespace of hyper-parameters (reads ``which``, ``lr``,
                ``weight_decay``, ``patience``, ``epochs``, ``num_net``,
                ``lambdayn`` plus whatever ``Transformer`` needs).
            train_lstm_x, train_lstm_y, test_lstm_x, test_lstm_y: stored
                as-is; not read by the methods of this class.
            train_data_list: snapshots used for training.
            test_data_list: snapshots evaluated after every epoch (drives
                early stopping).
            train_index_x, train_index_y: look-back windows / target
                positions into ``train_data_list``.
            test_index_x, test_index_y: same, into ``test_data_list``.
        """
        self.args=args
        self.train_lstm_x=train_lstm_x
        self.train_lstm_y=train_lstm_y
        self.test_lstm_x=test_lstm_x
        self.test_lstm_y=test_lstm_y

        self.train_data_list=train_data_list
        self.test_data_list=test_data_list
        self.train_index_x=train_index_x
        self.train_index_y=train_index_y
        self.test_index_x=test_index_x
        self.test_index_y=test_index_y

    def _select_target(self, snapshot):
        """Return the target column chosen by ``args.which`` as an (N, 1) tensor."""
        which = self.args.which
        if which == 'price':
            return snapshot.df_price.unsqueeze(1)
        if which == 'yield':
            return snapshot.df_yield.unsqueeze(1)
        if which == 'return':
            return snapshot.df_return.unsqueeze(1)
        # Fail loudly instead of leaving the target undefined.
        raise ValueError(
            "args.which must be 'price', 'yield' or 'return', got {!r}".format(which))

    def evaluation_both_mini(self,model,val_data_list,val_index_x,val_index_y):
        """Evaluate target prediction over every window of a split.

        Args:
            model: model exposing ``eval()`` and ``forward(seq_x, bi, net)``.
            val_data_list: snapshots of the split.
            val_index_x: per-window positions; the window spans from the
                first to the last listed position (inclusive).
            val_index_y: per-window position of the target snapshot.

        Returns:
            ``(outputp, outputt, val_mse, val_rmse, val_mae, val_rsq, val_mape)``
            where ``outputp`` / ``outputt`` hold, per window, the list of
            prediction / target tensors, ``val_mse`` is a tensor and the other
            metrics are plain numbers. All metrics are pooled over windows.
        """
        with torch.no_grad():
            model.eval()
            prediction_list=[]
            target_list=[]
            outputp=[]
            outputt=[]

            for j in range(0, len(val_index_x)): #for each sequence
                outputpj=[]
                outputtj=[]
                #snapshots of the look-back window
                seq_x=val_data_list[val_index_x[j][0]:(val_index_x[j][-1]+1)]
                #snapshot at t+1
                seq_y=val_data_list[val_index_y[j]]
                y=self._select_target(seq_y)

                #rows (bonds) whose target is observed at t+1
                bond_idx=np.where(~torch.isnan(y))[0]
                # A snapshot with no observed target contributes nothing;
                # batching it would call range() with a zero step.
                batches = batch_function(bond_idx,len(bond_idx)) if len(bond_idx) else ()

                for bi in batches:
                    #validation is on the target only, not on the network loss
                    bi=torch.Tensor(bi)
                    pred_y,_=model.forward(seq_x,bi,False)

                    target_y=y[bi.long(),:]
                    target_y=target_y.to(device)
                    prediction_list.append(pred_y)
                    target_list.append(target_y)
                    outputpj.append(pred_y)
                    outputtj.append(target_y)
                    del pred_y,target_y

                outputp.append(outputpj)
                outputt.append(outputtj)

            obs_prediction_tensor=torch.cat(prediction_list,axis=0)
            obs_target_tensor=torch.cat(target_list,axis=0)

            val_mse=F.mse_loss(obs_prediction_tensor,obs_target_tensor)
            val_rmse = np.sqrt(val_mse.item())
            mae_function = torch.nn.L1Loss()
            val_mae=mae_function(obs_prediction_tensor,obs_target_tensor).item()
            target_np=obs_target_tensor.detach().cpu().numpy()
            prediction_np=obs_prediction_tensor.detach().cpu().numpy()
            val_rsq=r2(target_np,prediction_np)
            val_mape=mape(target_np,prediction_np)
            del obs_prediction_tensor,obs_target_tensor

            return(outputp,outputt,
                   val_mse,val_rmse,val_mae,val_rsq,val_mape
                   )

    def train_both_mini(self):
        """Train on target + network loss with early stopping.

        After every epoch the model is evaluated on ``self.test_data_list``;
        the best checkpoint (lowest evaluation MSE) is reloaded at the end.
        Writes ``checkpoint_both_mini.pt``, ``result.pkl`` and ``model.pt``
        under the global ``log_path``.

        Returns:
            dict with keys ``'outputp'`` / ``'outputt'`` (from the last
            evaluation that was run), ``'both_val_mse'``, ``'val_rsq'``
            (per-epoch histories), ``'model'`` and ``'args'``.
        """
        model = Transformer(self.args)
        model.to(device)
        opt = torch.optim.Adam(model.parameters(), lr=self.args.lr, weight_decay=self.args.weight_decay)
        checkpoint_path = log_path+'checkpoint_both_mini.pt'

        print("Starting training y...")
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
        both_val_mse_list=[]
        val_rsq_list=[]

        for it in range(0, self.args.epochs):

            start_train = time.time()
            model.train()
            losses = []

            #visit the training windows in a fresh random order each epoch
            temp = list(zip(self.train_index_x, self.train_index_y))
            random.shuffle(temp)
            res1, res2 = zip(*temp)
            train_idx_x, train_idx_y = list(res1), list(res2)

            for j in range(0, len(self.train_index_x)): #for each sequence
                print(j,'/',len(self.train_index_x))
                #snapshots of the look-back window
                seq_x=self.train_data_list[train_idx_x[j][0]:(train_idx_x[j][-1]+1)]
                #snapshot at t+1
                seq_y=self.train_data_list[train_idx_y[j]]
                y=self._select_target(seq_y)

                #rows (bonds) whose target is observed at t+1
                bond_idx=np.where(~torch.isnan(y))[0]
                if len(bond_idx)==0:
                    # Nothing to fit for this window (and a zero batch size
                    # would make range() raise).
                    continue
                np.random.shuffle(bond_idx)

                for idx,bi in enumerate(batch_function(bond_idx,len(bond_idx))): #self.args.batch_size
                    # Train the network regression on every num_net-th batch,
                    # the target only on the others. With one batch per
                    # window (as here) idx is always 0, so net is always True.
                    net = (idx % self.args.num_net == 0)

                    opt.zero_grad()
                    bi=torch.Tensor(bi)
                    pred_y,net_loss=model.forward(seq_x,bi,net)
                    target_y=y[bi.long(),:]
                    target_y=target_y.to(device)
                    loss=F.mse_loss(pred_y,target_y)+self.args.lambdayn * net_loss
                    loss.backward(retain_graph=False)
                    opt.step()
                    losses.append(loss.item())

            train_loss=np.mean(losses)
            print('training y and network epoch,',it)
            print(time.time()-start_train)
            print(train_loss)

            with torch.no_grad():
                print("Validation both:")
                outputp,outputt,both_val_mse,val_rmse,val_mae,val_rsq,val_mape= self.evaluation_both_mini(model,self.test_data_list,self.test_index_x,self.test_index_y)
                print('val_loss_both:{0}'.format(both_val_mse))
                print('val_rmse_y:{0}'.format(val_rmse))
                print('val_mae_y:{0}'.format(val_mae))
                print('val_r2_y:{0}'.format(val_rsq))
                print('val_mape_y:{0}'.format(val_mape))
                both_val_mse_list.append(both_val_mse)
                val_rsq_list.append(val_rsq)

                early_stopping(both_val_mse, model,checkpoint_path)
                if early_stopping.early_stop:
                    print("Early stopping")
                    break

        # load the last checkpoint with the best model
        model.load_state_dict(torch.load(checkpoint_path))

        dic = dict()
        dic['outputp']=outputp
        dic['outputt']=outputt
        dic['both_val_mse']=both_val_mse_list
        dic['val_rsq']=val_rsq_list
        dic['model']=model
        dic['args']=self.args

        # Context manager so the result file is flushed and closed.
        with open(log_path + 'result.pkl', "wb") as result_file:
            pickle.dump(dic, result_file)
        torch.save(model, log_path + 'model.pt')
        return (dic)
            

In [37]:
    # ---- separate cell: hyper-parameter defaults declared through argparse ----
    # Values are read back as attributes of `args`; later cells override some
    # of them by plain attribute assignment before training.
    import argparse

    parser = argparse.ArgumentParser()
    # GNN architecture
    parser.add_argument('--model_types', type=str, default='EGSAGE_EGSAGE')#our message passing method, other options are 'GCN','GAT','GraphSage'
    parser.add_argument('--post_hiddens', type=str, default=None,) 
    parser.add_argument('--concat_states', action='store_true', default=False)
    parser.add_argument('--norm_embs', type=str, default='0_0') # default to be all true
    parser.add_argument('--aggr', type=str, default='mean')
    parser.add_argument('--node_dim', type=int, default=100)
    parser.add_argument('--edge_dim', type=int, default=100)
    parser.add_argument('--edge_mode', type=int, default=1)  # 0: use it as weight 1: as input to mlp
    parser.add_argument('--gnn_activation', type=str, default='none')
    parser.add_argument('--impute_hiddens', type=str, default='')
    parser.add_argument('--impute_activation', type=str, default='none')
    parser.add_argument('--predict_hiddens', type=str, default='')
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--epochs_adj', type=int, default=1000)
    parser.add_argument('--ntrials', type=int, default=1)

    
    # Optimiser
    parser.add_argument('--opt', type=str, default='adam')
    parser.add_argument('--opt_scheduler', type=str, default='none')
    parser.add_argument('--opt_restart', type=int, default=0)
    parser.add_argument('--opt_decay_step', type=int, default=1000)
    parser.add_argument('--opt_decay_rate', type=float, default=0)
    parser.add_argument('--dropout', type=float, default=0)
    parser.add_argument('--weight_decay', type=float, default=0.)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--known', type=float, default=0.7) # 1 - edge dropout rate
    parser.add_argument('--valid', type=float, default=0.) # valid-set ratio
    parser.add_argument('--seed', type=int, default=0)

    
    # Logging, data splitting and training schedule
    parser.add_argument('--log_dir', type=str, default='y0')
    parser.add_argument('--train_edge', type=float, default=0.7)
    parser.add_argument('--split_sample', type=float, default=0.)
    parser.add_argument('--split_by', type=str, default='y') # 'y', 'random'
    parser.add_argument('--split_train', action='store_true', default=False)
    parser.add_argument('--split_test', action='store_true', default=False)
    parser.add_argument('--train_y', type=float, default=0.7)
    parser.add_argument('--node_mode', type=int, default=0)  # 0: feature onehot, sample all 1; 1: all onehot
    # NOTE(review): type=int with a tuple default — a value given on the command
    # line would be parsed as a single int, while the code indexes args.shape.
    parser.add_argument('--shape', type=int, default=(1000,1000,174))  
    parser.add_argument('--seperate_label_loss', action='store_true', default=False)  
    parser.add_argument('--lstm_hidden_dim', type=int, default=100)  
    parser.add_argument('--which_rnn', type=str, default='LSTM') 
    parser.add_argument('--patience', type=int, default=5) 
    parser.add_argument('--lambda_label_recons_loss', type=float, default=1e-3) 
    parser.add_argument('--lambda_net_pred_loss', type=float, default=1e-3) 
    parser.add_argument('--lambda_net_recons_loss', type=float, default=1e-3) 
    # NOTE(review): store_true with default=True can never be switched off.
    parser.add_argument('--ori_y', action='store_true', default=True)
    parser.add_argument('--orginal_y',  action='store_true', default=False)
    parser.add_argument('--converge', type=float, default=1e-3) 
    parser.add_argument('--pretrained_gnn', action='store_true', default=False)
    parser.add_argument('--pretrained_lstm', action='store_true', default=False)
    parser.add_argument('--lstm_layer', type=int, default=1) 
    parser.add_argument('--batch_size', type=int, default=64) 
    parser.add_argument('--batch_size_network', type=int, default=64) 
    parser.add_argument('--lambda1', type=float, default=0) 
    parser.add_argument('--alternating_epoches', type=int, default=200)
    # Weight of the network (edge-regression) loss; read as args.lambdayn in training.
    parser.add_argument('--lambdayn', type=float, default=1e-3) 
    parser.add_argument('--which', type=str, default='yield') 
    parser.add_argument('--num_net', type=int, default=10) 
    parser.add_argument('--trans_num_layers', type=int, default=1) 
    parser.add_argument('--nhead', type=int, default=10) 
    # parse_known_args ignores unrecognised argv entries instead of exiting.
    args=parser.parse_known_args()[0]
    print(args)

Namespace(model_types='EGSAGE_EGSAGE', post_hiddens=None, concat_states=False, norm_embs='0_0', aggr='mean', node_dim=100, edge_dim=100, edge_mode=1, gnn_activation='none', impute_hiddens='', impute_activation='none', predict_hiddens='', epochs=1000, epochs_adj=1000, ntrials=1, opt='adam', opt_scheduler='none', opt_restart=0, opt_decay_step=1000, opt_decay_rate=0, dropout=0, weight_decay=0.0, lr=0.001, known=0.7, valid=0.0, seed=0, log_dir='y0', train_edge=0.7, split_sample=0.0, split_by='y', split_train=False, split_test=False, train_y=0.7, node_mode=0, shape=(1000, 1000, 174), seperate_label_loss=False, lstm_hidden_dim=100, which_rnn='LSTM', patience=5, lambda_label_recons_loss=0.001, lambda_net_pred_loss=0.001, lambda_net_recons_loss=0.001, ori_y=True, orginal_y=False, converge=0.001, pretrained_gnn=False, pretrained_lstm=False, lstm_layer=1, batch_size=64, batch_size_network=64, lambda1=0, alternating_epoches=200, lambdayn=0.001, which='yield', num_net=10, trans_num_layers=1, nhead

In [38]:
# Pick the compute device: a GPU chosen by auto_select_gpu() when CUDA is
# available, otherwise the CPU.
# NOTE(review): CUDA_VISIBLE_DEVICES is set to the chosen index and the same
# index is then used in 'cuda:<index>' — confirm this is valid when the
# chosen index is not 0.
if not torch.cuda.is_available():
    print('Using CPU')
    device = torch.device('cpu')
else:
    cuda = auto_select_gpu()
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = str(cuda)
    print(f"Using GPU {os.environ['CUDA_VISIBLE_DEVICES']}")
    device = torch.device(f'cuda:{cuda}')

# Feature width of the bond nodes; fund nodes use the same width.
bond_dim = fund_dim = train_data_list[0].bond_x.shape[1]
# (rows of bond_x in the first training snapshot, 57, number of snapshots)
shape = (train_data_list[0].bond_x.shape[0], 57, len(data_list))
print(bond_dim, fund_dim, shape)

GPU mem: 2081, Select GPU 0
Using GPU 0
101 101 (54, 57, 173)


In [39]:
def get_result(experiment,log_path,val_data_list,val_index_x,val_index_y,dic):
    """Evaluate ``dic['model']`` on a split, print the metrics and return them.

    Args:
        experiment: object providing ``evaluation_both_mini``.
        log_path: not used by this function.
        val_data_list, val_index_x, val_index_y: split and its window indices,
            forwarded to ``evaluation_both_mini``.
        dic: mapping holding the trained model under ``'model'``.

    Returns:
        ``(outputp, outputt, val_rmse, val_mae, val_mape, val_rsq)``.
    """
    evaluation = experiment.evaluation_both_mini(dic['model'], val_data_list, val_index_x, val_index_y)
    # evaluation order: predictions, targets, mse, rmse, mae, r2, mape
    outputp, outputt, _, val_rmse, val_mae, val_rsq, val_mape = evaluation

    print('rmse:', val_rmse, 'mae:', val_mae, 'mape:', val_mape, 'r2', val_rsq)
    return outputp, outputt, val_rmse, val_mae, val_mape, val_rsq

In [40]:
#training process using the sampled dataset uploaded as supplementary materials this time
args.log_dir='yield_trans_1e5_sample'
args.epochs=20000
args.lr=1e-4
args.shape=shape
args.node_dim=100
args.edge_mode = 1


args.model_types='EGSAGE_EGSAGE' #which GNN model
args.norm_embs='0_0'
args.gnn_activation='none'
args.impute_activation='none'
args.patience=40 #patience for early stop
args.dropout=0.
args.known=0.7 #edge dropout rate
args.batch_size=512
# Coefficient for the self-regularization (network loss) term. The training
# loop reads `args.lambdayn`; the former spelling `lambdany` created an unused
# attribute, so the parser default of 1e-3 was silently used instead.
args.lambdayn=1e-5
args.num_net=2
args.trans_num_layers=1 #transformer attention sublayers
args.nhead=10 # #transformer attention heads
args.which='yield' #target, other option 'price'

# Output directory for checkpoints and results (created if missing).
log_path='./test/yield_result/{}/'.format(args.log_dir)
os.makedirs(log_path, exist_ok=True)

# The validation split drives early stopping (passed as test_*); the held-out
# test split is only evaluated once training has finished.
experiment = Experiment_d(args=args,train_data_list=train_data_list,test_data_list=val_data_list,train_index_x=train_index_x,train_index_y=train_index_y,
                          test_index_x=val_index_x,test_index_y=val_index_y)
dic=experiment.train_both_mini()
outputp,outputt,val_rmse,val_mae,val_mape,val_rsq=get_result(experiment,log_path,test_data_list,test_index_x,test_index_y,dic)

['EGSAGE', 'EGSAGE'] [False, False] [100]
Starting training y...
0 / 108
1 / 108
2 / 108
3 / 108
4 / 108
5 / 108
6 / 108
7 / 108
8 / 108
9 / 108
10 / 108
11 / 108
12 / 108
13 / 108
14 / 108
15 / 108
16 / 108
17 / 108
18 / 108
19 / 108
20 / 108
21 / 108
22 / 108
23 / 108
24 / 108
25 / 108
26 / 108
27 / 108
28 / 108
29 / 108
30 / 108
31 / 108
32 / 108
33 / 108
34 / 108
35 / 108
36 / 108
37 / 108
38 / 108
39 / 108
40 / 108
41 / 108
42 / 108
43 / 108
44 / 108
45 / 108
46 / 108
47 / 108
48 / 108
49 / 108
50 / 108
51 / 108
52 / 108
53 / 108
54 / 108
55 / 108
56 / 108
57 / 108
58 / 108
59 / 108
60 / 108
61 / 108
62 / 108
63 / 108
64 / 108
65 / 108
66 / 108
67 / 108
68 / 108
69 / 108
70 / 108
71 / 108
72 / 108
73 / 108
74 / 108
75 / 108
76 / 108
77 / 108
78 / 108
79 / 108
80 / 108
81 / 108
82 / 108
83 / 108
84 / 108
85 / 108
86 / 108
87 / 108
88 / 108
89 / 108
90 / 108
91 / 108
92 / 108
93 / 108
94 / 108
95 / 108
96 / 108
97 / 108
98 / 108
99 / 108
100 / 108
101 / 108
102 / 108
103 / 108
104 /

In [18]:
#training process using the full dataset
args.log_dir='yield_trans_1e5'
args.epochs=20000
args.lr=1e-4
args.shape=shape
args.node_dim=100
args.edge_mode = 1


args.model_types='EGSAGE_EGSAGE' #which GNN model
args.norm_embs='0_0'
args.gnn_activation='none'
args.impute_activation='none'
args.patience=40 #patience for early stop
args.dropout=0.
args.known=0.7 #edge dropout rate
args.batch_size=512
# Coefficient for the self-regularization (network loss) term. The training
# loop reads `args.lambdayn`; the former spelling `lambdany` created an unused
# attribute, so the parser default of 1e-3 was silently used instead.
args.lambdayn=1e-5
args.num_net=2
args.trans_num_layers=1 #transformer attention sublayers
args.nhead=10 # #transformer attention heads
args.which='yield' #target, other option 'price'

# Output directory for checkpoints and results (created if missing).
log_path='./test/yield_result/{}/'.format(args.log_dir)
os.makedirs(log_path, exist_ok=True)

# The validation split drives early stopping (passed as test_*); the held-out
# test split is only evaluated once training has finished.
experiment = Experiment_d(args=args,train_data_list=train_data_list,test_data_list=val_data_list,train_index_x=train_index_x,train_index_y=train_index_y,
                          test_index_x=val_index_x,test_index_y=val_index_y)
dic=experiment.train_both_mini()
outputp,outputt,val_rmse,val_mae,val_mape,val_rsq=get_result(experiment,log_path,test_data_list,test_index_x,test_index_y,dic)

['EGSAGE', 'EGSAGE'] [False, False] [100]
Starting training y...
0 / 108
1 / 108
2 / 108
3 / 108
4 / 108
5 / 108
6 / 108
7 / 108
8 / 108
9 / 108
10 / 108
11 / 108
12 / 108
13 / 108
14 / 108
15 / 108
16 / 108
17 / 108
18 / 108
19 / 108
20 / 108
21 / 108
22 / 108
23 / 108
24 / 108
25 / 108
26 / 108
27 / 108
28 / 108
29 / 108
30 / 108
31 / 108
32 / 108
33 / 108
34 / 108
35 / 108
36 / 108
37 / 108
38 / 108
39 / 108
40 / 108
41 / 108
42 / 108
43 / 108
44 / 108
45 / 108
46 / 108
47 / 108
48 / 108
49 / 108
50 / 108
51 / 108
52 / 108
53 / 108
54 / 108
55 / 108
56 / 108
57 / 108
58 / 108
59 / 108
60 / 108
61 / 108
62 / 108
63 / 108
64 / 108
65 / 108
66 / 108
67 / 108
68 / 108
69 / 108
70 / 108
71 / 108
72 / 108
73 / 108
74 / 108
75 / 108
76 / 108
77 / 108
78 / 108
79 / 108
80 / 108
81 / 108
82 / 108
83 / 108
84 / 108
85 / 108
86 / 108
87 / 108
88 / 108
89 / 108
90 / 108
91 / 108
92 / 108
93 / 108
94 / 108
95 / 108
96 / 108
97 / 108
98 / 108
99 / 108
100 / 108
101 / 108
102 / 108
103 / 108
104 /