In [12]:
import copy
import json
import os
import pickle
import torch.nn.functional as F
from torch.optim import Adam
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as Data
from sklearn.metrics import mean_absolute_error
from torch import optim
from torch.utils.data import Dataset, DataLoader

In [8]:
def MRR(test_y, pred_y, k=5):
    predict = pd.DataFrame([])
    predict["pred_y"] = pred_y
    predict["y"] = test_y

    predict = predict.sort_values("pred_y", ascending=False).reset_index(drop=True)
    predict["pred_y_rank_index"] = (predict.index) + 1
    predict = predict.sort_values("y", ascending=False)

    return sum(1 / predict["pred_y_rank_index"][:k])


def Precision(test_y, pred_y, k=5):
    predict = pd.DataFrame([])
    predict["pred_y"] = pred_y
    predict["y"] = test_y

    predict1 = predict.sort_values("pred_y", ascending=False)
    predict2 = predict.sort_values("y", ascending=False)
    correct = len(list(set(predict1["y"][:k].index) & set(predict2["y"][:k].index)))
    return correct / k


def IRR(test_y, pred_y, k=5):
    predict = pd.DataFrame([])
    predict["pred_y"] = pred_y
    predict["y"] = test_y

    predict1 = predict.sort_values("pred_y", ascending=False)
    predict2 = predict.sort_values("y", ascending=False)
    return sum(predict2["y"][:k]) - sum(predict1["y"][:k])


def Acc(test_y, pred_y):
    test_y = np.ravel(test_y)
    pred_y = np.ravel(pred_y)
    pred_y = (pred_y > 0) * 1
    acc_score = sum(test_y == pred_y) / len(pred_y)

    return acc_score

In [9]:
# load data
data_path = '/openbayes/input/input0/sp500_data.pkl'
with open(data_path, "rb") as f:
    data = pickle.load(f)
inner_edge = np.array(np.load("/openbayes/input/input0/inner_edge.npy"))
# inner10_edge = np.array(np.load("./edge_10.npy"))
# inner20_edge = np.array(np.load("./Taiwan_inner_edge20.npy"))
outer_edge = np.array(np.load("/openbayes/input/input0/outer_edge.npy"))
time_step = data["train"]["x1"].shape[-2]
input_dim = data["train"]["x1"].shape[-1]
num_weeks = data["train"]["x1"].shape[0]
train_size = int(num_weeks * 0.2)
device = 'cuda'
agg_week_num = 3

# convert data into torch dtype
train_w1 = torch.Tensor(data["train"]["x1"]).float().to(device)
train_w2 = torch.Tensor(data["train"]["x2"]).float().to(device)
train_w3 = torch.Tensor(data["train"]["x3"]).float().to(device)
train_w4 = torch.Tensor(data["train"]["x4"]).float().to(device)
inner_edge = torch.tensor(inner_edge.T, dtype=torch.int64).to(device)
# inner10_edge = torch.tensor(inner10_edge.T, dtype=torch.int64).to(device)
# inner20_edge = torch.tensor(inner20_edge.T, dtype=torch.int64).to(device)
outer_edge = torch.tensor(outer_edge.T, dtype=torch.int64).to(device)

# test data
test_w1 = torch.Tensor(data["test"]["x1"]).float().to(device)
test_w2 = torch.Tensor(data["test"]["x2"]).float().to(device)
test_w3 = torch.Tensor(data["test"]["x3"]).float().to(device)
test_w4 = torch.Tensor(data["test"]["x4"]).float().to(device)
test_data = [test_w1, test_w2, test_w3, test_w4]  # [-agg_week_num:]

# label data
train_reg = torch.Tensor(data["train"]["y_return ratio"]).float()
train_cls = torch.Tensor(data["train"]["y_up_or_down"]).float()
test_y = data["test"]["y_return ratio"]
test_cls = data["test"]["y_up_or_down"]
test_shape = test_y.shape[0]
loop_number = 100
ks_list = [5, 10, 20]

In [41]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv, SAGPooling,global_mean_pool , global_max_pool 



class AttentionBlock(nn.Module):
    def __init__(self,time_step,dim):
        super(AttentionBlock, self).__init__()
        self.attention_matrix = nn.Linear(time_step, time_step)

    def forward(self, inputs):
        inputs_t = torch.transpose(inputs,2,1) # (batch_size, input_dim, time_step)
        attention_weight = self.attention_matrix(inputs_t)
        attention_probs = F.softmax(attention_weight,dim=-1)
        attention_probs = torch.transpose(attention_probs,2,1)
        attention_vec = torch.mul(attention_probs, inputs)
        attention_vec = torch.sum(attention_vec,dim=1)
        return attention_vec, attention_probs

class SequenceEncoder(nn.Module):
    def __init__(self,input_dim,time_step,hidden_dim):
        super(SequenceEncoder, self).__init__()
        self.encoder = nn.GRU(input_size=input_dim,hidden_size=hidden_dim,num_layers=1,batch_first=True)
        self.attention_block = AttentionBlock(time_step,hidden_dim) 
        self.dropout = nn.Dropout(0.2)
        self.dim = hidden_dim
    
    def forward(self,seq):
        '''
        inp : torch.tensor (batch,time_step,input_dim)
        '''
        seq_vector,_ = self.encoder(seq)
        seq_vector = self.dropout(seq_vector)
        attention_vec, _ = self.attention_block(seq_vector)
        attention_vec = attention_vec.view(-1,1,self.dim) # prepare for concat
        return attention_vec

class CategoricalGraphAtt(nn.Module):
    def __init__(self,input_dim,time_step,hidden_dim,inner_edge,outer_edge,input_num,use_gru,device):
        super(CategoricalGraphAtt, self).__init__()

        # basic parameters
        self.dim = hidden_dim
        self.input_dim = input_dim
        self.time_step = time_step
        self.inner_edge = inner_edge
        self.outer_edge = outer_edge
        self.input_num = input_num
        self.use_gru = use_gru
        self.device = device

        # hidden layers
        self.pool_attention = AttentionBlock(25,hidden_dim)
        if self.use_gru:
            self.weekly_encoder = nn.GRU(hidden_dim,hidden_dim)
        self.encoder_list = nn.ModuleList([SequenceEncoder(input_dim,time_step,hidden_dim) for _ in range(input_num)])
        self.cat_gat = GATConv(hidden_dim,hidden_dim)
        self.inner_gat = GATConv(hidden_dim,hidden_dim)
        self.weekly_attention = AttentionBlock(input_num,hidden_dim)
        self.fusion = nn.Linear(hidden_dim*3,hidden_dim)

        # output layer 
        self.reg_layer = nn.Linear(hidden_dim,1)
        self.cls_layer = nn.Linear(hidden_dim,1)

    def forward(self,weekly_batch):
        print('============= start one forward =============')
        
        # x has shape (category_num, stocks_num, time_step, dim)
        print(f'weekly batch[i] size = {weekly_batch[0].size()}')  # torch.Size([475, 7, 30])
        weekly_embedding = self.encoder_list[0](weekly_batch[0].view(-1,self.time_step,self.input_dim))
        print(f'weekly_embedding size = {weekly_embedding.size()}')    # torch.Size([475, 1, 64])

        # calculate embeddings for the rest of weeks
        for week_idx in range(1,self.input_num):
            weekly_inp = weekly_batch[week_idx]
            weekly_inp = weekly_inp.view(-1,self.time_step,self.input_dim)
            week_stock_embedding = self.encoder_list[week_idx](weekly_inp)
            weekly_embedding = torch.cat((weekly_embedding,week_stock_embedding),dim=1)
        print(f'after concat weekly_embedding size = {weekly_embedding.size()}')  # torch.Size([475, 3, 64])

        # merge weeks 
        if self.use_gru:
            weekly_embedding,_ = self.weekly_encoder(weekly_embedding)
        weekly_att_vector,_ = self.weekly_attention(weekly_embedding)
        print(f'weekly_att_vector size = {weekly_att_vector.size()}')  # torch.Size([475, 64])

        # inner graph interaction 
        inner_graph_embedding = self.inner_gat(weekly_att_vector,self.inner_edge)
        print(f'inner_graph_embedding size = {inner_graph_embedding.size()}')  # torch.Size([475, 64])
        
        inner_graph_embedding = inner_graph_embedding.view(19,25,-1)
        print(f'after reshape inner_graph_embedding size = {inner_graph_embedding.size()}')  # torch.Size([19, 25, 64])
        
        # pooling 
        weekly_att_vector = weekly_att_vector.view(19,25,-1)  # torch.Size([19, 25, 64])
        category_vectors,_ =  self.pool_attention(weekly_att_vector) #torch.max(weekly_att_vector,dim=1)
        print(f'category_vectors size = {category_vectors.size()}')  # torch.Size([19, 64])
        
        # use category graph attention 
        category_vectors = self.cat_gat(category_vectors,self.outer_edge)
        print(f'after sector gat category_vectors size = {category_vectors.size()}')  # torch.Size([19, 64])
        
        category_vectors = category_vectors.unsqueeze(1).expand(-1,25,-1)
        print(f'after reshape category_vectors size = {category_vectors.size()}')  # torch.Size([19, 25, 64])
        
        # fusion 
        fusion_vec = torch.cat((weekly_att_vector,category_vectors,inner_graph_embedding),dim=-1)
        print(f'cat fusion_vec size = {fusion_vec.size()}')  # torch.Size([19, 25, 192])
        
        fusion_vec = self.fusion(fusion_vec)
        print(f'linear fusion_vec size = {fusion_vec.size()}')  # torch.Size([19, 25, 64])
        
        fusion_vec = torch.relu(fusion_vec)
        print(f'relu fusion_vec size = {fusion_vec.size()}')  # torch.Size([19, 25, 64])

        # output
        reg_output = self.reg_layer(fusion_vec)
        print(f'reg_output size = {reg_output.size()}')  # torch.Size([19, 25, 1])
        reg_output = torch.flatten(reg_output)
        print(f'flatten reg_output size = {reg_output.size()}')  # torch.Size([475])
        
        cls_output = torch.sigmoid(self.cls_layer(fusion_vec))
        cls_output = torch.flatten(cls_output)
        
        print('============= one forward over =============')
        return reg_output, cls_output

    def predict_toprank(self,test_data,device,top_k=5):
        y_pred_all_reg, y_pred_all_cls = [], []
        test_w1,test_w2,test_w3,test_w4 = test_data
        for idx,_ in enumerate(test_w2):
            batch_x1,batch_x2,batch_x3,batch_x4 = test_w1[idx].to(self.device), \
                                        test_w2[idx].to(self.device),\
                                        test_w3[idx].to(self.device),\
                                        test_w4[idx].to(self.device)
            batch_weekly = [batch_x1,batch_x2,batch_x3,batch_x4][-self.input_num:]
            pred_reg, pred_cls = self.forward(batch_weekly)
            pred_reg, pred_cls = pred_reg.cpu().detach().numpy(), pred_cls.cpu().detach().numpy()
            y_pred_all_reg.extend(pred_reg.tolist())
            y_pred_all_cls.extend(pred_cls.tolist())
        return y_pred_all_reg, y_pred_all_cls

In [42]:
def train():
    global test_y
    l2 = 0.1
    lr = 0.1
    beta = 0.1
    gamma = 0.1
    alpha = 0.1
    device = 'cuda'
    epochs = 50
    hidden_dim = 64
    use_gru = False

    model = CategoricalGraphAtt(input_dim, time_step, hidden_dim, inner_edge, outer_edge, agg_week_num, use_gru, device).to(device)
    
    # initialize parameters
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Number of parameters:%s" % pytorch_total_params)

    # optimizer & loss
    optimizer = optim.Adam(model.parameters(), weight_decay=l2, lr=lr)
    reg_loss_func = nn.L1Loss(reduction="none")
    cls_loss_func = nn.BCELoss(reduction="none")

    # save best model
    best_metric_IRR = None
    best_metric_MRR = None
    best_results_IRR = None
    best_results_MRR = None
    global_best_IRR = 999
    global_best_MRR = 0

    r_loss = torch.tensor([]).float().to(device)
    c_loss = torch.tensor([]).float().to(device)
    ra_loss = torch.tensor([]).float().to(device)
    for epoch in range(epochs):
        for week in range(2):
            model.train()  # prep to train model
            batch_x1, batch_x2, batch_x3, batch_x4 = (
                train_w1[week].to(device),
                train_w2[week].to(device),
                train_w3[week].to(device),
                train_w4[week].to(device),
            )
            batch_weekly = [batch_x1, batch_x2, batch_x3, batch_x4][-3:]
            batch_reg_y = train_reg[week].view(-1, 1).to(device)
            batch_cls_y = train_cls[week].view(-1, 1).to(device)
            reg_out, cls_out = model(batch_weekly)
            reg_out, cls_out = reg_out.view(-1, 1), cls_out.view(-1, 1)

            # calculate loss
            reg_loss = reg_loss_func(reg_out, batch_reg_y)  # (target_size, 1)
            cls_loss = cls_loss_func(cls_out, batch_cls_y)
            rank_loss = torch.relu(-(reg_out.view(-1, 1) * reg_out.view(1, -1)) * (batch_reg_y.view(-1, 1) * batch_reg_y.view(1, -1)))
            c_loss = torch.cat((c_loss, cls_loss.view(-1, 1)))
            r_loss = torch.cat((r_loss, reg_loss.view(-1, 1)))
            ra_loss = torch.cat((ra_loss, rank_loss.view(-1, 1)))

            if (week + 1) % 1 == 0:
                cls_loss = beta * torch.mean(c_loss)
                reg_loss = alpha * torch.mean(r_loss)
                rank_loss = gamma * torch.sum(ra_loss)
                loss = reg_loss + rank_loss + cls_loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                r_loss = torch.tensor([]).float().to(device)
                c_loss = torch.tensor([]).float().to(device)
                ra_loss = torch.tensor([]).float().to(device)
                if (week + 1) % 144 == 0:
                    print("REG Loss:%.4f CLS Loss:%.4f RANK Loss:%.4f  Loss:%.4f" % (reg_loss.item(), cls_loss.item(), rank_loss.item(), loss.item()))

        # evaluate
        model.eval()
        print("Evaluate at epoch %s" % (epoch + 1))
        y_pred, y_pred_cls = model.predict_toprank([test_w1, test_w2, test_w3, test_w4], device, top_k=5)

        # calculate metric
        y_pred = np.array(y_pred).ravel()
        test_y = np.array(test_y).ravel()
        mae = round(mean_absolute_error(test_y, y_pred), 4)
        acc_score = Acc(test_cls, y_pred)

        results = []
        for k in ks_list:
            IRRs, MRRs, Prs = [], [], []
            for i in range(test_shape):
                M = MRR(np.array(test_y[loop_number * i : loop_number * (i + 1)]), np.array(y_pred[loop_number * i : loop_number * (i + 1)]), k=k)
                MRRs.append(M)
                P = Precision(
                    np.array(test_y[loop_number * i : loop_number * (i + 1)]), np.array(y_pred[loop_number * i : loop_number * (i + 1)]), k=k
                )
                Prs.append(P)
            over_all = [mae, round(acc_score, 4), round(np.mean(MRRs), 4), round(np.mean(Prs), 4)]
            results.append(over_all)
        print(results)

        # print('MAE:',round(mae,4),' IRR:',round(np.mean(IRRs),4),' MRR:',round(np.mean(MRRs),4)," Precision:",round(np.mean(Prs),4))
        performance = [round(mae, 4), round(acc_score, 4), round(np.mean(MRRs), 4), round(np.mean(Prs), 4)]

        # print(performance)

        # save best
        if np.mean(MRRs) > global_best_MRR:
            global_best_MRR = np.mean(MRRs)
            best_metric_MRR = performance
            best_results_MRR = results

    return best_metric_IRR, best_metric_MRR, best_results_IRR, best_results_MRR

In [43]:
if __name__ == "__main__":
    best_metric_IRR, best_metric_MRR, best_results_IRR, best_results_MRR = train()
    print("-------Final result-------")
    print("[BEST MRR] MAE:%.4f ACC:%.4f MRR:%.4f Precision:%.4f" % tuple(best_metric_MRR))
    for idx, k in enumerate(ks_list):
        print("[BEST RESULT MRR with k=%s] MAE:%.4f ACC:%.4f MRR:%.4f Precision:%.4f" % tuple(tuple([k]) + tuple(best_results_MRR[idx])))

Number of parameters:77184
weekly batch[i] size = torch.Size([475, 7, 30])
weekly_embedding size = torch.Size([475, 1, 64])
after concat weekly_embedding size = torch.Size([475, 3, 64])
weekly_att_vector size = torch.Size([475, 64])
inner_graph_embedding size = torch.Size([475, 64])
after reshape inner_graph_embedding size = torch.Size([19, 25, 64])
category_vectors size = torch.Size([19, 64])
after sector gat category_vectors size = torch.Size([19, 64])
after reshape category_vectors size = torch.Size([19, 25, 64])
cat fusion_vec size = torch.Size([19, 25, 192])
linear fusion_vec size = torch.Size([19, 25, 64])
relu fusion_vec size = torch.Size([19, 25, 64])
reg_output size = torch.Size([19, 25, 1])
flatten reg_output size = torch.Size([475])
weekly batch[i] size = torch.Size([475, 7, 30])
weekly_embedding size = torch.Size([475, 1, 64])
after concat weekly_embedding size = torch.Size([475, 3, 64])
weekly_att_vector size = torch.Size([475, 64])
inner_graph_embedding size = torch.Size(