In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from qids_package.qids import *

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from typing import Tuple

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import math
import time
import warnings
import os
import datetime
import scipy

import random
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides shape-broadcast warnings raised by loss functions.
warnings.simplefilter('ignore')
# Seed every RNG the notebook draws from. The sampling/shuffling code further
# down uses Python's `random` module, so it has to be seeded as well as torch
# and numpy for runs to be reproducible.
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)

class Transformer_ours(nn.Module):
    """Encoder-decoder transformer with a recurrent "long dependency" front end.

    The encoder input of length ``enc_seq_len`` is cut into
    ``long_dependency_multiplier`` consecutive chunks. The chunks are visited in
    order; each one is concatenated (feature-wise) with a compressed summary of
    everything seen so far, and that enriched chunk is compressed again to form
    the summary for the next chunk. Only the last enriched chunk is fed to the
    encoder, so the encoder sees ``enc_seq_len / long_dependency_multiplier``
    time steps with ``int(1.5 * in_features_encoder)`` features each.

    No positional encoding is applied (``dropout_pos_enc`` / ``dropout_pos_dec``
    and ``dec_seq_len`` are accepted but currently unused by the layers).

    Args:
        in_features_encoder: number of features per encoder time step.
        in_features_decoder: number of features per decoder time step.
        out_features: number of values predicted per decoder time step.
        enc_seq_len: length of the raw encoder sequence.
        dec_seq_len: length of the decoder sequence (stored only).
        d_model: width of the transformer layers.
        n_encoder_layers: number of stacked ``MyEncoder`` layers.
        n_decoder_layers: number of stacked decoder layers.
        n_heads: attention heads in encoder and decoder.
        dropout_encoder: dropout inside the encoder layers.
        dropout_decoder: dropout inside the decoder layers.
        dropout_pos_enc: unused (positional encoding is disabled).
        dropout_pos_dec: unused (positional encoding is disabled).
        dim_feedforward_encoder: hidden width of the encoder feed-forward block.
        dim_feedforward_decoder: hidden width of the decoder feed-forward block.
        conv_size: kernel length of the convolution branch in ``MyEncoder``.
        long_dependency_multiplier: number of chunks the encoder input is cut into.

    Raises:
        ValueError: if ``long_dependency_multiplier`` is smaller than 1 or larger
            than ``enc_seq_len`` (either would leave nothing to feed the encoder).
    """

    def __init__(self,
        in_features_encoder: int, # num of features in the encoder,
        in_features_decoder: int, # num of features in the decoder
        out_features: int, # num of features to represent the predicted stock information, default value will be: 1
        enc_seq_len: int,
        dec_seq_len: int,
        d_model: int,
        n_encoder_layers: int,
        n_decoder_layers: int,
        n_heads: int,
        dropout_encoder: float=0.2,
        dropout_decoder: float=0.2,
        dropout_pos_enc: float=0.2,
        dropout_pos_dec: float=0.2,
        dim_feedforward_encoder: int=2048,
        dim_feedforward_decoder: int=2048,
        conv_size: int = 10,
        long_dependency_multiplier: int = 4
        ):

        super().__init__()

        if long_dependency_multiplier < 1:
            raise ValueError("long_dependency_multiplier must be at least 1")
        if int(enc_seq_len/long_dependency_multiplier) < 1:
            raise ValueError("enc_seq_len must be at least long_dependency_multiplier")

        self.n_heads = n_heads
        self.enc_seq_len = enc_seq_len
        self.dec_seq_len = dec_seq_len
        self.long_dependency_multiplier = long_dependency_multiplier
        self.in_features_encoder = in_features_encoder
        self.in_features_decoder = in_features_decoder

        # A chunk enriched with the history summary has
        # in_features + int(0.5*in_features) == int(1.5*in_features) columns.
        enriched_width = int(in_features_encoder*1.5)
        history_width = int(in_features_encoder*0.5)

        self.encoder_input_layer = nn.Linear(in_features=enriched_width, out_features=d_model)
        self.decoder_input_layer = nn.Linear(in_features=in_features_decoder, out_features=d_model)

        self.linear_mapping = nn.Linear(in_features=d_model, out_features=out_features)
        # Compresses an enriched chunk into the summary carried to the next chunk.
        self.long_dependency_compression = nn.Linear(in_features=enriched_width, out_features=history_width)

        encoder_layer = MyEncoder(
            d_model=d_model,
            nheads=n_heads,
            dim_feedforward_encoder=dim_feedforward_encoder,
            dropout_encoder=dropout_encoder,
            conv_size=conv_size,
            )
        self.encoder = nn.TransformerEncoder(encoder_layer=encoder_layer, num_layers=n_encoder_layers, norm=None)

        decoder_layer = nn.TransformerDecoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=dim_feedforward_decoder,
            dropout=dropout_decoder,
            batch_first=True)
        self.decoder = nn.TransformerDecoder(decoder_layer=decoder_layer, num_layers=n_decoder_layers, norm=None)

    def forward(self, src, tgt):
        """Run the model on a batch.

        Args:
            src: encoder input, ``(batch, seq, in_features_encoder)``; only the
                first ``long_dependency_multiplier * chunk_len`` steps are read.
            tgt: decoder input, ``(batch, dec_len, in_features_decoder)``.

        Returns:
            The decoder output mapped to ``out_features`` with all size-1
            dimensions squeezed away (so a batch of one loses its batch axis).
        """
        chunk_len = int(self.enc_seq_len/self.long_dependency_multiplier)

        # Every sample starts from an all-zero history, so the result for one
        # sample never depends on which other samples share its batch. The
        # buffer inherits dtype and device from `src`.
        history = src.new_zeros(src.shape[0], chunk_len, int(self.in_features_encoder*0.5))
        for j in range(self.long_dependency_multiplier):
            chunk = src[:, j*chunk_len:(j+1)*chunk_len]
            enriched = torch.cat([chunk, history], dim=-1)
            history = self.long_dependency_compression(enriched)

        # Only the last enriched chunk (which carries the accumulated history)
        # is encoded.
        memory = self.encoder_input_layer(enriched)
        memory = self.encoder(src=memory)
        tgt = self.decoder_input_layer(tgt)
        tgt = self.decoder(tgt=tgt, memory=memory)

        decoder_output = self.linear_mapping(tgt)

        return decoder_output.squeeze()


class MyEncoder(nn.Module):
    """Encoder layer that mixes self-attention with a causal 1-D convolution.

    The input is processed by a standard ``nn.TransformerEncoderLayer`` and, in
    parallel, by a single-channel convolution that slides over the time axis.
    Both results are concatenated feature-wise and projected back to
    ``d_model`` by a linear layer.

    Args:
        d_model: feature width of the input and output.
        nheads: number of attention heads.
        dim_feedforward_encoder: hidden width of the feed-forward block.
        dropout_encoder: dropout used inside the attention layer.
        conv_size: number of time steps covered by the convolution kernel.
    """

    def __init__(self, d_model: int, nheads: int, dim_feedforward_encoder: int = 2048, dropout_encoder: float = 0.2, conv_size: int = 10):
        super(MyEncoder, self).__init__()

        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nheads,
            dim_feedforward=dim_feedforward_encoder,
            dropout=dropout_encoder,
            batch_first=True)

        # Kernel of shape (conv_size, 1): mixes time steps, never features.
        self.convolution_layer = nn.Conv2d(1, 1, (conv_size, 1), stride=1, bias=False)
        self.combine_encoder_conv = nn.Linear(in_features=2*d_model, out_features=d_model)
        self.conv_size = conv_size

    def forward(self, x, src_mask: Tensor = None, src_key_padding_mask: Tensor = None, is_causal: bool = False):
        """Apply the attention and convolution branches and merge them.

        Args:
            x: input of shape ``(batch, seq, d_model)``.
            src_mask: optional attention mask, forwarded to the attention layer.
            src_key_padding_mask: optional padding mask, forwarded to the
                attention layer.
            is_causal: accepted because ``nn.TransformerEncoder`` passes it to
                every layer on recent PyTorch versions. It is only a hint about
                ``src_mask`` (which is forwarded), so it is not passed on; this
                keeps the layer usable on PyTorch versions without the keyword.

        Returns:
            Tensor of shape ``(batch, seq, d_model)``.
        """
        attended = self.encoder_layer(x, src_mask=src_mask, src_key_padding_mask=src_key_padding_mask)

        # Left-pad the time axis with conv_size-1 zeros so the convolution
        # returns one value per input step and only looks at the past.
        padded = F.pad(x, (0, 0, self.conv_size - 1, 0))
        padded = torch.unsqueeze(padded, dim=1)   # add the channel axis Conv2d expects
        convolved = self.convolution_layer(padded)
        convolved = torch.squeeze(convolved, dim=1)

        merged = torch.cat([attended, convolved], dim=-1)
        return self.combine_encoder_conv(merged)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from typing import Tuple
import random

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import math
import time
import warnings
import os
import datetime
import scipy
from sklearn import metrics

# Locations of the three first-round training CSV files, relative to the
# notebook's working directory.
path_fund, path_market, path_ret = (
    './first_round_train_fundamental_data.csv',
    './first_round_train_market_data.csv',
    './first_round_train_return_data.csv',
)

def get_data(path_fund, path_market, path_ret):
    """Load the fundamental, market and return CSV files.

    Args:
        path_fund: path of the fundamental-data CSV.
        path_market: path of the market-data CSV.
        path_ret: path of the return-data CSV.

    Returns:
        A 3-tuple of DataFrames in the order (fundamental, market, return).
    """
    csv_paths = (path_fund, path_market, path_ret)
    return tuple(pd.read_csv(csv_path) for csv_path in csv_paths)

def data_process(_data):
    """Split a raw table into one float tensor per stock code.

    The ``date_time`` column is expected to hold keys of the form
    ``<code>d<time>``; the part before the first "d" is used as the stock code
    and the column itself is dropped. The input frame is left untouched.

    Args:
        _data: DataFrame with a string ``date_time`` column; every other column
            must be convertible to float.

    Returns:
        dict mapping each stock code to a ``torch.FloatTensor`` holding that
        stock's rows (in their original order) with all columns except
        ``date_time``.
    """
    # n=1: split on the first "d" only, so the code is well defined even if the
    # remainder of the key contains another "d".
    codes = _data['date_time'].str.split("d", n=1, expand=True)[0]

    # drop() returns a new frame, so the caller's DataFrame is not modified.
    features = _data.drop(columns=['date_time'])

    data = {}
    # Grouping by a plain array matches rows by position, not by index label.
    for name, group in features.groupby(codes.to_numpy()):
        data[name] = torch.FloatTensor(group.to_numpy().astype(float))
    return data

def all_stock_train_eval(data_fund, data_market, data_ret,
                         batch_size : int = 64, 
                         sample_rate: float = 0.5,
                         enc_seq_len: int = 5*50,
                         dec_seq_len: int = 5,
                         split_seed: int = 0,
                         bars_per_day: int = 50):
    """Train for one pass over a random subset of windows, then evaluate.

    For every stock and every start day ``i`` a window is built from
    ``enc_seq_len`` market bars ending at day ``i + dec_seq_len``, the
    ``dec_seq_len`` fundamental rows starting at ``i`` and the last column of
    the return row at ``i + dec_seq_len - 1``. About a quarter of the windows
    are held out for evaluation; the split is drawn from a private RNG seeded
    with ``split_seed`` so that it is identical on every call (otherwise
    windows evaluated in one epoch would be trained on in another).

    Uses the module-level ``model``, ``optimizer`` and ``criterion``.

    Args:
        data_fund: dict code -> tensor of fundamental rows (one per day).
        data_market: dict code -> tensor of market rows (``bars_per_day`` per day).
        data_ret: dict code -> tensor of return rows (one per day).
        batch_size: number of windows per optimisation / evaluation step.
        sample_rate: probability with which each training window is kept.
        enc_seq_len: number of market rows per window.
        dec_seq_len: number of fundamental rows per window.
        split_seed: seed of the train/test assignment.
        bars_per_day: market rows per fundamental row.

    Returns:
        Sample-weighted mean of ``criterion`` over the held-out windows (a
        0-dim tensor), or ``0.0`` if there are none.
    """
    split_rng = random.Random(split_seed)

    train_seq, test_seq = [], []

    for key in data_fund.keys():
        # number of records of stock available for training
        data_range = len(data_fund[key]) - dec_seq_len - 2
        for i in range(data_range):
            market_stop = (i + dec_seq_len) * bars_per_day
            market_start = market_stop - enc_seq_len
            if market_start < 0:
                # A negative start would be read as "from the end" and give a
                # wrong window; there is not enough history for this day yet.
                continue
            window = (data_market[key][market_start:market_stop, :],
                      data_fund[key][i:i+dec_seq_len, :],
                      data_ret[key][i+dec_seq_len-1:i+dec_seq_len, -1])
            if split_rng.randint(0, 3) == 0:  # 1/4 test; 3/4 train
                test_seq.append(window)
            else:
                train_seq.append(window)

    random.shuffle(train_seq) # shuffle all stocks train seq
    random.shuffle(test_seq)  # shuffle all stocks test seq

    # with probability sample_rate, keep the training seq, otherwise discard
    train_seq = [seq for seq in train_seq if np.random.random() < sample_rate]

    train_src_seq = [seq[0] for seq in train_seq]
    train_tgt_seq = [seq[1] for seq in train_seq]
    train_tgt_y_seq = [seq[2] for seq in train_seq]

    test_src_seq = [seq[0] for seq in test_seq]
    test_tgt_seq = [seq[1] for seq in test_seq]
    test_tgt_y_seq = [seq[2] for seq in test_seq]

    model.train()

    for start in range(0, len(train_src_seq), batch_size):
        stop = start + batch_size  # slicing clips the last, shorter batch
        optimizer.zero_grad()
        output = model(src=torch.stack(train_src_seq[start:stop]), tgt=torch.stack(train_tgt_seq[start:stop]))

        # Targets stack to (batch, 1).
        # NOTE(review): if the model output has a different trailing shape the
        # criterion broadcasts it against the target - confirm this is intended.
        loss = criterion(output, torch.stack(train_tgt_y_seq[start:stop]))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.7)
        optimizer.step()

    model.eval()

    with torch.no_grad():
        mean_loss = 0.0
        for start in range(0, len(test_src_seq), batch_size):
            stop = start + batch_size
            batch_y = torch.stack(test_tgt_y_seq[start:stop])
            output = model(src=torch.stack(test_src_seq[start:stop]), tgt=torch.stack(test_tgt_seq[start:stop]))
            loss = criterion(output, batch_y)
            # running mean weighted by the number of windows seen so far
            seen, count = start, len(batch_y)
            mean_loss = (seen*mean_loss + count*loss)/(seen + count)

    return mean_loss



# --- model size ---
d_model = 256
n_heads = 8
n_encoder_layers = n_decoder_layers = 2
conv_size = 4

# --- feature counts ---
in_features_encoder = 6  # market data
in_features_decoder = 7  # fund data
out_features = 1

# --- sequence lengths ---
dec_seq_len = 3                  # 3 days fund data
enc_seq_len = dec_seq_len * 50   # 3*50 bar market data
long_dependency_multiplier = 10  # compress 150 to 15 (enc_seq_len)

# --- training-set subsampling ---
sample_ratio = 0.01

# Prefer the GPU when one is visible to torch.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print('start')

# Load the three CSV files and turn each into a dict: stock code -> float tensor.
train_fund_data, train_market_data, train_ret_data = get_data(path_fund, path_market, path_ret)
train_fund_data = data_process(train_fund_data)
train_market_data = data_process(train_market_data)
train_ret_data = data_process(train_ret_data)

print('data processed.')
model = Transformer_ours(
        d_model=d_model,
        in_features_encoder=in_features_encoder,
        in_features_decoder=in_features_decoder,
        out_features=out_features,
        enc_seq_len=enc_seq_len,
        dec_seq_len=dec_seq_len,
        n_encoder_layers=n_encoder_layers,
        n_decoder_layers=n_decoder_layers,
        n_heads=n_heads,
        conv_size = conv_size,
        long_dependency_multiplier=long_dependency_multiplier
        )


criterion = nn.MSELoss()
criterion_mae = nn.L1Loss()
step_num = 1
warmup_step = 5000
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9,0.98), eps=1e-08, lr = lr)
# Multiply the learning rate by 0.9 after every epoch (step size 1).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_num, gamma=0.9)


batch_size = 8
epochs = 50 # edit later    
best_loss = float('inf')   
stop_count = 0    
save_every_epoch = 50


print("start training: ")
for epoch in range(0, epochs):   
    # Pass the sequence lengths explicitly: the function's own defaults
    # (5*50 bars / 5 days) differ from the lengths the model was built with,
    # and the model would then read only the oldest part of each window.
    mean_loss = all_stock_train_eval(train_fund_data, train_market_data, train_ret_data, batch_size,
                                     sample_rate=sample_ratio,
                                     enc_seq_len=enc_seq_len,
                                     dec_seq_len=dec_seq_len)
    print('-' * 15)
    print('　　　current epoch: {%d}' % epoch)
    print('　　　current_loss: {:5.5f}'.format(mean_loss))
    print(datetime.datetime.now())
    print('-' * 15)
    # Track the best held-out loss and how many epochs passed without improvement.
    if mean_loss < best_loss:
        best_loss = mean_loss
        stop_count = 0
    else:
        stop_count += 1
    # if stop_count == 4: #Early stop if no updates occur 4 times
    #     break
    scheduler.step()  

    # save models (the weights of the current epoch, not of the best epoch)
    if(epoch % save_every_epoch == save_every_epoch - 1):
        path = 'model.pt'
        torch.save(model.state_dict(), path)

print('-' * 15)
print('　　　best_loss: {:5.5f}'.format(best_loss))
print('-' * 15)   


In [2]:
# make_env is not defined in this notebook; presumably it is provided by the
# `from qids_package.qids import *` star import in the first cell - TODO confirm.
env = make_env()  # initialize the environment

Environment is initialized.


In [None]:
# Baseline submission loop: uploads uniform random numbers as the prediction
# for every step until the environment reports that it has ended.
import random
random.seed(42)
while not env.is_end():
    # Data for the current step; not used by this random baseline.
    fundamental_df, market_df = env.get_current_market()

    # Make your prediction Y here and replace the two rows below.
    # TODO: load the trained weights (e.g. from "model.pt") and predict with the model.
    l = [random.random() for idx in range(54)]  # 54 values per step
    predict_ds = pd.Series(l)

    # Upload the predicted Y.
    env.input_prediction(predict_ds)