In [None]:
import warnings
warnings.filterwarnings('ignore')

import math
import random
import datetime

import numpy as np
import pandas as pd
from scipy.special import softmax

import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')

from gym.utils import seeding
import gym
from gym import spaces

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import DDPG
from stable_baselines3 import SAC

## Data

In [None]:
# Load the main table used for the train/validation split; later cells expect a "date" column.
df = pd.read_csv("./data/data0605.csv")

In [None]:
# Load the feature table; later cells read its "date" column and slice columns 1-50 in blocks of ten.
fea = pd.read_csv("./data/features0606.csv")

In [None]:
# Load the sentiment table for the train/validation period; later cells expect a "date" column.
sen =  pd.read_csv("./data/sentiment_sum.csv")

In [None]:
# Parse the "date" column of all three tables into pandas datetimes so that
# the date-based filtering and splitting below compare real timestamps.
for frame in (sen, df, fea):
    frame["date"] = pd.to_datetime(frame["date"])

In [None]:
# same date
# Keep only sentiment rows whose date also appears in the feature table,
# then renumber the rows from zero.
date = fea["date"]
sen = sen[sen["date"].isin(date)].reset_index(drop=True)

In [None]:
# same date
# Keep only rows of the main table whose date appears in the (already filtered)
# sentiment table, then renumber the rows from zero.
date = sen["date"]
df = df[df["date"].isin(date)].reset_index(drop=True)

In [None]:
# same date
# Keep only feature rows whose date appears in the sentiment table,
# then renumber the rows from zero.
date = sen["date"]
fea = fea[fea["date"].isin(date)].reset_index(drop=True)

In [None]:
# testing
# Load the table used as the test split; later cells expect a "date" column.
df2 = pd.read_csv("./test_data.csv")

In [None]:
# Load the test-period feature table; sliced later with the same column ranges as `fea`.
fea2 = pd.read_csv("./testing features.csv")

In [None]:
# sen
# Load the test-period sentiment table and keep only rows dated 2019-01-01 or later.
sen2 = pd.read_csv("./sentiment.csv")
# Compare parsed timestamps rather than raw strings: a lexicographic comparison
# against "2019/1/1" only works for one particular date format. The "date"
# column itself is left as loaded.
# reset_index(drop=True) renumbers the surviving rows from 0 so that the later
# column-wise pd.concat with the test frame lines up row by row instead of
# aligning on the stale pre-filter index.
sen2 = sen2[pd.to_datetime(sen2["date"]) >= pd.Timestamp("2019-01-01")].reset_index(drop=True)

In [None]:
# Parse the "date" column of both test tables into pandas datetimes.
for frame in (df2, fea2):
    frame["date"] = pd.to_datetime(frame["date"])

In [None]:
# split
# Train: rows dated before 2018. Validation: rows dated within 2018.
# Test: the separately loaded df2 (same object, not a copy).
test = df2
train = df[df.date < "2018-01-01"]
valid = df[(df.date >= "2018-01-01") & (df.date < "2019-01-01")]

# Give both splits the test frame's column labels, then renumber rows from zero.
train.columns = test.columns
valid.columns = test.columns
train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)

In [None]:
def pct(df):
    """Build a frame of 1-, 3- and 5-row percentage changes.

    Columns 1..10 of ``df`` are replaced by their percentage change over the
    given lag; column 0 and any columns from position 11 onward are copied
    unchanged. The three resulting frames are placed side by side (the 3- and
    5-row variants without their first column) and every NaN is replaced by 0.0.
    The input frame is not modified.
    """
    def _with_returns(lag):
        # Work on a deep copy so the caller's frame stays untouched.
        frame = df.copy(deep=True)
        frame.iloc[:, 1:11] = frame.iloc[:, 1:11].pct_change(periods=lag)
        return frame

    lag_1, lag_3, lag_5 = (_with_returns(lag) for lag in (1, 3, 5))
    combined = pd.concat([lag_1, lag_3.iloc[:, 1:], lag_5.iloc[:, 1:]], axis=1)
    return combined.fillna(0.0)

In [None]:
# percentage
# Replace each split with the frame returned by pct(): 1-, 3- and 5-row
# percentage changes side by side. pct() works on copies, so df2 (which `test`
# aliased until now) is not modified by this cell.
train = pct(train)
valid = pct(valid)
test = pct(test)

In [None]:
# Row counts of the three splits; training_iterations is later passed to
# model.learn() as the number of timesteps per epoch.
training_iterations = len(train)
validation_iterations = len(valid)
testing_iterations = len(test)
print(training_iterations, validation_iterations, testing_iterations)

### Add Features

In [None]:
# mdd
# Feature columns 41-50, split by row position: the first len(train) rows go
# to training, everything after that to validation (renumbered from zero).
# NOTE(review): assumes `fea` has exactly len(train) + len(valid) rows — confirm.
mdd = fea.iloc[:,41:51]
train_mdd, valid_mdd = mdd[:len(train)], mdd[len(train):].reset_index(drop=True)
test_mdd = fea2.iloc[:,41:51]

In [None]:
# dd
# Feature columns 31-40, split by row position: the first len(train) rows go
# to training, everything after that to validation (renumbered from zero).
dd = fea.iloc[:,31:41]
train_dd, valid_dd = dd[:len(train)], dd[len(train):].reset_index(drop=True)
test_dd = fea2.iloc[:,31:41]

In [None]:
# rsi
# Feature columns 21-30, split by row position: the first len(train) rows go
# to training, everything after that to validation (renumbered from zero).
rsi = fea.iloc[:,21:31]
train_rsi, valid_rsi = rsi[:len(train)], rsi[len(train):].reset_index(drop=True)
test_rsi = fea2.iloc[:,21:31]

In [None]:
# kv
# Feature columns 11-20, split by row position: the first len(train) rows go
# to training, everything after that to validation (renumbered from zero).
kv = fea.iloc[:,11:21]
train_kv, valid_kv = kv[:len(train)], kv[len(train):].reset_index(drop=True)
test_kv = fea2.iloc[:,11:21]

In [None]:
# dv
# Feature columns 1-10, split by row position: the first len(train) rows go
# to training, everything after that to validation (renumbered from zero).
dv = fea.iloc[:,1:11]
train_dv, valid_dv = dv[:len(train)], dv[len(train):].reset_index(drop=True)
test_dv = fea2.iloc[:,1:11]

In [None]:
# sen
# Sentiment columns: the first column of `sen` and the first two columns of
# `sen2` are dropped; the remainder is split by row position like the other
# feature blocks.
# NOTE(review): `sen` is overwritten with its own slice, so re-running this
# cell drops one more column each time — run it exactly once per kernel.
sen = sen.iloc[:,1:]
train_sen, valid_sen = sen[:len(train)], sen[len(train):].reset_index(drop=True)
test_sen = sen2.iloc[:,2:]

In [None]:
# Append every feature block column-wise to its split. pd.concat(axis=1) aligns
# on the row index, so all pieces must share the same 0..n-1 index; any gaps
# left by the alignment are forward-filled and then back-filled.
train = pd.concat([train, train_mdd, train_dd, train_rsi, train_kv, train_dv, train_sen], axis=1).ffill().bfill()
valid = pd.concat([valid, valid_mdd, valid_dd, valid_rsi, valid_kv, valid_dv, valid_sen], axis=1).ffill().bfill()
test = pd.concat([test, test_mdd, test_dd, test_rsi, test_kv, test_dv, test_sen], axis=1).ffill().bfill()

### Transaction cost & others

In [None]:
# Labels of columns 1..10 of the training frame; used below as lookup keys into
# the `risk` mapping and as column names for the recorded portfolio weights.
key = train.columns[1:11]
key

In [None]:
# Asset class of each instrument, keyed by the column labels collected in `key`.
risk = {"LU0082087783": "Bond", "TW000T2776A0": "Bond", "LU1061040777": "Bond", "LU0345759590": "Bond", "LU0082770016": "Equity", "TW000T3626C2": "Bond", "IE00B889SK00": "Bond", "LU1035775359": "Equity", "LU0149726845": "Equity", "TW000T3743D3": "Hybrid"}
# Per-instrument rate in `key` order: 0.01 for bonds, 0.02 for everything else.
# Passed to the environments as `transaction_cost_pct`.
rk = [0.01 if risk[k] == "Bond" else 0.02 for k in key]
rk

## Environment for Portfolio Allocation


In [None]:
class StockPortfolioEnv(gym.Env):
    """Portfolio-allocation environment that rebalances once every ``period`` rows.

    Each call to :meth:`step` turns the agent's raw action vector into
    portfolio weights with a softmax, limits how far each weight may move away
    from the currently held weight, charges a proportional transaction cost and
    then advances the cursor ``self.day`` by ``period`` rows of ``df``.

    The observation is the ``period``-row window of the normalised frame that
    starts at ``self.day`` (first column dropped, zero-padded at the end of the
    data). The reward is ``sqrt(252) * mean / std`` of the most recent (at most
    ``REWARD_WINDOW``) per-step returns, or 0 when their std is 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Windows are taken with ``.loc`` on integer labels, so the frame is
        expected to carry a 0..N-1 row index. It must have a ``date`` column;
        columns ``1..stock_dim`` are read as per-row returns.
    mdd
        Stored on the instance; not read by any method of this class.
    initial_amount : number
        Portfolio value at the start of every episode.
    transaction_cost_pct : sequence of float
        Per-asset rate multiplied with the absolute weight change.
    stock_dim, state_dim, action_dim, features_dim : int
        Number of assets, observation width, action width and number of
        ten-column feature groups that :meth:`normalization` rescales.
    train
        Stored on the instance; not read by any method of this class.
    key : sequence
        Column labels for the frame returned by :meth:`save_action_memory`.
    period : int
        Number of rows consumed per step.
    day : int, optional
        Row at which the first episode starts (``reset`` always restarts at 0).
    """

    metadata = {'render.modes': ['human']}

    # Number of most recent per-step returns kept for the reward calculation.
    REWARD_WINDOW = 10

    def __init__(self, 
                df,
                mdd,
                initial_amount,
                transaction_cost_pct,
                stock_dim,
                state_dim,
                action_dim,
                features_dim,
                train,
                key,
                period,
                day = 0):

        # setting
        self.day = day
        self.mdd = mdd
        self.key = key
        self.period = period
        self.transaction_cost_pct = transaction_cost_pct
        self.train = train
        self.initial_amount = initial_amount

        # dimensions (features_dim must be set before normalization() is called)
        self.stock_dim = stock_dim
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.features_dim = features_dim

        # spaces
        self.state_space = self.state_dim
        self.action_space = spaces.Box(low = 0, high = 1,shape = (self.action_dim,)) 
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape = (self.period , self.state_dim))

        # data
        self.df = df
        self.n_df = self.normalization(df)

        self._start_episode()

    def _load_window(self):
        """Select the raw and normalised row windows for the current ``self.day``."""
        # Raw returns are taken one row later than the normalised observation,
        # so the observation window ends just before the last return row.
        self.data = self.df.loc[self.day+1:self.day+self.period,:]
        self.n_data = self.n_df.loc[self.day:self.day+self.period-1,:]

    def _start_episode(self):
        """Initialise all per-episode state for the current ``self.day``."""
        self.terminal = False

        # portfolio value
        self.portfolio_value = self.initial_amount
        self.trans_cost = 0
        self.asset = self.initial_amount
        # step() returns self.reward on a terminal call, so it must exist
        # even if no non-terminal step has been taken yet.
        self.reward = 0

        # data
        self._load_window()
        self.last_data = self.data

        # state
        self.state = self.n_data.iloc[:,1:]
        # Start from an equal-weight portfolio (0.1 each for ten assets).
        weights = [1.0 / self.action_dim for i in range(self.action_dim)]
        self.last_weight = np.array(weights)
        self.actions_memory = [weights]
        self.date_memory=[self.data.date.values[-1]] 
        self.last_reward = []

    def step(self, actions):
        """Apply one rebalancing action and advance ``period`` rows.

        Returns the gym 4-tuple ``(state, reward, terminal, info)``. Once the
        cursor has reached the last row the call is a no-op that returns the
        previous state and reward with ``terminal=True``.
        """
        last_day = len(self.df.index.unique()) - 1
        self.terminal = self.day >= last_day

        if self.terminal:
            return self.state, self.reward, self.terminal, {}

        if len(self.data) > 0:
            # get actions
            new_weight = self.softmax_normalization(actions)
            if sum(new_weight[6:9]) > 0.75:
                # Assets at positions 6..8 (hard-coded) are over the 75% cap:
                # halve them and rescale so the weights sum to one again.
                # Re-running the softmax on the raw actions here would throw
                # the halving away.
                new_weight[6:9] /= 2
                new_weight = new_weight / sum(new_weight)

            # Returns of the last row in the current window.
            last_row = self.data.iloc[-1,1:self.stock_dim+1].to_numpy(dtype=float)

            # get weights
            # NOTE(review): the growth factor compounds the LAST row's return
            # len(data)-1 times rather than walking over the earlier rows of
            # the window; kept as-is because the intended accounting is not
            # clear from this file — confirm.
            change = np.ones(self.stock_dim)
            for _ in range(len(self.data)-1):
                change = change * (1 + last_row)

            last_weight = np.asarray(self.last_weight, dtype=float)
            drifted = change * last_weight
            if sum(drifted) > 0:
                self.portfolio_value *= sum(drifted)
                last_weight = drifted / sum(drifted)

            # transition cost
            # Each requested weight change is squeezed through two clips whose
            # sum acts as a dead zone: moves smaller than 0.005 become 0,
            # larger moves are reduced by 0.005 and capped at +/-0.02.
            requested = new_weight - last_weight
            action_changes = np.clip(requested, 0.005, 0.025) + np.clip(requested, -0.025, -0.005)
            new_weight = last_weight + action_changes
            new_weight /= sum(new_weight)

            trans_cost = sum(abs(action_changes * self.transaction_cost_pct)) * self.portfolio_value
            self.trans_cost += trans_cost

            # Marginal reward
            portfolio_return = sum(last_row * new_weight) * self.portfolio_value

            # Keep a sliding window of the most recent returns. Appending first
            # and then trimming from the left keeps the newest entries and a
            # constant window length.
            self.last_reward.append(portfolio_return)
            self.last_reward = self.last_reward[-self.REWARD_WINDOW:]

            spread = np.array(self.last_reward).std()
            if spread > 0:
                self.reward = (252**0.5) * np.array(self.last_reward).mean() / spread
            else:
                self.reward = 0

            # one step
            self.day = min(last_day, self.day + self.period)
            self._load_window()

            # new state 
            self.state = self.n_data.iloc[:,1:]

            # Near the end of the data the window is shorter than `period`;
            # pad with zero rows so the observation shape stays fixed.
            while len(self.state) < self.period:
                padding = [0.0 for i in range(self.state_dim)]
                self.state = np.append(self.state, [padding], axis=0)

            drifted_new = change * new_weight
            if sum(drifted_new) > 0:
                self.last_weight = drifted_new / sum(drifted_new)

            # Total reward
            self.portfolio_value += portfolio_return
            self.asset = self.portfolio_value - self.trans_cost

            # saving memories (skipped once the cursor sits on the last row)
            if self.day < last_day and len(self.data) > 0:
                self.actions_memory.append(new_weight)
                self.date_memory.append(self.data.date.values[-1]) 

        return self.state, self.reward, self.terminal, {}

    def reset(self):
        """Restart the episode at row 0 and return the first observation."""
        self.day = 0
        self._start_episode()
        return self.state

    def normalization(self, df):
        """Return a rescaled deep copy of ``df``.

        Columns are processed in ``features_dim`` consecutive groups of ten,
        starting at column 1. Each group is shifted by the mean of its column
        means and divided by the standard deviation of its column standard
        deviations.

        NOTE(review): dividing by ``std().std()`` (spread of the per-column
        stds) rather than an overall std is unusual and yields NaN/inf when
        all columns share the same std — confirm this is intended.
        """
        df2 = df.copy(deep=True)
        for i in range(1,self.features_dim+1):
            cols = slice(10*(i-1)+1, 10*i+1)
            block = df2.iloc[:,cols]
            df2.iloc[:,cols] = (block - block.mean().mean()) / block.std().std() 

        return df2

    def softmax_normalization(self, actions):
        """Map raw actions to positive weights that sum to one (scipy softmax)."""
        return softmax(actions)

    def save_action_memory(self):
        """Return the recorded weights as a DataFrame indexed by date.

        Columns are labelled with ``self.key``; one row per recorded action.
        """
        df_date = pd.DataFrame(self.date_memory)
        df_date.columns = ['date']

        df_actions = pd.DataFrame(self.actions_memory)
        df_actions.columns = self.key
        df_actions.index = df_date.date

        return df_actions

    def _seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        """Wrap this instance in a DummyVecEnv and return ``(vec_env, first_obs)``."""
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs

In [None]:
# Environment dimensions. The state width is every column of the training
# frame except the first; the feature count is how many whole groups of
# `stock_dimension` columns fit into that width. `period` is the number of
# rows the environment consumes per step.
stock_dimension = 10
state_dimension = len(train.columns) - 1
feature_dimension = state_dimension // stock_dimension
action_dimension = stock_dimension
period = 5
print(f"Stock Dimension: {stock_dimension}, State Dimension: {state_dimension}, Action Dimension: {action_dimension}, Feature Dimension: {feature_dimension}")

In [None]:
# Constructor arguments shared by the training environment; `rk` supplies the
# per-instrument transaction-cost rates.
env_kwargs = {
    "initial_amount": 1000000, 
    "transaction_cost_pct": rk, 
    "state_dim": state_dimension,
    "stock_dim": stock_dimension, 
    "action_dim": action_dimension, 
    "features_dim": feature_dimension,
    "train": True,
    "key": key,
    "period": period
}

# Training environment built on the training split.
e_train_gym = StockPortfolioEnv(df = train, mdd = train_mdd, **env_kwargs)

In [None]:
# Wrap the training environment in a DummyVecEnv for Stable-Baselines3;
# the initial observation returned alongside it is discarded.
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

### Validation

In [None]:
# Constructor arguments for the validation environment; identical to the
# training arguments except for the `train` flag.
val_env_kwargs = {
    "initial_amount": 1000000, 
    "transaction_cost_pct": rk, 
    "state_dim": state_dimension,
    "stock_dim": stock_dimension, 
    "action_dim": action_dimension, 
    "features_dim": feature_dimension,
    "key": key,
    "train": False,
    "period": period
}

In [None]:
# Validation environment built on the 2018 split.
validation = StockPortfolioEnv(df = valid, mdd = valid_mdd, **val_env_kwargs)

In [None]:
def prediction(model, environment):
    """Run ``model`` deterministically for one episode and return the recorded actions.

    Parameters
    ----------
    model
        Object with a Stable-Baselines3 style ``predict(obs, deterministic=True)``.
    environment
        Object exposing ``get_sb_env()`` (returning a vectorised env and an
        observation), a ``df`` attribute and a ``save_action_memory`` method
        reachable through the vectorised env's ``env_method``.

    Returns
    -------
    The first element of ``env_method("save_action_memory")`` as it stood after
    the last non-terminal step of the episode.
    """
    valid_env, _ = environment.get_sb_env()
    valid_obs = valid_env.reset()

    # Snapshot before the first step so there is always something to return.
    actions_memory = valid_env.env_method(method_name="save_action_memory")

    # The environment may consume several rows per step, so the row count is
    # only an upper bound on the number of steps in one episode.
    max_steps = len(environment.df.index.unique())
    for _ in range(max_steps):
        action, _states = model.predict(valid_obs, deterministic=True)
        valid_obs, rewards, dones, info = valid_env.step(action)

        # A vectorised env resets itself as soon as an episode is done, which
        # clears the recorded actions. Stop here and keep the snapshot taken
        # after the previous (last non-terminal) step.
        if dones[0]:
            break
        actions_memory = valid_env.env_method(method_name="save_action_memory")

    return actions_memory[0]

# Calculate profit

In [None]:
def calculate_profit_train(pct, act):
    """Replay recorded weights over a return table and report the final value.

    Parameters
    ----------
    pct : pandas.DataFrame
        First column ``date``; the remaining ten columns hold per-row returns.
    act : pandas.DataFrame
        Weights indexed by ``date`` (fractions that sum to one), one row per
        rebalancing date. Dates without a row keep the previous holdings.

    Returns
    -------
    (final_amount, total_transaction_cost)
        Starting from 1,000,000. The cost of a rebalance is the absolute weight
        change times the current amount times a fixed per-asset rate. While no
        position is held the amount is not updated (so the very first purchase
        is counted in the total cost but not deducted from the amount).
    """
    cost_rates = [0.01, 0.01, 0.02, 0.02, 0.01, 0.02, 0.01, 0.01, 0.01, 0.02]

    # One row per date in `pct`; dates without a recorded action become all-zero rows.
    targets = pd.merge(pct.iloc[:,:1], act.reset_index(), on="date", how='outer').fillna(0.0)

    wealth = 1000000
    cost_total = 0
    held = [0.0 for i in range(10)]

    for row in range(len(pct)):
        growth = pct.iloc[row,1:].values + 1
        requested = targets.iloc[row,1:].values

        if sum(requested) == 0:
            # No rebalance on this date: carry the current holdings forward.
            current, fee = held, 0
        else:
            current = requested
            fee = sum(abs(current - held) * wealth * cost_rates)

        if sum(held) > 0:
            wealth = sum((current * wealth) * growth) - fee

        if sum(current) > 0:
            # Let the weights drift with the row's returns.
            held = (current * growth) * wealth / sum((current * growth) * wealth)

        cost_total += fee

    return wealth, cost_total

In [None]:
# One-row percentage changes of the 2018 rows of `df`, used to score the
# validation actions. Missing values (the first row) become 0.0.
in_2018 = (df.date >= "2018-01-01") & (df.date < "2019-01-01")
tmp = df[in_2018].copy()
tmp.iloc[:,1:] = tmp.iloc[:,1:].pct_change(periods=1)
tmp = tmp.fillna(0.0)
tmp.iloc[:,0] = pd.to_datetime(tmp.iloc[:,0])

# Implement DRL Algorithms

In [None]:
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

In [None]:
# Exploration noise added to the actions during training: zero mean, sigma 0.1 per action dimension.
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(action_dimension), sigma=0.1 * np.ones(action_dimension))

In [None]:
# DDPG agent on the vectorised training environment.
# NOTE(review): buffer_size equals batch_size (100), so the replay buffer only
# ever holds the 100 most recent transitions — confirm this is intentional.
model = DDPG("MlpPolicy", env_train, action_noise=action_noise, verbose = 0, buffer_size = 100, tau = 0.001, learning_rate = 0.00025, batch_size=100)

In [None]:
# Best validation value seen so far; a checkpoint is written only when it is beaten.
last = 0

# Ten rounds of: train for `training_iterations` timesteps, replay the policy
# on the validation environment, score the recorded weights against the 2018
# returns in `tmp`, and save the model if the final value improved.
for epoch in range(10):
    model = model.learn(total_timesteps = training_iterations)
    df_act = prediction(model = model, environment = validation)
    value, cost = calculate_profit_train(tmp, df_act)
    print("=================================")
    print("Epoch:", epoch, "=> Validation Return:", value, ", Transaction cost:", cost)
    print("=================================")

    if value > last:
        last = value
        model.save("./model/model10")

# Trading

In [None]:
# Constructor arguments for the test (trading) environment; same values as
# the validation arguments.
test_env_kwargs = {
    "initial_amount": 1000000, 
    "transaction_cost_pct": rk, 
    "state_dim": state_dimension,
    "stock_dim": stock_dimension, 
    "action_dim": action_dimension, 
    "features_dim": feature_dimension,
    "key": key,
    "train": False,
    "period": period
}

In [None]:
# Trading environment built on the test split.
trade = StockPortfolioEnv(df = test, mdd = test_mdd, **test_env_kwargs)

In [None]:
# Reload the checkpoint written by the training loop, attaching the trading environment.
test_model = DDPG.load("./model/model10", trade)

In [None]:
def test_prediction(model, environment):
    
    actions_memory = []

    test_env, _ = environment.get_sb_env()
    test_obs = test_env.reset()
    for i in range(len(environment.df.index.unique())):
        
        action, _states = model.predict(test_obs, deterministic=True)
        test_obs, rewards, dones, info = test_env.step(action)

        if i == (len(environment.df.index.unique()) - 2):
            actions_memory = test_env.env_method(method_name="save_action_memory")

    return actions_memory[0]

In [None]:
# Replay the reloaded model on the trading environment and collect the recorded weights.
df_actions = test_prediction(model = test_model, environment = trade)

In [None]:
# Write the raw weights (fractions) to disk, then rescale them to percent;
# calculate_profit() below divides by 100 again.
# NOTE(review): re-running this cell multiplies by 100 a second time.
df_actions.to_csv("./result5.csv")
df_actions = df_actions*100
# Displayed only; the rounded frame is not stored.
df_actions.round()

In [None]:
# One-row percentage changes of the test table, used to score the test actions.
# Work on a copy: assigning through `.iloc` on an alias of df2 would overwrite
# df2 itself and make this cell give a different result each time it is re-run.
tmp = df2.copy()
tmp.iloc[:,1:] = tmp.iloc[:,1:].pct_change(periods=1)
tmp = tmp.fillna(0.0)
tmp.iloc[:,0] = pd.to_datetime(tmp.iloc[:,0] )

In [None]:
def calculate_profit(pct, act):
    """Replay recorded weights over a return table and track the value per row.

    Parameters
    ----------
    pct : pandas.DataFrame
        First column ``date``; the remaining ten columns hold per-row returns.
    act : pandas.DataFrame
        Weights indexed by ``date``, expressed in percent (they are divided by
        100 here). Dates without a row keep the previous holdings.

    Returns
    -------
    (asset, total_transaction_cost)
        ``asset`` has one entry per row of ``pct``, starting from 1,000,000;
        the first row is never traded. The final amount is also printed.
    """
    cost_rates = [0.01, 0.01, 0.02, 0.02, 0.01, 0.02, 0.01, 0.01, 0.01, 0.02]

    # One row per date in `pct`; dates without a recorded action become all-zero rows.
    targets = pd.merge(pct.iloc[:,:1], act.reset_index(), on="date", how='outer').fillna(0.0)

    wealth = 1000000
    held = [0.0 for i in range(10)]
    cost_total = 0
    asset = []

    for row in range(len(pct)):
        if row == 0:
            # The first row only records the starting amount.
            asset.append(wealth)
            continue

        growth = pct.iloc[row,1:].values + 1
        requested = targets.iloc[row,1:].values

        if sum(requested) == 0:
            # No rebalance on this date: carry the current holdings forward.
            current, fee = held, 0
        else:
            current = requested / 100
            fee = sum(abs(current - held) * wealth * cost_rates)

        if sum(held) > 0:
            wealth = sum((current * wealth) * growth) - fee

        if sum(current) > 0:
            # Let the weights drift with the row's returns.
            held = (current * growth) * wealth / sum((current * growth) * wealth)

        cost_total += fee
        asset.append(wealth)

    print(wealth)
    return asset, cost_total

In [None]:
# Score the test-period weights (in percent) against the test returns.
asset, cost = calculate_profit(tmp, df_actions)

In [None]:
# Display the total transaction cost accumulated over the test period.
cost

# Plot

In [None]:
# Dates of the test table as a 2-D array (one column), used as x-values for the plot.
times = df2[["date"]].values

In [None]:
def sequence_data(data, t):
    """Return the two arguments swapped, as the pair ``(t, data)``."""
    pair = (t, data)
    return pair

In [None]:
%matplotlib inline
def plot(asset, times, save=False):
    """Draw the asset curve against time on a wide figure.

    Parameters
    ----------
    asset : sequence
        Y-values (portfolio value per row).
    times : sequence
        X-values, same length as ``asset``.
    save : bool, optional
        When true, the figure is also written to ``pic5.png``.
    """
    time, data = sequence_data(asset, times)
    
    fig, ax = plt.subplots(figsize=(36, 6))
    plt.xticks(rotation=90)
    ax.plot(time, data, label="ddpg")
    # Pass the on/off switch positionally: the old `b=` keyword was renamed to
    # `visible=` and is rejected by current Matplotlib releases.
    ax.grid(True)
    
    if save:
        fig.savefig("pic5.png", bbox_inches='tight')

In [None]:
# Plot the test-period asset curve and save it to pic5.png.
plot(asset, times, save=True)