# Data

In [9]:
# db connection

import pymysql
from sqlalchemy import create_engine
import keyring
import platform
import numpy as np

user = 'root'
pw = keyring.get_password('macmini_db', user)
host = '192.168.219.106' if platform.system() == 'Windows' else '127.0.0.1'
port = 3306
db = 'stock'


# # connect DB
# engine = create_engine(f'mysql+pymysql://{self.user}:{self.pw}@{self.host}:{self.port}/{self.db}')

# con = pymysql.connect(
#     user=user,
#     passwd=pw,
#     host=host,
#     db=db,
#     charset='utf8'
# )
        
# mycursor = con.cursor()

In [10]:
import pandas as pd
import pymysql
from sqlalchemy import create_engine


# get us stock price of a specific ticker
def get_prices_from_ticker(ticker, fro=None, to=None):

    # connect DB
    engine = create_engine(f'mysql+pymysql://{user}:{pw}@{host}:{port}/{db}')

    con = pymysql.connect(
        user=user,
        passwd=pw,
        host=host,
        db=db,
        charset='utf8'
    )
            
    mycursor = con.cursor()
    
    if fro is not None:
        if to is not None:               
            query = f""" 
                    SELECT * FROM price_global
                    WHERE ticker = {ticker}
                    AND date BETWEEN {fro} AND {to} 
                    """
        else:
            query = f""" 
                    SELECT * FROM price_global
                    WHERE ticker = {ticker}
                    AND date >= {fro} 
                    """
    
    else:
        if to is not None:
            query = f""" 
                    SELECT * FROM price_global
                    WHERE ticker = {ticker}
                    AND date <= {to} 
                    """
        else:
            query = f""" 
                    SELECT * FROM price_global
                    WHERE ticker = '{ticker}'
                    """
            
    print(query)
    prices = pd.read_sql(query, con=engine)
    con.close()
    engine.dispose()
    return prices

In [47]:
df = get_prices_from_ticker('AAPL')

 
                    SELECT * FROM price_global
                    WHERE ticker = 'AAPL'
                    


In [48]:
COLUMNS_STOCK_RATIO_DATA = [
    'open_close_ratio', 'open_prev_close_ratio', 'high_close_ratio', 'low_close_ratio',
    'close_prev_close_ratio', 'volume_prev_volume_ratio',
]

def preprocess(data):
    
    # moving average
    windows = [5, 10, 20, 60, 120, 240]
    for window in windows:
        data[f'close_ma{window}'] = data['close'].rolling(window).mean()
        data[f'volume_ma{window}'] = data['volume'].rolling(window).mean()
        data[f'close_ma{window}_ratio'] = (data['close'] - data[f'close_ma{window}']) / data[f'close_ma{window}']
        data[f'volume_ma{window}_ratio'] = (data['volume'] - data[f'volume_ma{window}']) / data[f'volume_ma{window}']
        data['open_close_ratio'] = (data['open'].values - data['close'].values) / data['close'].values
        data['open_prev_close_ratio'] = np.zeros(len(data))
        data.loc[1:, 'open_prev_close_ratio'] = (data['open'][1:].values - data['close'][:-1].values) / data['close'][:-1].values
        data['high_close_ratio'] = (data['high'].values - data['close'].values) / data['close'].values
        data['low_close_ratio'] = (data['low'].values - data['close'].values) / data['close'].values
        data['close_prev_close_ratio'] = np.zeros(len(data))
        data.loc[1:, 'close_prev_close_ratio'] = (data['close'][1:].values - data['close'][:-1].values) / data['close'][:-1].values 
        data['volume_prev_volume_ratio'] = np.zeros(len(data))
        data.loc[1:, 'volume_prev_volume_ratio'] = (
            # if volume is 0, change it into non zero value exploring previous volume continuously
            (data['volume'][1:].values - data['volume'][:-1].values) / data['volume'][:-1].replace(to_replace=0, method='ffill').replace(to_replace=0, method='bfill').values
        )
    
    # Bollinger band
    data['middle_bb'] = data['close'].rolling(20).mean()
    data['upper_bb'] = data['middle_bb'] + 2 * data['close'].rolling(20).std()
    data['lower_bb'] = data['middle_bb'] - 2 * data['close'].rolling(20).std()
    data['bb_pb'] = (data['close'] - data['lower_bb']) / (data['upper_bb'] - data['lower_bb'])
    data['bb_width'] = (data['upper_bb'] - data['lower_bb']) / data['middle_bb']
    
    # MACD
    macd_short, macd_long, macd_signal = 12, 26, 9
    data['ema_short'] = data['close'].ewm(macd_short).mean()
    data['ema_long'] = data['close'].ewm(macd_long).mean()
    data['macd'] = data['ema_short'] - data['ema_long']
    data['macd_signal'] = data['macd'].ewm(macd_signal).mean()
    data['macd_oscillator'] = data['macd'] - data['macd_signal']
    
    # RSI
    data['close_change'] = data['close'].diff()
    data['close_up'] = np.where(data['close_change']>=0, df['close_change'], 0)
    # data['close_up'] = data['close_change'].apply(lambda x: x if x >= 0 else 0)
    data['close_down'] = np.where(data['close_change'] < 0, df['close_change'].abs(), 0)
    # data['close_down] = data['close_change'].apply(lambda x: -x if x < 0 else 0)
    data['rs'] = data['close_up'].ewm(alpha=1/14, min_periods=14).mean() / data['close_down'].ewm(alpha=1/14, min_periods=14).mean()
    data['rsi'] = 100 - (100 / (1 + data['rs']))
    
    
    
    
    return data
        
    

In [49]:
df_adj = preprocess(df)

In [50]:
df_adj = df_adj[['date', 'close', 'volume', 'bb_width', 'bb_pb', 'macd', 'rsi']]

In [51]:
df_adj

Unnamed: 0,date,close,volume,bb_width,bb_pb,macd,rsi
0,1980-12-12,0.128348,0.099192,,,0.000000,
1,1980-12-15,0.121652,0.094017,,,-0.000071,
2,1980-12-16,0.112723,0.087117,,,-0.000221,
3,1980-12-17,0.115513,0.089273,,,-0.000244,
4,1980-12-18,0.118862,0.091861,,,-0.000196,
...,...,...,...,...,...,...,...
10900,2024-03-11,172.750000,172.750000,0.130244,0.220096,-3.602621,33.961404
10901,2024-03-12,173.229996,173.229996,0.127183,0.264019,-3.713854,35.372263
10902,2024-03-13,171.130005,171.130005,0.128136,0.203242,-3.885908,32.137364
10903,2024-03-14,173.000000,173.000000,0.126291,0.306350,-3.953187,37.609120


In [52]:
df_adj = df_adj[30:]

In [59]:
df_adj = df_adj.reset_index(drop=True)

# Environment

In [60]:
import gym
from gym import spaces
import numpy as np

class StockTradingEnv(gym.Env):
    def __init__(self, data, initial_cash=1000000):
        super(StockTradingEnv, self).__init__()
        self.data = data
        self.initial_cash = initial_cash
        self.current_step = None
        self.max_steps = len(data) - 1  # maximum number of trading steps
        self.action_space = spaces.Discrete(3)  # Actions: 0=buy, 1=sell, 2=hold
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(7, ))   # state : date, close, volume, bb_width, bb_pb, macd, rsi 
    
    def get_states(self):
        date = self.data['date'][self.current_step]
        stock_price = self.data['close'][self.current_step]
        volume = self.data['volume'][self.current_step]
        bb_width = self.data['bb_width'][self.current_step]
        bb_pb = self.data['bb_pb'][self.current_step]
        macd = self.data['macd'][self.current_step]
        rsi = self.data['rsi'][self.current_step]
        return [date, stock_price, volume, bb_width, bb_pb, macd, rsi]
      
    def reset(self):
        self.current_step = 0
        self.cash = self.initial_cash
        self.stock_owned = 0
        self.stock_price = self.data['close'][self.current_step]
        self.state = self.get_states()
        return np.array(self.state)
    
    def step(self, action):
        assert self.action_space.contains(action)
        prev_val = self._get_portfolio_value()
        
        self.current_step += 1
        if self.current_step > self.max_steps:
            done = True
        else:
            self.stock_price = self.data['close'][self.current_step]
            self.state = self.get_states()
            reward = self._calculate_reward(prev_val)
            done = False
            
        info = {}
        return np.array(self.state), reward, done, info
    
    def _get_portfolio_value(self):
        return self.cash + self.stock_owned * self.stock_price
    
    def _calculate_reward(self, prev_val):
        current_val = self._get_portfolio_value()
        return current_val - prev_val

In [61]:
env = StockTradingEnv(df_adj)
env.reset()
env.step(2)
env.step(1)

(array([datetime.date(1981, 1, 29), 0.1333709955215454,
        0.10307406634092331, 0.14951293749033503, 0.09020676524511631,
        0.000554267885709836, 42.116042483437916], dtype=object),
 0.0,
 False,
 {})

In [65]:
env.observation_space.shape[0]

7

In [66]:
env.action_space.n

3

# Agent

In [62]:
config = {
    'gamma': 0.99,
    'lr': 1e-4,
    'c1': 1,
    'c2': 0.5,
    'c3': 2e-3,
    'c_trunc': 10,
    'num_env': 8,
    'replay_capacity': 2000,
    'replay_ratio': 0.5,
    'num_replay': 4,
    'seq_length': 5,
    'batch_size': 16,
    'hidden_size': 128,
    'train_env_steps': 1000000,
    'num_eval_episode': 100.
}

In [67]:
# Agent

import torch
import random

import numpy as np

from torch import nn 
from collections import deque

class Agent(nn.Module):
    
    def __init__(self, env, config):
        super().__init__()
        self.config = config
        
        d_state = env.observation_space.shape[0]
        n_action = env.action_space.n
        
        self.encoder = nn.Sequential(
            nn.Linear(d_state, self.config['hidden_size']),
            nn.BatchNorm1d(self.config['hidden_size']),
            nn.Dropout(),
            nn.ELU(),
            nn.Linear(self.config['hidden_size'], self.config['hidden_size']),
            nn.BatchNorm1d(self.config['hidden_size']),
            nn.Dropout(),
            nn.ELU(),
            nn.Linear(self.config['hidden_size'], self.config['hidden_size']),
            nn.BatchNorm1d(self.config['hidden_size']),
            nn.Dropout(),
            nn.ELU(),  
        )
        
        self.action_value_head = nn.Sequential(
            nn.Linear(self.config['hidden_size'], self.config['hidden_size']),
            nn.ELU(),
            nn.Linear(self.config['hidden_size'], n_action)
        )
        
        self.policy_head = nn.Sequential(
            nn.Linear(self.config['hidden_size'], self.config['hidden_size']),
            nn.ELU(),
            nn.Linear(self.config['hidden_size'], n_action),
            nn.Softmax(dim=-1)
        )
        
        self.replay_memory = deque([], maxlen=config['replay_capacity'])
        self.on_policy_batch = deque([], maxlen=config['batch_size'])
        self.trajectory = self.create_trajectory()
        
    def create_trajectory():
        trajectory = {
            'state': list(),
            'action': list(),
            'pi_old': list(),
            'reward': list(),
            'state_next': list(),
            'done': list()
        }
        return trajectory
    
    def add_to_batch(self, s, a, pi, r, s_next, done):
        if (
            len(self.trajectory['state']) == self.config['seq_length']
        ):
            self.on_policy_batch.append(self.trajectory)
            self.trajectory = self.create_trajectory()
            
        if not done:
            len
        
        