In [1]:
# db connection

import pymysql
from sqlalchemy import create_engine
import keyring
import platform
import numpy as np

user = 'root'
pw = keyring.get_password('macmini_db', user)
host = '192.168.219.106' if platform.system() == 'Windows' else '127.0.0.1'
port = 3306
db = 'stock'

# DATA COLUMNS

In [2]:
# base data
COLUMNS_STOCK_DATA = ['date', 'open', 'high', 'low', 'close', 'volume']
COLUMNS_TRAINING_DATA = ['open', 'high', 'low', 'close', 'volume', 'close_ma5', 'volume_ma5', 'close_ma5_ratio', 'volume_ma5_ratio',
       'open_close_ratio', 'open_prev_close_ratio', 'high_close_ratio',
       'low_close_ratio', 'close_prev_close_ratio', 'volume_prev_volume_ratio',
       'close_ma10', 'volume_ma10', 'close_ma10_ratio', 'volume_ma10_ratio',
       'close_ma20', 'volume_ma20', 'close_ma20_ratio', 'volume_ma20_ratio',
       'close_ma60', 'volume_ma60', 'close_ma60_ratio', 'volume_ma60_ratio',
       'close_ma120', 'volume_ma120', 'close_ma120_ratio',
       'volume_ma120_ratio', 'close_ma240', 'volume_ma240',
       'close_ma240_ratio', 'volume_ma240_ratio', 'upper_bb',
       'lower_bb', 'bb_pb', 'bb_width', 'macd',
       'macd_signal', 'macd_oscillator', 'rs', 'rsi']

# DEVICE

In [3]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# UTILITIES

## Load Data

### Get stock price

In [4]:
# get stock price
import pandas as pd
import pymysql
from sqlalchemy import create_engine


# get us stock price of a specific ticker
def get_stock_data(ticker, fro=None, to=None):

    # connect DB
    engine = create_engine(f'mysql+pymysql://{user}:{pw}@{host}:{port}/{db}')

    con = pymysql.connect(
        user=user,
        passwd=pw,
        host=host,
        db=db,
        charset='utf8'
    )
            
    mycursor = con.cursor()
    
    if fro is not None:
        if to is not None:               
            query = f""" 
                    SELECT * FROM price_global
                    WHERE ticker = '{ticker}'
                    AND date BETWEEN '{fro}' AND '{to}' 
                    """
        else:
            query = f""" 
                    SELECT * FROM price_global
                    WHERE ticker = '{ticker}'
                    AND date >= '{fro}'
                    """
    
    else:
        if to is not None:
            query = f""" 
                    SELECT * FROM price_global
                    WHERE ticker = '{ticker}'
                    AND date <= '{to}' 
                    """
        else:
            query = f""" 
                    SELECT * FROM price_global
                    WHERE ticker = '{ticker}'
                    """
            
    print(query)
    stock_data = pd.read_sql(query, con=engine)
    con.close()
    engine.dispose()
    return stock_data[['date', 'open', 'high', 'low', 'close', 'adj_close', 'volume', 'ticker']]

#### Sample code

In [5]:
stock_code = 'AAPL'
fro = '2018-01-01'
to = '2022-12-31'
df = get_stock_data(stock_code, fro=fro, to=to)

 
                    SELECT * FROM price_global
                    WHERE ticker = 'AAPL'
                    AND date BETWEEN '2018-01-01' AND '2022-12-31' 
                    


### Preprocessing

In [6]:
# preprocessing

def preprocess(data):
    
    # moving average
    windows = [5, 10, 20, 60, 120, 240]
    for window in windows:
        data[f'close_ma{window}'] = data['close'].rolling(window).mean()
        data[f'volume_ma{window}'] = data['volume'].rolling(window).mean()
        data[f'close_ma{window}_ratio'] = (data['close'] - data[f'close_ma{window}']) / data[f'close_ma{window}']
        data[f'volume_ma{window}_ratio'] = (data['volume'] - data[f'volume_ma{window}']) / data[f'volume_ma{window}']
        data['open_close_ratio'] = (data['open'].values - data['close'].values) / data['close'].values
        data['open_prev_close_ratio'] = np.zeros(len(data))
        data.loc[1:, 'open_prev_close_ratio'] = (data['open'][1:].values - data['close'][:-1].values) / data['close'][:-1].values
        data['high_close_ratio'] = (data['high'].values - data['close'].values) / data['close'].values
        data['low_close_ratio'] = (data['low'].values - data['close'].values) / data['close'].values
        data['close_prev_close_ratio'] = np.zeros(len(data))
        data.loc[1:, 'close_prev_close_ratio'] = (data['close'][1:].values - data['close'][:-1].values) / data['close'][:-1].values 
        data['volume_prev_volume_ratio'] = np.zeros(len(data))
        data.loc[1:, 'volume_prev_volume_ratio'] = (
            # if volume is 0, change it into non zero value exploring previous volume continuously
            (data['volume'][1:].values - data['volume'][:-1].values) / data['volume'][:-1].replace(to_replace=0, method='ffill').replace(to_replace=0, method='bfill').values
        )
    
    # Bollinger band
    data['middle_bb'] = data['close'].rolling(20).mean()
    data['upper_bb'] = data['middle_bb'] + 2 * data['close'].rolling(20).std()
    data['lower_bb'] = data['middle_bb'] - 2 * data['close'].rolling(20).std()
    data['bb_pb'] = (data['close'] - data['lower_bb']) / (data['upper_bb'] - data['lower_bb'])
    data['bb_width'] = (data['upper_bb'] - data['lower_bb']) / data['middle_bb']
    
    # MACD
    macd_short, macd_long, macd_signal = 12, 26, 9
    data['ema_short'] = data['close'].ewm(macd_short).mean()
    data['ema_long'] = data['close'].ewm(macd_long).mean()
    data['macd'] = data['ema_short'] - data['ema_long']
    data['macd_signal'] = data['macd'].ewm(macd_signal).mean()
    data['macd_oscillator'] = data['macd'] - data['macd_signal']
    
    # RSI
    data['close_change'] = data['close'].diff()
    # data['close_up'] = np.where(data['close_change'] >=0, df['close_change'], 0)
    data['close_up'] = data['close_change'].apply(lambda x: x if x >= 0 else 0)
    # data['close_down'] = np.where(data['close_change'] < 0, df['close_change'].abs(), 0)
    data['close_down'] = data['close_change'].apply(lambda x: -x if x < 0 else 0)
    data['rs'] = data['close_up'].ewm(alpha=1/14, min_periods=14).mean() / data['close_down'].ewm(alpha=1/14, min_periods=14).mean()
    data['rsi'] = 100 - (100 / (1 + data['rs']))
    
    
    return data

#### Sample code

In [7]:
df_adj = preprocess(df)

### Load data function

load_data() function is a combined function for getting data from databases and preprocessing it into training data.

In [8]:
def load_data(stock_code, fro, to):
    ''' 
    Arguments
    ----------
    - stock_code : unique stock code
    - fro : start date
    - to : end data
    
    Returns
    --------
    df_adj : entire prerprocessed data
    stock_data : data for plotting chart
    training_data : data for training a model
    '''
    
    df = get_stock_data(stock_code, fro, to)
    df_adj = preprocess(df).dropna().reset_index(drop=True)
    # df_adj.dropna(inplace=True).reset_index(drop=True)
    
    stock_data = df_adj[COLUMNS_STOCK_DATA]
    training_data = df_adj[COLUMNS_TRAINING_DATA]
    
    return df_adj, stock_data, training_data.values

#### Sample code

In [9]:
df_adj, df_stock_data, df_training_data = load_data(stock_code, fro, to)

 
                    SELECT * FROM price_global
                    WHERE ticker = 'AAPL'
                    AND date BETWEEN '2018-01-01' AND '2022-12-31' 
                    


## Functions

### Sigmoid function

- function for calculating probabilities based on the value

In [10]:
def sigmoid(x):
    x = max(min(x, 10), -10)
    return 1. / (1. + np.exp(-x))

In [11]:
df

Unnamed: 0,date,open,high,low,close,adj_close,volume,ticker,close_ma5,volume_ma5,...,ema_short,ema_long,macd,macd_signal,macd_oscillator,close_change,close_up,close_down,rs,rsi
0,2018-01-02,42.314999,42.540001,43.075001,43.064999,40.670979,102223600.0,AAPL,,,...,43.064999,43.064999,0.000000,0.000000,0.000000,,0.000000,0.000000,,
1,2018-01-03,42.990002,43.132500,43.637501,43.057499,40.663895,118071600.0,AAPL,,,...,43.061099,43.061178,-0.000079,-0.000042,-0.000038,-0.007500,0.000000,0.007500,,
2,2018-01-04,43.020000,43.134998,43.367500,43.257500,40.852776,89738400.0,AAPL,,,...,43.131870,43.129103,0.002767,0.000995,0.001772,0.200001,0.200001,0.000000,,
3,2018-01-05,43.262501,43.360001,43.842499,43.750000,41.317909,94640000.0,AAPL,,,...,43.305420,43.293222,0.012198,0.004252,0.007946,0.492500,0.492500,0.000000,,
4,2018-01-08,43.482498,43.587502,43.902500,43.587502,41.164436,82271200.0,AAPL,43.343500,97388960.0,...,43.371209,43.356602,0.014607,0.006781,0.007826,-0.162498,0.000000,0.162498,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1254,2022-12-23,129.639999,130.919998,132.419998,131.860001,130.959976,63814900.0,AAPL,132.841998,76924080.0,...,140.258514,143.745042,-3.486528,-2.163850,-1.322678,-0.369995,0.000000,0.369995,0.554614,35.675357
1255,2022-12-27,128.720001,131.380005,131.410004,130.029999,129.142441,69007800.0,AAPL,132.373999,74807120.0,...,139.471705,143.237077,-3.765373,-2.324002,-1.441370,-1.830002,0.000000,1.830002,0.507354,33.658569
1256,2022-12-28,125.870003,129.669998,131.029999,126.040001,125.179680,85438400.0,AAPL,131.121999,76408240.0,...,138.438497,142.600149,-4.161652,-2.507767,-1.653885,-3.989998,0.000000,3.989998,0.422765,29.714325
1257,2022-12-29,127.730003,127.989998,130.479996,129.610001,128.725311,75703700.0,AAPL,129.953999,74363380.0,...,137.759382,142.119032,-4.359650,-2.692956,-1.666695,3.570000,3.570000,0.000000,0.583415,36.845374


# ENVIRONMENTS

- Environment has stock market data and return the current and next price to the agent/

In [12]:
# environment

import numpy as np
import pandas as pd

# environment

class Environment:
    ''' 
    Attribute
    ---------
    - stock_data : stock price data such as 'open', 'close', 'high', 'low', 'volume'
    - state : current state
    - idx : current postion of stock data
    
    
    Functions
    --------
    - reset() : initialize idx and state
    - observe() : move idx into next postion and get a new state
    - get_close_price() : get close price of current state
    - get_next_close_price() : get close price of next index state
    - get_open_price() : get open price of current state
    - get_next_open_price() : get open price of next indext state
    - get_state() : get current state
    '''
    
    def __init__(self, stock_data=None):
        self.close_price_idx = 4    # index postion of close price
        self.open_price_idx = 1     # index position of open price
        self.stock_data = stock_data
        self.state = None
        self.idx = -1
        self.max_idx = len(stock_data)
        
    def reset(self):
        self.state = None
        self.idx = -1
        # self.idx = 0
        # self.state = self.stock_data.iloc[self.idx]
        
    def observe(self):
        # move to next day and get price data
        # if there is no more idx, return None
        if len(self.stock_data) > self.idx + 1:
            self.idx += 1
            self.state = self.stock_data.iloc[self.idx]
            return self.state
        return None
    
    def get_close_price(self):
        # return close price
        if self.state is not None:
            return self.state[self.close_price_idx]
        return None
    
    def get_next_close_price(self):
        # return tomorrow close price
        if self.idx < self.max_idx - 1:
            return self.stock_data.iloc[self.idx + 1, self.close_price_idx]
        else:
            return self.stock_data.iloc[self.idx, self.close_price_idx]
    
    def get_open_price(self):
        # return open price
        if self.state is not None:
            return self.state[self.open_price_idx]
        
    def get_next_open_price(self):
        # return tomorrow open price
        if self.idx < self.max_idx - 1:
            return self.stock_data.iloc[self.idx + 1, self.open_price_idx]
        else:
            return self.stock_data.iloc[self.idx, self.open_price_idx] 
    
    def get_state(self):
        # return current state
        if self.state is not None:
            return self.state
        return None
        

#### Sample code

In [13]:
e = Environment(df_adj)
e.reset()

In [14]:
e.get_state()

In [15]:
e.observe()

date                              2018-12-13
open                               42.387501
high                               42.622501
low                                43.142502
close                              42.737499
adj_close                          40.967648
volume                           127594400.0
ticker                                  AAPL
close_ma5                          42.338501
volume_ma5                       175292480.0
close_ma5_ratio                     0.009424
volume_ma5_ratio                   -0.272106
open_close_ratio                   -0.008189
open_prev_close_ratio               0.002661
high_close_ratio                   -0.002691
low_close_ratio                     0.009477
close_prev_close_ratio               0.01094
volume_prev_volume_ratio           -0.104669
close_ma10                         43.528251
volume_ma10                      170264920.0
close_ma10_ratio                   -0.018166
volume_ma10_ratio                  -0.250613
close_ma20

In [16]:
e.get_state()

date                              2018-12-13
open                               42.387501
high                               42.622501
low                                43.142502
close                              42.737499
adj_close                          40.967648
volume                           127594400.0
ticker                                  AAPL
close_ma5                          42.338501
volume_ma5                       175292480.0
close_ma5_ratio                     0.009424
volume_ma5_ratio                   -0.272106
open_close_ratio                   -0.008189
open_prev_close_ratio               0.002661
high_close_ratio                   -0.002691
low_close_ratio                     0.009477
close_prev_close_ratio               0.01094
volume_prev_volume_ratio           -0.104669
close_ma10                         43.528251
volume_ma10                      170264920.0
close_ma10_ratio                   -0.018166
volume_ma10_ratio                  -0.250613
close_ma20

In [17]:
e.get_next_close_price()

41.369998931884766

In [18]:
e.observe()

date                              2018-12-14
open                                   41.32
high                                   42.25
low                                    42.27
close                              41.369999
adj_close                          39.656776
volume                           162814800.0
ticker                                  AAPL
close_ma5                             42.188
volume_ma5                       174030160.0
close_ma5_ratio                    -0.019389
volume_ma5_ratio                   -0.064445
open_close_ratio                   -0.001209
open_prev_close_ratio              -0.033168
high_close_ratio                    0.021271
low_close_ratio                     0.021755
close_prev_close_ratio             -0.031998
volume_prev_volume_ratio            0.276034
close_ma10                         43.176501
volume_ma10                      169838400.0
close_ma10_ratio                    -0.04184
volume_ma10_ratio                  -0.041355
close_ma20

In [19]:
e.get_state()['close']

41.369998931884766

In [20]:
e.get_close_price()

41.369998931884766

In [21]:
e.get_next_open_price()

40.682498931884766

In [22]:
e.observe()

date                              2018-12-17
open                               40.682499
high                               41.362499
low                                42.087502
close                              40.985001
adj_close                          39.287712
volume                           177151600.0
ticker                                  AAPL
close_ma5                             41.905
volume_ma5                       159839680.0
close_ma5_ratio                    -0.021954
volume_ma5_ratio                    0.108308
open_close_ratio                   -0.007381
open_prev_close_ratio              -0.016618
high_close_ratio                    0.009211
low_close_ratio                       0.0269
close_prev_close_ratio             -0.009306
volume_prev_volume_ratio            0.088056
close_ma10                         42.810501
volume_ma10                      171740960.0
close_ma10_ratio                   -0.042641
volume_ma10_ratio                   0.031505
close_ma20

In [23]:
e.get_state()['open']

40.682498931884766

In [24]:
e.get_open_price()

40.682498931884766

# AGENT



- Agent decides and validates an action accoding to the value of networks from learners class.

- Agent also has initial balance and current portfolio value.

- Agent has its own states : ratio of holding stocks, profit/loss ratio, current current price to average buying price ratio

In [None]:
class Agent:
    ''' 
    Attributes
    --------
    - enviroment : instance of environment
    - initial_balance : initial capital balance
    - min_trading_price : minimum trading price
    - max_trading_price : maximum trading price
    - balance : cash balance
    - num_stocks : obtained stocks
    - portfolio_value : value of portfolios (balance + price * num_stocks)
    - num_buy : number of buying
    - num_sell : number of selling
    - num_hold : number of holding
    - ratio_hold : ratio of holding stocks
    - profitloss : current profit or loss
    - avg_buy_price_ratio : the ratio average price of a stock bought to the current price
    
    Functions
    --------
    - reset() : initialize an agent
    - set_balance() : initialize balance
    - get_states() : get the state of an agent
    - decide_action() : exploration or exploitation behavior according to the policy net
    - validate_action() : validate actions
    - decide_trading_unit() : decide how many stocks are sold or bought
    - act() : act the actions
    '''
    
    # agent stste dimensions
    ## (ratio_hold, profitloss, current price to avg_buy_price ratio)
    STATE_DIM = 3
    
    # trading charge and tax
    TRADING_CHARGE = 0.00015    # trading charge 0.015%
    TRADING_TAX = 0.002          # trading tax = 0.2% 
    
    # action space
    ACTION_BUY = 0      # buy
    ACTION_SELL = 1     # sell
    ACTION_HOLD = 2     # hold
    
    # get probabilities from neural nets
    ACTIONS = [ACTION_BUY, ACTION_SELL, ACTION_HOLD]
    NUM_ACTIONS = len(ACTIONS)      # output number from nueral nets
    
    def __init__(self, env,
                 initial_balance=None, min_trading_money=None, max_trading_money=None):        
        
        # get current price from the environment
        self.env = env
        self.initial_balance = initial_balance
        
        # minumum and maximum trainding price
        self.min_trading_money = min_trading_money
        self.max_trading_money = max_trading_money
        
        # attributes for an agent class
        self.balance = initial_balance
        self.num_stocks = 0
        
        # value of portfolio : balance + num_stocks * {current stock price}
        self.portfolio_value = self.balance
        self.num_buy = 0
        self.num_sell = 0
        self.num_hold = 0
        
        # three states of Agent class
        self.ratio_hold = 0
        self.profitloss = 0
        self.avg_buy_price = 0
        
    def reset(self):
        self.balance = self.initial_balance
        self.num_stocks = 0
        self.portfolio_value = self.balance
        self.num_buy = 0
        self.num_sell = 0
        self.num_hold = 0
        self.ratio_hold = 0
        self.profitloss = 0
        self.avg_buy_price = 0
        self.done = False
        # self.env.reset()
        
    def set_initial_balance(self, balance):
        self.initial_balance = balance
        
    def get_states(self):
        # return current profitloss based on close price
        close_price = self.env.get_close_price()
        self.ratio_hold = self.num_stocks * close_price / self.portfolio_value
        self.portfolio_value = self.balance + close_price * self.num_stocks
        self.profitloss = self.portfolio_value / self.initial_balance - 1
        return (
            self.ratio_hold,
            self.profitloss,        # profitloss = (portfolio_value / initial_balance) - 1
            (self.env.get_close_price() / self.avg_buy_price) if self.avg_buy_price > 0 else 0
        )
        
    def decide_action(self, pred_value, pred_policy, eps):
        # act randomly with epsilon probability, act according to neural network  with (1 - epsilon) probability
        confidence = 0
        
        # if theres is a pred_policy, follow it, otherwise follow a pred_value
        pred = pred_policy
        if pred is None:
            pred = pred_value
            
        # there is no prediction from both pred_policy and pred_value, explore!
        if pred is None:
            eps = 1
        else:
            maxpred = np.max(pred)
            # if values for actions are euqal, explore!
            if (pred == maxpred).all():
                eps = 1
                
            # if the difference between buying and selling prediction policy value is less than 0.05, explore!
            if pred_policy is not None:
                if np.max(pred_policy) - np.min(pred_policy) < 0.05:
                    eps = 1
                    
        # decide whether exploration will be done or not
        if np.random.rand() < eps:
            exploration = True
            action = np.random.randint(self.NUM_ACTIONS) 
        else: 
            exploration = False
            action = np.argmax(pred)
            
        confidence = .5
        if pred_policy is not None:
            confidence = pred[action]
        elif pred_value is not None:
            confidence = sigmoid(pred[action])
            
        return action, confidence, exploration
    
    def validate_action(self, action):
        # validate if the action is available
        if action == Agent.ACTION_BUY:
            # check if al least one stock can be bought.
            if self.balance < self.env.get_next_open_price() * (1 + self.TRADING_CHARGE):
                return False
        elif action == Agent.ACTION_SELL:
            # check if there is any sotck that can be sold
            if self.num_stocks <= 0:
                return False
        
        return True
    
    def decide_trading_unit(self, confidence):
        # adjust number of stocks for buying and selling according to confidence level
        if np.isnan(confidence):
            return self.min_trading_money
        
        # set buying price range between self.min_trading_money + added_trading_price [min_trading_money, max_trading_money]
        # in case that confidence > 1 causes the price over max_trading_money, we set min() so that the value cannot have larger value than self.max_trading_money - self.min_trading_money
        # in case that confidence < 0, we set max() so that added_trading_price cannot have negative value.
        added_trading_money = max(min(
            int(confidence * (self.max_trading_money - self.min_trading_money)),
            self.max_trading_money - self.min_trading_money
        ), 0)
        
        trading_price = self.min_trading_money + added_trading_money
        
        return max(int(trading_price / self.env.get_next_open_price()), 1)
    
    def step(self, action, confidence):
        '''
        Arguments
        ---------
        - action : decided action from decide_action() method based on exploration or exploitation (0 or 1)
        - confidence : probability from decide_action() method, the probability from policy network or the softmax probability from value network
        '''
        
        # get the next open price from the environment
        
        open_price = self.env.get_next_open_price()
        
        if not self.validate_action(action):
            action = Agent.ACTION_HOLD
        
        # buy
        if action == Agent.ACTION_BUY:
            # decide how many stocks will be bought
            trading_unit = self.decide_trading_unit(confidence)
            balance = (
                self.balance - open_price * (1 + self.TRADING_CHARGE) * trading_unit
            )
            
            # if lacks of balance, buy maximum units within the amount of money available
            if balance < 0:
                trading_unit = min(
                    int(self.balance / (open_price * (1 + self.TRADING_CHARGE))),
                    int(self.max_trading_money / open_price)
                )
                
            # total amount of money with trading charge
            invest_amount = open_price * (1 + self.TRADING_CHARGE) * trading_unit
            if invest_amount > 0:
                self.avg_buy_price = (self.avg_buy_price * self.num_stocks + open_price * trading_unit) / (self.num_stocks + trading_unit)
                self.balance -= invest_amount
                self.num_stocks += trading_unit
                self.num_buy += 1
                
        # sell
        elif action == self.ACTION_SELL:
            # decide how many stocks will be sold
            trading_unit = self.decide_trading_unit(confidence)
            
            # if lacks of stocks, sell maximum units available
            trading_unit = min(trading_unit, self.num_stocks)
            
            # selling amount
            invest_amount = open_price * (
                1 - (self.TRADING_TAX + self.TRADING_CHARGE)
            ) * trading_unit
            
            if invest_amount > 0:
                # update average buy price
                self.avg_buy_price = (self.avg_buy_price * self.num_stocks - open_price * trading_unit) / (self.num_stocks - trading_unit) if self.num_stocks > trading_unit else 0
                self.num_stocks -= trading_unit
                self.balance += invest_amount
                self.num_sell += 1
                
        # hold
        elif action == self.ACTION_HOLD:
            self.num_hold += 1
            
        # update portfolio value with close price
        close_price = self.env.get_next_close_price()
        
        self.portfolio_value = self.balance + close_price * self.num_stocks
        self.profitloss = self.portfolio_value / self.initial_balance - 1
        
        # info = {
        #     'num_stocks': self.num_stocks,
        #     'num_hold': self.num_hold,
        #     'num_buy': self.num_buy,
        #     'num_sell': self.num_sell
        # }
        
        # return next_state, self.profitloss, self.done, info             # (next_states, profitloss, done, info)
        return self.profitloss
    
   

# NETWORK

In [27]:
import threading
import abc
import numpy as np

import torch
from torch import nn
import torch.nn.functional as F

In [26]:
device

device(type='cuda')

In [28]:
class Network:
    '''
    Common attributes and methods for neural networks
    
    Attributes
    --------
    - input_dim
    - output_dim
    - lr : learning rate
    - shared_network : head of neural network which is shared with various networks (e.g., A2C)
    - activation : activation layer function ('linear', 'sigmoid', 'tanh', 'softmax')
    - loss : loss function for networks
    - model : final neural network model
    
    Functions
    --------
    - predict() : calculate value or probability of actions
    - train_on_batch() : generate batch data for training
    - save_model()
    - load_model()
    - get_shared_network() : generate network head for the networks
    '''
    
    # threading lock for A3C
    lock = threading.Lock()
    
    def __init__(
        self, input_dim=0, output_dim=0, num_steps=1, lr=0.001,
        net=None, shared_network=None, activation='sigmoid', loss='mse'
    ):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_steps = num_steps
        self.lr = lr
        self.shared_network = shared_network
        self.activation = activation
        self.loss = loss
        self.net = net
        
        # data shape for various network
        # CNN, LSTM have 3 dimensional shape, so we set input shape as (num_stpes, input_dim)
        # DNN have 2 dimensional shape and we set input shape as (input_dim,)
        inp = None
        if self.num_steps > 1:
            inp = (self.num_steps, input_dim)
        else:
            inp = (self.input_dim,)
            
        # in case that shared network is used,
        self.head = None
        if self.shared_network is None:
            self.head = self.get_network_head(inp, self.output_dim)
        else:
            self.head = self.shared_network
            
        # neual network model
        ## generate network model for head
        self.model = nn.Sequential(self.head)
        
        # add activation layer
        if self.activation == 'linear':
            pass
        elif self.activation == 'relu':
            self.model.add_module('activation', nn.ReLU())
        elif self.activation == 'leaky_relu':
            self.model.add_module('activation', nn.LeakyReLU())
        elif self.activation == 'sigmoid':
            self.model.add_module('activation', nn.Sigmoid())
        elif self.activation == 'tanh':
            self.model.add_module('activation', nn.Tanh())
        elif self.activation == 'softmax':
            self.model.add_module('activation', nn.Softmax(dim=1))
        self.model.apply(Network.init_weights)
        self.model.to(device)
        
        # optimizer
        self.optimizer = torch.optim.NAdam(self.model.parameters(), lr=self.lr)
        
        # loss function
        self.criterion = None
        if loss == 'mse':
            self.criterion = nn.MSELoss()
        elif loss == 'binary_crossentropy':
            self.criterion = nn.BCELoss()
            
    def predict(self, sample):
        # return prediction of buy, sell and hold on given sample
        # value network returns each actions' value on sample and policy network returns each actions' probabilities on sample
        with self.lock:
            # transform evaluation mode: deactivate module used only on training such as Dropout
            self.model.eval()
            with torch.no_grad():
                x = torch.from_numpy(sample).float().to(device)
                pred = self.model(x).detach().cpu().numpy()
                pred = pred.faltten()
            return pred
        
    def train_on_batch(self, x, y):
        if self.num_steps > 1:
            x = np.array(x).reshape((-1, self.num_steps, self.input_dim))
        else:
            x = np.array(x).reshape((-1, self.input_dim))
            
        loss = 0.
        
        if self.net == 'ppo':
            pass
        else:
            with self.lock:
                self.model.train()
                _x = torch.from_numpy(x).float().to(device)
                _y = torch.from_numpy(y).float().to(device)
                y_pred = self.model(_x)
                _loss = self.criterion(y_pred, _y)
                _loss.backward()
                self.optimizer.zero_grad()
                loss += _loss.item()
            return loss

    

IndentationError: expected an indented block (829353598.py, line 107)