In [1]:
from collections import deque
from datetime import datetime


class Environment2:
    """
    action의 종류
        - BUY: 가진 현금에서 [100%,50%,25%,10%,5%] 매수
        - SELL: 가진 주식에서 [100%,50%,25%,10%,5%] 매도
        - HOLD: DO NOTHING
        
    action = 6 => 5% 매수   
    action = 7 => 10% 매수  
    action = 8 => 25% 매수  
    action = 9 => 50% 매수  
    action = 10 => 100% 매수

    action = 0 => 5% 매도  
    action = 1 => 10% 매도 
    action = 2 => 25% 매도 
    action = 3 => 50% 매도 
    action = 4 => 100% 매도
    
    action = 5 => Hoding  
    
    """
    # mid price = (high + low) / 2
    # df[index][col][-1]
    PCT_IDX = 17 # 1분전 MID price의 가격 변동 
    MID_IDX = 5
    
    def __init__(self, chart_data=None, chart_index:list = None, risk_adverse= 1.3 ,stop_trade=0.9 ,balance=100000000, transaction=0.0005, max_leverage=3):
        self.chart_data = chart_data # numpy array
        self.chart_index = chart_index # python list
        self.idx = 0

        self.risk_adverse = risk_adverse # 손실에 주는 가중치 
        self.stop_trade = stop_trade # 손절선
        self.transaction = transaction # 거래수수료
        self.max_leverage = max_leverage
        
        self.current_state = chart_data[self.idx]
        self.next_state = chart_data[self.idx+1]
                    
        self.current_price = self.chart_data[self.idx][self.MID_IDX][-1]
        self.next_price = self.chart_data[self.idx+1][self.MID_IDX][-1]
        
        self.balance = [balance]  # 포트폴리오가 보유한 현금
        self.bitcoin = [0]  # 포트폴리오가 보유한 비트코인의 가치 (매 거래마다 바로 청산됨)
        self.portfolio_value = []
        
        self.action_list = deque([5 for i in range(30)]) # 이전 30분을 저장함.
        self.action_info = [-3, -2, -1, -0.5, -0.25 , 0 , 0.25, 0.5, 1, 2, 3]
        self.position = 0 # 이전의 position 비율을 저장하는 변수. (+)는 long, (-)는 short
        self.profit_queue = deque([0.0004 for i in range(60)]) # 이전 1시간의 변동성을 고려함
        
        
    def reset(self):
        self.idx = 0
        state = self.chart_data[self.idx]
        return state
    
    def get_profit_std(self, profit):
        self.profit_queue.popleft()
        self.profit_queue.append(profit)
        std = (pow(sum(self.profit_queue),2)/len(self.profit_queue))**0.5
        return abs(std)
    
    
    def step(self, action):
        self.current_state = self.chart_data[self.idx]
        self.next_state = self.chart_data[self.idx+1]
        self.current_price = self.chart_data[self.idx][self.MID_IDX][-1]
        self.next_price = self.chart_data[self.idx+1][self.MID_IDX][-1]

        current_value = self.balance[-1] + self.bitcoin[-1]*self.current_price 
        self.portfolio_value.append(current_value)
        
        s_prime = self.chart_data[self.idx+1]
        
        # action list에 새로운 action 추가해줌
        self.action_list.append(action)
        self.action_list.popleft()
            
        # reward 계산
        profit = self.get_reward(action)
        
        # 얻은 수익률의 표준편차를 구해준다.
        std = self.get_profit_std(profit)  
            
        # sharpe ratio를 maximize하는 형식
        reward = profit/std 

        # 시간 index 갱신
        self.idx += 1
        
        # risk adverse정도를 고려해서 reward 계산
        if reward<0:
            reward =  reward * self.risk_adverse
        
        current_time = datetime.strptime(self.chart_index[self.idx], '%Y-%m-%d %H:%M:%S')
        current_day = datetime.strftime(current_time, '%Y-%m-%d')
        next_time = datetime.strptime(self.chart_index[self.idx+1], '%Y-%m-%d %H:%M:%S')
        next_day = datetime.strftime(next_time, '%Y-%m-%d')

        if current_day == next_day:
            return {"state_time":current_day, 
                    "next_state":s_prime, 
                    "reward":round(reward,8), 
                    "done":False, 
                    "portfolio_value":self.portfolio_value[-1], 
                    "balance":self.balance[-1], 
                    "bitcoin":self.bitcoin[-1], 
                    "position":self.position,
                    "action_list":self.action_list}
        else:
            print("#########################################################################")
            print(f'{self.current_state.name}에서 {self.portfolio_value[-1]}으로 trading stop')

            return {"state_time":current_day, 
                    "next_state":s_prime, 
                    "reward":round(reward,8), 
                    "done":True, 
                    "portfolio_value":self.portfolio_value[-1], 
                    "balance":self.balance[-1], 
                    "bitcoin":self.bitcoin[-1], 
                    "position":self.position,
                    "action_list":self.action_list}
      
        
    def position_calc(self, action): # max_leverage를 고려해서 position을 계산해주는 함수
        action = self.action_info[action] # action의 실제 action
        
        if action * self.position > 0:
            if  self.position + action > self.max_leverage:
                ratio = 0
                self.position = self.max_leverage
                return self.position, ratio, False
            
            elif self.position + action < -self.max_leverage:
                ratio  = 0 
                self.position = -self.max_leverage
                return self.position, ratio, False
            else:
                self.position += action
                ratio = action
                return self.position, ratio, False   
                    
        elif action * self.position < 0:
            self.position = action 
            ratio = action 
            return self.position, ratio, True
        
        elif action*self.position==0:
            if action==0:
                self.position = action 
                return self.position, 0, False
            
            elif self.position==0:
                self.position = action 
                return self.position, self.position, False
            
        
    def get_reward(self, action):     
        temp_position = self.position
        # Short
        if action <= 4:
            position, ratio, execution = self.position_calc(action)
            sell_budget = self.balance[0] * ratio
            
            if execution: # 직전 포지션이 long
                clearing_budget = self.bitcoin[-1] * self.current_price
                # 이전에 매수를 한 경우 => 현재 매도(long 청산)
                self.balance.append(self.balance[-1] - sell_budget*(1-self.transaction) + clearing_budget*(1-self.transaction))
                self.bitcoin.append(sell_budget/self.current_price)          
            else: # 직전 포지션이 short
                self.balance.append(self.balance[-1] - sell_budget*(1-self.transaction))
                self.bitcoin.append(self.bitcoin[-1] + sell_budget/self.current_price)  
        
            current_value = self.portfolio_value[-1]
            next_value = self.balance[-1] + self.bitcoin[-1]*(self.next_price)
            reward = next_value/current_value - 1
            return reward
            
        # Long
        elif action >= 6:
            position, ratio, execution = self.position_calc(action)
            buy_budget = self.balance[0] * ratio
            
            if execution: # 직전 포지션 short
                clearing_budget = self.bitcoin[-1] * self.current_price
                self.balance.append(self.balance[-1] - buy_budget*(1+self.transaction) + clearing_budget*(1+self.transaction))    
                self.bitcoin.append(buy_budget/self.current_price)              

            else: # 직전 포지션 long
                self.balance.append(self.balance[-1] - buy_budget*(1+self.transaction))
                self.bitcoin.append(self.bitcoin[-1] + buy_budget/self.current_price)  
            
            current_value = self.portfolio_value[-1]
            next_value = self.balance[-1] + self.bitcoin[-1]*(self.next_price)
            reward = next_value/current_value-1   
            return reward
        
        # HOLD
        elif action == 5:
    
            reward = -abs(self.next_state[self.PCT_IDX][-1])/5 
            return reward
        

In [2]:
import os
os.getcwd()

'c:\\Users\\user\\Documents\\GitHub\\mini-project\\bitcoin_scalping_bot\\ipynb 파일'

In [3]:
import pandas as pd
import numpy as np
import pickle

In [4]:
df_index = pd.read_csv('..\\upbit_data\\train_data_2023_3D_index.csv', index_col=0)

with open('..\\upbit_data\\train_data_2023_3D.pkl', 'rb') as f:
    df = pickle.load(f)


0. df_2023['open']
1. df_2023['high']
2. df_2023['low']
3. df_2023['close']
4. df_2023['volume']
5. df_2023['mid_price']
6. df_2023['7min_MA']
7. df_2023['25min_MA']
8. df_2023['99min_MA']
9. df_2023['7_ema']
10. df_2023['25_ema']
11. df_2023['99_ema']

--------

12. df_2023['open'].pct_change()
13. df_2023['high'].pct_change()
14. df_2023['low'].pct_change()
15. df_2023['close'].pct_change()
16. df_2023['volume'].pct_change()
17. df_2023['mid_price'].pct_change()
18. df_2023['7min_MA'].pct_change()
19. df_2023['25min_MA'].pct_change()
20. df_2023['99min_MA'].pct_change()
21. df_2023['7_ema'].pct_change()
22. df_2023['25_ema'].pct_change()
23. df_2023['99_ema'].pct_change()


In [5]:
df_index.shape, df.shape

((344199, 1), (344199, 24, 60))

In [6]:
print(df[0])

[[ 2.11188833e+07  2.11194333e+07  2.11198500e+07 ...  2.11000833e+07
   2.10999667e+07  2.10997833e+07]
 [ 2.11252667e+07  2.11257667e+07  2.11263500e+07 ...  2.11058333e+07
   2.11054333e+07  2.11050667e+07]
 [ 2.11113167e+07  2.11119000e+07  2.11124333e+07 ...  2.10961000e+07
   2.10959167e+07  2.10957167e+07]
 ...
 [ 3.46149177e-05  3.23806301e-05  3.09018035e-05 ... -1.59345545e-05
  -1.54041932e-05 -1.49078923e-05]
 [ 3.87875072e-05  3.77787926e-05  3.69082469e-05 ... -1.82220619e-05
  -1.78829566e-05 -1.75396261e-05]
 [ 2.78610218e-05  2.78129375e-05  2.77832355e-05 ... -4.54834178e-06
  -4.75208086e-06 -4.94266394e-06]]


In [7]:
# mid price
df[0][5]

array([21118291.66666667, 21118833.33333333, 21119391.66666667,
       21119950.        , 21120483.33333333, 21120850.        ,
       21121133.33333333, 21121316.66666667, 21121483.33333333,
       21121550.        , 21121541.66666667, 21121491.66666667,
       21121450.        , 21121275.        , 21121125.        ,
       21120908.33333333, 21120616.66666667, 21120241.66666667,
       21119825.        , 21119433.33333333, 21119041.66666667,
       21118575.        , 21117883.33333333, 21117250.        ,
       21116666.66666667, 21116166.66666667, 21115750.        ,
       21115108.33333333, 21114558.33333333, 21113983.33333333,
       21113325.        , 21112675.        , 21112108.33333333,
       21111533.33333333, 21110975.        , 21110291.66666667,
       21109533.33333333, 21108958.33333333, 21108400.        ,
       21107866.66666667, 21107316.66666667, 21106750.        ,
       21106241.66666667, 21105850.        , 21105508.33333333,
       21105275.        , 21104916.66666

In [8]:
# mid_price pct_chg
df[0][17]

array([ 3.51791095e-05,  2.56975241e-05,  2.64856542e-05,  2.64856542e-05,
        2.53017459e-05,  1.74030719e-05,  1.34504600e-05,  8.71252634e-06,
        7.92183205e-06,  3.18305676e-06, -3.70912266e-07, -2.34546736e-06,
       -1.95053201e-06, -8.26845896e-06, -7.08317603e-06, -1.02405639e-05,
       -1.37917095e-05, -1.77400338e-05, -1.97125604e-05, -1.85291845e-05,
       -1.85291845e-05, -2.20760497e-05, -3.27239516e-05, -2.99614126e-05,
       -2.76002793e-05, -2.36587725e-05, -1.97195097e-05, -3.03732017e-05,
       -2.60404240e-05, -2.72286805e-05, -3.11741924e-05, -3.07797898e-05,
       -2.68327526e-05, -2.72272205e-05, -2.64350771e-05, -3.23527895e-05,
       -3.59184353e-05, -2.72332998e-05, -2.64358038e-05, -2.52512029e-05,
       -2.60401202e-05, -2.68283831e-05, -2.40598708e-05, -1.85398965e-05,
       -1.61798389e-05, -1.10427213e-05, -1.69635061e-05, -1.34123697e-05,
       -1.53872336e-05, -1.93370648e-05, -2.28888738e-05, -2.13057458e-05,
       -1.61720058e-05, -

In [9]:
list(df_index.index)[:10]

['2023-01-01 12:59:00',
 '2023-01-01 13:00:00',
 '2023-01-01 13:01:00',
 '2023-01-01 13:02:00',
 '2023-01-01 13:03:00',
 '2023-01-01 13:04:00',
 '2023-01-01 13:05:00',
 '2023-01-01 13:06:00',
 '2023-01-01 13:07:00',
 '2023-01-01 13:08:00']

In [10]:
env = Environment2(df, list(df_index.index))

In [11]:
env.reset()

array([[ 2.11188833e+07,  2.11194333e+07,  2.11198500e+07, ...,
         2.11000833e+07,  2.10999667e+07,  2.10997833e+07],
       [ 2.11252667e+07,  2.11257667e+07,  2.11263500e+07, ...,
         2.11058333e+07,  2.11054333e+07,  2.11050667e+07],
       [ 2.11113167e+07,  2.11119000e+07,  2.11124333e+07, ...,
         2.10961000e+07,  2.10959167e+07,  2.10957167e+07],
       ...,
       [ 3.46149177e-05,  3.23806301e-05,  3.09018035e-05, ...,
        -1.59345545e-05, -1.54041932e-05, -1.49078923e-05],
       [ 3.87875072e-05,  3.77787926e-05,  3.69082469e-05, ...,
        -1.82220619e-05, -1.78829566e-05, -1.75396261e-05],
       [ 2.78610218e-05,  2.78129375e-05,  2.77832355e-05, ...,
        -4.54834178e-06, -4.75208086e-06, -4.94266394e-06]])

In [312]:
env.step(4)

{'state_time': '2023-01-01',
 'next_state': array([[ 2.10906500e+07,  2.10903833e+07,  2.10897833e+07, ...,
          2.10891000e+07,  2.10889333e+07,  2.10888333e+07],
        [ 2.10988500e+07,  2.10984167e+07,  2.10979833e+07, ...,
          2.10942167e+07,  2.10940500e+07,  2.10939000e+07],
        [ 2.10841833e+07,  2.10839000e+07,  2.10837333e+07, ...,
          2.10850167e+07,  2.10849000e+07,  2.10848667e+07],
        ...,
        [-2.27995526e-05, -2.13427111e-05, -1.95581896e-05, ...,
         -4.02081817e-06, -4.69497452e-06, -4.60785348e-06],
        [-2.03461164e-05, -2.00868996e-05, -1.96347279e-05, ...,
         -5.79735106e-06, -5.86822993e-06, -5.75130427e-06],
        [-8.13369627e-06, -8.32812466e-06, -8.45748411e-06, ...,
         -4.49442771e-06, -4.54330358e-06, -4.53901354e-06]]),
 'reward': 0.2783838,
 'done': False,
 'portfolio_value': 97560471.17636532,
 'balance': 397828260.5201608,
 'bitcoin': -14.237802948807188,
 'position': -3,
 'action_list': deque([4,
  