In [1]:
import pandas as pd

In [6]:
class Environment:
    """
    action의 종류
        - BUY: 가진 현금에서 [100%,50%,25%,10%,5%] 매수
        - SELL: 가진 주식에서 [100%,50%,25%,10%,5%] 매도
        - HOLD: DO NOTHING
        
    action = 1 => 5% 매수 
    action = 2 => 10% 매수 
    action = 3 => 25% 매수 
    action = 4 => 50% 매수 
    action = 5 => 100% 매수 
    
    action = -1 => 5% 매도 
    action = -2 => 10% 매도 
    action = -3 => 25% 매도 
    action = -4 => 50% 매도 
    action = -5 => 100% 매도
    
    action = 0 => Hoding 
    
    """
    # mid price = (high + low) / 2
    PCT_IDX = -3 # 1분전 MID price의 가격 변동 
    PCT_SIGN_IDX = -2 # 1분전 MID price의 가격 변동 부호
    MID_IDX = -1
    
    def __init__(self, chart_data=None, risk_adverse= 1.4 ,stop_trade=0.95 ,balance=100000000, transaction=0.05):
        self.chart_data = chart_data
        self.idx = 0

        
        self.risk_adverse = risk_adverse # 손실에 주는 가중치 
        self.stop_trade = stop_trade # 손절선
        self.transaction = transaction # 거래수수료
        
        self.current_state = chart_data.iloc[self.idx]
        self.next_state = chart_data.iloc[self.idx+1]
        
        self.balance = [balance]  # 포트폴리오가 보유한 현금
        self.bitcoin = [0]          # 포트폴리오가 보유한 비트코인의 가치
        
        self.portfolio_value = [balance]
    
        self.current_price = self.chart_data.iloc[self.idx, self.MID_IDX]
        self.next_price = self.chart_data.iloc[self.idx+1, self.MID_IDX]
        
    def reset(self):
        self.idx = 0
        state = self.chart_data.iloc[self.idx]
        return state
    
    def step(self, action):
        print(self.balance[0]*self.stop_trade, self.portfolio_value[-1], self.balance[0]*self.stop_trade <= self.portfolio_value[-1])
        self.current_state = self.chart_data.iloc[self.idx]
        self.next_state = self.chart_data.iloc[self.idx+1]
        self.current_price = self.chart_data.iloc[self.idx, self.MID_IDX]
        self.next_price = self.chart_data.iloc[self.idx+1, self.MID_IDX]

        current_value = self.balance[-1] + self.bitcoin[-1]*(1+self.current_price)
        self.portfolio_value.append(current_value)
        
        # 학습이 끝나거나 만약 시드의 self.stop_trade%를 잃는다면 손절 
        if ((self.chart_data).shape[0] >= self.idx+1) and (self.balance[0]*self.stop_trade <= self.portfolio_value[-1]):
            s_prime = self.chart_data.iloc[self.idx+1]
            reward = self.get_reward(action) * 100 # -1~1 사이의 loss값이 나오도록 적절하게 scaling
            done = False
            self.idx += 1
            if reward>=0:
                return (s_prime, reward, done, self.portfolio_value[-1])
            else:
                return (s_prime, reward * self.risk_adverse, done, self.portfolio_value[-1])

        else:
            print(f'{self.current_state.name}에서 {self.portfolio_value[-1]}으로 trading stop')
            return (None,None, True, self.portfolio_value[-1])
    
    def get_reward(self, action):
        """
        action은 다음 타임스텝의 행동(매수, 매도, 홀드)을 의미한다. 
        """
        
        """     
        손실에 더 큰 penalty(risk adverse)를 줌으로써, 손실 회피형 Agent를 구성가능 
        근데 이러면, 장기적인 보상에 대해서는 감지를 잘 못하지 않나...?
        
            => GAE가 이 의문을 해결해준다고 생각함. n-step TD를 통해서 미래의 보상을 고려할 수 있음. 
            
        """        
        
        # BUY
        if action > 0:
            # 보유 현금 5% 매수
            if action == 1:
                buy_ratio = 0.5
                buy_budget = self.balance[-1] * buy_ratio
                self.balance.append(self.balance[-1]-buy_budget - buy_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]+buy_budget/self.current_price)  
        
                reward = self.next_state.iloc[self.PCT_IDX] * buy_ratio
                return reward
            
            # 보유 현금 10% 매수
            elif action == 2:
                buy_ratio = 0.10
                buy_budget = self.balance[-1] * buy_ratio
                self.balance.append(self.balance[-1]-buy_budget- buy_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]+buy_budget/self.current_price)  
                
                reward = self.next_state.iloc[self.PCT_IDX] * buy_ratio
                return reward
            
            # 보유 현금 25% 매수
            elif action == 3:
                buy_ratio = 0.25
                buy_budget = self.balance[-1] * buy_ratio
                self.balance.append(self.balance[-1]-buy_budget- buy_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]+buy_budget/self.current_price)  
            
                reward = self.next_state.iloc[self.PCT_IDX] * buy_ratio
                return reward
            
            # 보유 현금 50% 매수
            elif action == 4:
                buy_ratio = 0.5
                buy_budget = self.balance[-1] * buy_ratio
                self.balance.append(self.balance[-1]-buy_budget- buy_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]+buy_budget/self.current_price)  
                
                reward = self.next_state.iloc[self.PCT_IDX] * buy_ratio
                return reward
            
            # 보유 현금 100% 매수
            elif action == 5:
                buy_ratio = 1
                buy_budget = self.balance[-1] * buy_ratio
                self.balance.append(self.balance[-1]-buy_budget- buy_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]+buy_budget/self.current_price)  
                
                reward = self.next_state.iloc[self.PCT_IDX] * buy_ratio
                return reward
            
        # SELL
        elif action < 0:
            # 보유 coin의 5% 매도
            if action == -1:
                sell_ratio = 0.5
                sell_budget = self.bitcoin[-1] * sell_ratio
                self.balance.append(self.balance[-1]+sell_budget*self.current_price - sell_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]-sell_budget)
                
                reward = self.next_state.iloc[self.PCT_IDX] * sell_ratio
                return reward
            
            # 보유 coin의 10% 매도
            if action == -2:
                sell_ratio = 0.10
                sell_budget = self.bitcoin[-1] * sell_ratio
                self.balance.append(self.balance[-1]+sell_budget*self.current_price - sell_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]-sell_budget)
                
                reward = self.next_state.iloc[self.PCT_IDX] * sell_ratio
                return reward

            # 보유 coin의 25% 매도
            if action == -3:
                sell_ratio = 0.25
                sell_budget = self.bitcoin[-1] * sell_ratio
                self.balance.append(self.balance[-1]+sell_budget*self.current_price - sell_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]-sell_budget)
                
                reward = self.next_state.iloc[self.PCT_IDX] * sell_ratio
                return reward

            # 보유 coin의 50% 매도
            if action == -4:
                sell_ratio = 0.50
                sell_budget = self.bitcoin[-1] * sell_ratio
                self.balance.append(self.balance[-1]+sell_budget*self.current_price - sell_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]-sell_budget)
                
                reward = self.next_state.iloc[self.PCT_IDX] * sell_ratio
                return reward
            
            # 보유 coin의 100% 매도
            if action == -5:
                sell_ratio = 1
                sell_budget = self.bitcoin[-1] * sell_ratio
                self.balance.append(self.balance[-1]+sell_budget*self.current_price - sell_budget*self.transaction)
                self.bitcoin.append(self.bitcoin[-1]-sell_budget)
                
                reward = self.next_state.iloc[self.PCT_IDX] * sell_ratio
                return reward
        
        
#         elif action ==0:
#             reward = 0
            
        return reward
        

In [7]:
import os
os.getcwd()

'c:\\Users\\user\\Documents\\GitHub\\mini-project\\bitcoin_scalping_bot\\ipynb 파일'

In [8]:
df = pd.read_csv('../upbit_data/train_data_2023.csv', index_col=0)
df.head()

Unnamed: 0_level_0,60min_ago_volume,60min_ago_change,59min_ago_volume,59min_ago_change,58min_ago_volume,58min_ago_change,57min_ago_volume,57min_ago_change,56min_ago_volume,56min_ago_change,...,4min_ago_volume,4min_ago_change,3min_ago_volume,3min_ago_change,2min_ago_volume,2min_ago_change,1min_ago_volume,1min_ago_change,pct_sign,mid_price
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-01 09:59:00,4.852014,0.0,2.099329,-4.7e-05,2.486127,-0.00057,1.744549,0.00038,1.433399,0.0,...,2.114237,-4.7e-05,7.926682,-0.000308,4.894037,0.000261,5.152585,-0.000498,-1,21092500.0
2023-01-01 10:00:00,2.099329,-4.7e-05,2.486127,-0.00057,1.744549,0.00038,1.433399,0.0,1.662411,-0.000712,...,7.926682,-0.000308,4.894037,0.000261,5.152585,-0.000498,2.414464,-0.000356,-1,21085000.0
2023-01-01 10:01:00,2.486127,-0.00057,1.744549,0.00038,1.433399,0.0,1.662411,-0.000712,3.212477,-9.5e-05,...,4.894037,0.000261,5.152585,-0.000498,2.414464,-0.000356,3.313988,-0.000142,-1,21082000.0
2023-01-01 10:02:00,1.744549,0.00038,1.433399,0.0,1.662411,-0.000712,3.212477,-9.5e-05,2.084449,0.000143,...,5.152585,-0.000498,2.414464,-0.000356,3.313988,-0.000142,8.203873,0.000688,1,21096500.0
2023-01-01 10:03:00,1.433399,0.0,1.662411,-0.000712,3.212477,-9.5e-05,2.084449,0.000143,2.954609,0.0,...,2.414464,-0.000356,3.313988,-0.000142,8.203873,0.000688,3.391815,-7.1e-05,-1,21095000.0


In [9]:
env =  Environment(df)

In [10]:
s = env.reset()

In [12]:
env.step(0)

95000000.0 100000000 True


UnboundLocalError: cannot access local variable 'reward' where it is not associated with a value

In [564]:
df['mid_price']

index
2023-01-01 09:00:00    21071500.0
2023-01-01 09:01:00    21070500.0
2023-01-01 09:02:00    21058500.0
2023-01-01 09:03:00    21066500.0
2023-01-01 09:04:00    21066500.0
                          ...    
2023-08-31 23:55:00    36988000.0
2023-08-31 23:56:00    36976500.0
2023-08-31 23:57:00    36977000.0
2023-08-31 23:58:00    36976500.0
2023-08-31 23:59:00    36977500.0
Name: mid_price, Length: 344438, dtype: float64