In [25]:
import random
import pandas as pd
import gym
import stable_baselines3
from gym import spaces
import numpy as np
from stable_baselines3 import PPO,DQN,A2C
from stable_baselines3.common.env_util import make_vec_env

class stablebaselineEnv(gym.Env):
    def __init__(self,df, usdt_balance=1000, btc_size=0, leverage=1): 
        super(stablebaselineEnv, self).__init__()
        self.action_space = spaces.Discrete(4)  # 0: Long, 1: Short, 2: Close, 3: Hold
        self.observation_space = spaces.Dict({
            "action": spaces.Discrete(4), # 행동 {0:Long, 1:Short, 2:Close, 3:Hold}
            "position": spaces.Discrete(3), # 포지션 {0:Long, 1:Short, 2:None}
            "chart_data": spaces.Box(low=0, high=np.inf, shape=(df.shape[0], df.shape[1]), dtype=np.float32),
            "current_price": spaces.Box(low=0, high=np.inf, shape=(1,), dtype=np.float32), # 현재 가격
            "pnl": spaces.Box(low=0, high=np.inf, shape=(1,), dtype=np.float32), # 미실현 손익
            "closing_pnl": spaces.Box(low=0, high=np.inf, shape=(1,), dtype=np.float32), # 실현 손익
            "total_pnl": spaces.Box(low=0, high=np.inf, shape=(1,), dtype=np.float32), # 누적 손익
            "total_balance": spaces.Box(low=0, high=np.inf, shape=(1,), dtype=np.float32) # 총 자산

        })

        self.df = df # 전체 차트 데이터 초기화
        self.usdt_balance = usdt_balance # 초기 usdt 잔고
        self.initial_usdt_balance = usdt_balance 
        self.btc_size = btc_size # 포지션 수량
        self.min_order = 0.002 # 최소 주문 수량
        self.fee = 0.0005 # 거래 수수료
        self.leverage = leverage # 레버리지
        self.margin = 0 # 포지션 증거금
        self.position = None # 포지션 {0:Long, 1:Short, 2:None}
        self.idx = -1

        self.order_price = 0 # 주문 금액
        self.last_size_value = 0 # (평단가 계산에 필요)
        self.current_avg_price = 0 # 현재 평단가

        self.pnl = 0 # 미실현 손익
        self.closing_pnl = 0 # 실현 손익
        self.total_pnl = 0 # 누적 손익
        self.total_balance = 0 # 총 자산

    def reset(self):
        self.usdt_balance = self.initial_usdt_balance
        self.btc_size = 0
        self.margin = 0
        self.position = None
        self.order_price = 0
        self.last_size_value = 0
        self.current_avg_price = 0
        self.pnl = 0
        self.closing_pnl = 0
        self.total_pnl = 0
        self.total_balance = 0

        self.idx = -1
        self.current_price = None

    # action = 0: Long, 1: Short, 2: Close, 3: Hold
    # position =  0: Long_position, 1: Short_position, 2: None_position
    # ex) (0.002*68000)/1=136, (0.002*68000)/2=68 필요 증거금 계산 예시 #
    def act_check(self, action): # action을 수행할 수 있는 최소한의 조건 확인
        current_price = self.current_price
        min_order = (self.min_order * current_price) / self.leverage # 레버리지 포함 주문수량
        fee = min_order * self.fee
        usdt_margin_balance = self.usdt_balance + self.margin
        if action == 0: # Long
            if self.position == 0 or 2:
                if self.usdt_balance > min_order + fee: # 현재 usdt만 계산
                    return 0
                else:
                    return 3
            elif self.position == 1:
                if usdt_margin_balance > min_order + fee: # 반대 포지션 청산후 추가되는 증거금 포함해서 계산
                    return 0
                else:
                    return 3

        elif action == 1: # Short
            if self.position == 1 or 2:
                if self.usdt_balance > min_order + fee:
                    return 1
                else:
                    return 3
                
            elif self.position == 0:
                if usdt_margin_balance > min_order + fee:
                    return 1
                else:
                    return 3
            
        elif action == 2: # Close
            if self.position == 2:
                return 3
            
        else: # Hold
            return 3

    def act(self, action):
        current_price = self.current_price # 현재 가격
        self.order_price = (self.min_order * current_price) # 주문 금액
        "추후 진입 수량 변경시에 self.order_price만 수정하면 될것같네요"
        order_margin_price = self.order_price * self.leverage # 레버리지 포함 금액 증거금 ex) (0.002*68000)*1=136, (0.002*68000)*2=68
        action = self.act_check(action)
        self.action = action
        if action == 0 and self.position is None: # Long
            if self.position == 0 or 2: # Long or None 포지션시에 진입
                self.btc_size += self.min_order # 포지션수 증가
                self.margin += self.order_price # 증거금 증가
                self.usdt_balance -= order_margin_price + (order_margin_price * self.fee) # 잔고 차감 (수수료 및 증거금)

                self.current_avg_price = (self.last_size_value + self.order_price) / self.btc_size
                self.last_size_value = self.btc_size * current_price

                self.pnl = (self.btc_size * self.current_avg_price) - (self.btc_size * current_price)
                self.closing_pnl = 0
                self.total_pnl += self.closing_pnl
                self.total_balance = self.usdt_balance + self.margin
                self.position = 0
                pass
            
            elif self.position == 1: # Short 포지션시에 진입
                close_fee = (self.btc_size * current_price * self.fee) # 포지션 청산
                self.usdt_balance += self.margin + self.pnl - close_fee # 증거금, 실현 손익, 수수료
                self.margin = 0
                self.btc_size = 0
                self.closing_pnl = self.pnl - close_fee
                "보유 포지션 청산"
                
                self.btc_size += self.min_order # 포지션수 증가
                self.margin += self.order_price # 증거금 증가
                self.usdt_balance -= order_margin_price + (order_margin_price * self.fee) # 잔고 차감 (수수료 및 증거금)

                self.current_avg_price = (self.last_size_value + self.order_price) / self.btc_size
                self.last_size_value = self.btc_size * current_price

                self.pnl = (self.btc_size * self.current_avg_price) - (self.btc_size * current_price)
                self.total_pnl += self.closing_pnl
                self.total_balance = self.usdt_balance + self.margin
                self.position = 0
                "새로운 포지션 진입"
                pass
            
        elif action == 1 and self.position is None: # Short
            if self.position == 1 or 2:
                self.btc_size += self.min_order # 포지션수 증가
                self.margin += self.order_price # 증거금 증가
                self.usdt_balance -= order_margin_price + (order_margin_price * self.fee) # 잔고 차감 (수수료 및 증거금)

                self.current_avg_price = (self.last_size_value + self.order_price) / self.btc_size
                self.last_size_value = self.btc_size * current_price

                self.pnl = (self.btc_size * self.current_avg_price) - (self.btc_size * current_price)
                self.closing_pnl = 0
                self.total_pnl += self.closing_pnl
                self.total_balance = self.usdt_balance + self.margin
                self.position = 1
                pass
            elif self.position == 0:
                close_fee = (self.btc_size * current_price * self.fee) # 포지션 청산
                self.usdt_balance += self.margin + self.pnl - close_fee # 증거금, 실현 손익, 수수료
                self.margin = 0
                self.btc_size = 0
                self.closing_pnl = self.pnl - close_fee
                "보유 포지션 청산"
                
                self.btc_size += self.min_order # 포지션수 증가
                self.margin += self.order_price # 증거금 증가
                self.usdt_balance -= order_margin_price + (order_margin_price * self.fee) # 잔고 차감 (수수료 및 증거금)

                self.current_avg_price = (self.last_size_value + self.order_price) / self.btc_size
                self.last_size_value = self.btc_size * current_price

                self.pnl = (self.btc_size * self.current_avg_price) - (self.btc_size * current_price)
                self.total_pnl += self.closing_pnl
                self.total_balance = self.usdt_balance + self.margin
                self.position = 1
                "새로운 포지션 진입"
                pass

        elif action == 2 and self.position is None: # Close
            close_fee = (self.btc_size * current_price * self.fee)
            self.usdt_balance += self.margin
            self.usdt_balance -= close_fee
            self.margin = 0
            self.btc_size = 0
            
            self.closing_pnl = self.pnl - close_fee
            self.pnl = 0
            self.total_pnl += self.closing_pnl
            self.total_balance = self.usdt_balance + self.margin
            self.position = 2
            pass
        if action == 3 and self.position is not None: # Hold
            if self.position == 0 or 1: # 포지션 보유중
                self.pnl = (self.btc_size * self.current_avg_price) - (self.btc_size * current_price)
                self.closing_pnl = 0
                self.total_pnl += self.closing_pnl
                self.total_balance = self.usdt_balance + self.margin
                pass
            
            elif self.position == 2: # 포지션 없음
                self.pnl = 0
                self.closing_pnl = 0
                self.total_pnl += self.closing_pnl
                self.total_balance = self.usdt_balance + self.margin
                pass
            
        return self.position, self.action, self.pnl, self.closing_pnl, self.total_pnl, self.total_balance
        
        '''
        주문 수량은 일단 항상 최소 주문 금액으로 하겠습니다.
        최소 수량으로 해도 0.002개 이고 1배율일 경우 증거금 136usdt 정도 들어갑니다.
        
        추후 수량이 커질시 미결손실 또한 고려해야함
        '''

    def next_observe(self): 
        if len(self.df) > self.idx +1 :
            self.idx += 1
            self.current_price = self.df.iloc[self.idx]
            return self.current_price
        return None    

    def step(self, action):
        self.current_price = self.next_observe()
        self.current_price = random.uniform(
            self.df.loc[self.idx, 'open'],
            self.df.loc[self.idx, 'close']
            ) #현재 가격을 시가, 종가 사이 랜덤 값으로 결정됨.
        
        self.position, self.action, self.pnl, self.closing_pnl, self.total_pnl, self.total_balance = self.act(action)
        
        return self.position, self.action, self.pnl, self.closing_pnl, self.total_pnl, self.total_balance



In [8]:
df = pd.read_parquet(r'C:\Users\dyd46\Desktop\test\upbit5mindata.parquet')

In [41]:
# def __init__(self,df, usdt_balance=1000, btc_size=0, leverage=1): 
usdt_balance = 1000000
btc_size = 0
leverage = 1
env = stablebaselineEnv(df, usdt_balance, btc_size, leverage)
env.reset()

In [87]:
action = 
position, action, pnl, closing_pnl, total_pnl, total_balance = env.step(action)
print(f"action: {env.action}")
print(f"Next_chart_data: {env.current_price}")
print(f"State index: {env.idx}")
print(f"position: {position}")
print(f"krw: {env.usdt_balance}")
print(f"coin: {env.btc_size}")
print(f"avg_buy_price: {env.current_avg_price}")
print(f"pnl: {pnl}")
print(f"total: {total_balance}")


action: None
Next_chart_data: 38637790.70094483
State index: 44
position: 0
krw: 922801.42
coin: 0.002
avg_buy_price: 38580000.0
pnl: 35.46249854564667
total: 999961.42
