In [1]:
#라이브러리 install (주석 풀고 최초 1회만 실행)
!pip install mplfinance

Collecting mplfinance
  Downloading mplfinance-0.12.10b0-py3-none-any.whl (75 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/75.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mplfinance
Successfully installed mplfinance-0.12.10b0


In [2]:
import threading
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.switch_backend('agg')

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# 0: 모든 로그를 출력합니다. (기본값)
# 1: INFO 로그를 제외하고 출력합니다.
# 2: WARNING 로그를 제외하고 출력합니다.
# 3: ERROR 로그를 제외하고 출력합니다.

import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)  # TensorFlow 로그 중 ERROR 수준만 출력
import abc
import collections
import threading
import time
import datetime
import json
import numpy as np
from tqdm.notebook import tqdm
import sys

from mplfinance.original_flavor import candlestick_ohlc

import warnings
warnings.filterwarnings('ignore')

In [3]:
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
logging.getLogger('tensorflow').setLevel(logging.FATAL)  # TensorFlow 로그를 완전히 억제
tf.get_logger().setLevel('FATAL')

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Conv1D,BatchNormalization, Dropout, MaxPooling1D, Flatten
from tensorflow.keras.optimizers import Adam

tf.__version__

'2.15.0'

In [4]:
# 날짜, 시간 관련 문자열 형식
FORMAT_DATE = "%Y%m%d"
FORMAT_DATETIME = "%Y%m%d%H%M%S"

def get_time_str():
    """Return the current local time as a string in ``FORMAT_DATETIME`` layout.

    The timestamp is truncated to whole seconds before it is formatted.
    """
    whole_seconds = int(time.time())
    now = datetime.datetime.fromtimestamp(whole_seconds)
    return now.strftime(FORMAT_DATETIME)

def sigmoid(x):
    """Logistic function of a scalar ``x``.

    The input is clamped to the interval [-10, 10] before the exponential
    is evaluated, so the result never leaves (sigmoid(-10), sigmoid(10)).
    """
    if x > 10:
        x = 10
    elif x < -10:
        x = -10
    return 1. / (1. + np.exp(-x))

def softmax(x):
    """Softmax of ``x`` computed along axis 0.

    The global maximum of ``x`` is subtracted before exponentiation so the
    exponentials cannot overflow.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    normaliser = exps.sum(axis=0)
    return exps / normaliser

# 로거 이름
LOGGER_NAME = 'rltrader'
BASE_DIR = '/content/drive/MyDrive/rl_trading'


## 드라이브에서 데이터 불러오기


In [5]:
# 코랩 개인 드라이브 연결
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Chart columns: timestamp, open/high/low/close prices, and traded volume.
CHART_DATA_COLUMNS = ['datetime', 'open', 'high', 'low', 'close', 'volume']


class load_data:
    """Reads pre-processed per-stock CSV files and splits them for the learner.

    Each reader returns ``(chart_data, train_data)``:

    * ``chart_data`` holds the ``CHART_DATA_COLUMNS`` columns.
    * ``train_data`` holds every column except ``datetime`` and the four
      price columns (``volume`` is therefore part of the features).
    """

    # Columns removed from the raw frame to obtain the training features.
    _NON_FEATURE_COLUMNS = ['datetime', 'open', 'high', 'low', 'close']

    def __init__(self):
        self.chart_data = None
        self.train_data = None

    def _load(self, path, start_day, end_day):
        """Read ``path`` and keep rows with ``start_day < datetime < end_day``.

        Both bounds are exclusive. A bound of ``None`` means "no limit on
        that side"; previously ``None`` (the default) raised a ``TypeError``
        when compared against the column.
        """
        df = pd.read_csv(path)
        if start_day is not None:
            df = df[df['datetime'] > start_day]
        if end_day is not None:
            df = df[df['datetime'] < end_day]

        chart_data = df[CHART_DATA_COLUMNS]
        train_data = df.drop(self._NON_FEATURE_COLUMNS, axis=1)
        return chart_data, train_data

    def read_csv(self, code=None, start_day=None, end_day=None):
        """Load ``{BASE_DIR}/data/{code}_train.csv`` and return (chart, features)."""
        return self._load(f'{BASE_DIR}/data/{code}_train.csv', start_day, end_day)

    def predict_read_csv(self, code=None, start_day=None, end_day=None):
        """Load ``{BASE_DIR}/data/{code}_predict.csv`` and return (chart, features)."""
        return self._load(f'{BASE_DIR}/data/{code}_predict.csv', start_day, end_day)

## 강화학습 Environment 클래스
### Agent에게 현재 차트 정보를 전달하는 클래스

In [7]:
class Environment:
    """Forward-only cursor over the chart data that the agent observes."""

    PRICE_IDX = 4  # position of the closing price within a chart row

    def __init__(self, chart_data=None):
        self.chart_data = chart_data
        self.observation = None  # chart row most recently handed to the agent
        self.idx = -1

    def reset(self):
        """Rewind the cursor to before the first row and forget the observation."""
        self.idx = -1
        self.observation = None

    def observe(self):
        """Advance one row and return it, or return ``None`` when exhausted.

        When the data is exhausted the cursor and the last observation are
        left untouched.
        """
        upcoming = self.idx + 1
        if upcoming >= len(self.chart_data):
            return None
        self.idx = upcoming
        self.observation = self.chart_data.iloc[upcoming]
        return self.observation

    def get_price(self):
        """Return the closing price of the current observation, or ``None``."""
        if self.observation is None:
            return None
        return self.observation.iloc[self.PRICE_IDX]

## Agent 클래스
### 예측값으로 매수/매도 결정, 거래 액수 결정, 상태 갱신


In [8]:

class Agent:
    """Trading agent: picks an action from network output and executes it.

    The agent tracks cash, held shares and portfolio value, and exposes a
    three-value state (holding ratio, profit/loss ratio, price change versus
    the average buy price) that is appended to every training sample.
    """

    # Number of values in the agent state:
    # holding ratio, profit/loss ratio, price change vs. average buy price.
    STATE_DIM = 3

    # Trading costs
    TRADING_CHARGE = 0.00015  # brokerage fee, 0.015%
    TRADING_TAX = 0.0018  # transaction tax, 0.18% (applied on sells)
    TRADING_HOGA = 0.0017  # price adjustment against the trader on each fill, 0.17%

    # Actions
    ACTION_BUY = 0
    ACTION_SELL = 1
    ACTION_HOLD = 2
    # Actions for which the networks produce an output.
    ACTIONS = [ACTION_BUY, ACTION_SELL, ACTION_HOLD]
    NUM_ACTIONS = len(ACTIONS)  # number of network outputs

    def __init__(self, environment, initial_balance, min_trading_price, max_trading_price):
        """
        Args:
            environment: object exposing ``get_price()`` for the current price.
            initial_balance: starting cash.
            min_trading_price: minimum amount of money per single trade.
            max_trading_price: maximum amount of money per single trade.
        """
        self.environment = environment
        self.initial_balance = initial_balance

        self.min_trading_price = min_trading_price
        self.max_trading_price = max_trading_price

        # Account
        self.balance = initial_balance  # cash currently held
        self.num_stocks = 0  # shares currently held
        # Portfolio value = balance + num_stocks * current price. With no
        # shares this equals the initial balance; starting at 0 (as before)
        # made get_states() divide by zero when called before reset().
        self.portfolio_value = initial_balance

        self.num_buy = 0  # number of executed buys
        self.num_sell = 0  # number of executed sells
        self.num_hold = 0  # number of holds

        # Agent state (STATE_DIM values are derived from these)
        self.ratio_hold = 0  # share of portfolio value held as stock
        self.profitloss = 0  # portfolio value relative to initial balance, minus 1
        self.avg_buy_price = 0  # average cost per held share

    def reset(self):
        """Restore the account to its initial condition for a new episode."""
        self.balance = self.initial_balance
        self.num_stocks = 0
        self.portfolio_value = self.initial_balance
        self.num_buy = 0
        self.num_sell = 0
        self.num_hold = 0
        self.ratio_hold = 0
        self.profitloss = 0
        self.avg_buy_price = 0

    def get_states(self):
        """Return ``(ratio_hold, profitloss, price / avg_buy_price - 1)``.

        The last element is 0 while no average buy price is recorded.
        ``ratio_hold`` is recomputed (and stored) from the current price.
        """
        price = self.environment.get_price()
        self.ratio_hold = self.num_stocks * price / self.portfolio_value
        price_change = (price / self.avg_buy_price) - 1 if self.avg_buy_price > 0 else 0
        return (self.ratio_hold, self.profitloss, price_change)

    def decide_action(self, pred_value, pred_policy, epsilon):
        """Choose an action epsilon-greedily from the network predictions.

        The policy prediction is preferred; the value prediction is the
        fallback. With no prediction, or when all predicted values are equal,
        the action is always random.

        Returns:
            ``(action, confidence, exploration)`` where ``confidence`` is the
            policy output for the action, the sigmoid of the value output, or
            0.5 when no prediction is available.
        """
        pred = pred_policy
        if pred is None:
            pred = pred_value

        if pred is None:
            epsilon = 1  # nothing to exploit: always explore
        else:
            # All outputs identical: argmax carries no information, explore.
            maxpred = np.max(pred)
            if (pred == maxpred).all():
                epsilon = 1

        if np.random.rand() < epsilon:
            exploration = True
            action = np.random.randint(self.NUM_ACTIONS)
        else:
            exploration = False
            action = np.argmax(pred)

        confidence = .5
        if pred_policy is not None:
            confidence = pred[action]
        elif pred_value is not None:
            confidence = sigmoid(pred[action])

        return action, confidence, exploration

    def validate_action(self, action):
        """Return ``False`` when ``action`` cannot be executed right now.

        A buy needs enough cash for one share including costs; a sell needs
        at least one held share. Any other action is always valid.
        """
        if action == Agent.ACTION_BUY:
            unit_cost = self.environment.get_price() * (1 + self.TRADING_HOGA) * (1 + self.TRADING_CHARGE)
            if self.balance < unit_cost:
                return False
        elif action == Agent.ACTION_SELL:
            if self.num_stocks <= 0:
                return False
        return True

    def decide_trading_unit(self, confidence):
        """Return the number of shares to trade for the given confidence.

        The traded amount grows linearly from ``min_trading_price`` to
        ``max_trading_price`` with ``confidence`` (clipped to that range) and
        is converted to shares at the current price, with a minimum of 1.

        A NaN confidence is treated as 0, i.e. the minimum amount. The
        previous code returned ``min_trading_price`` itself, a money amount,
        as the share count, which made NaN trades as large as possible.
        """
        if np.isnan(confidence):
            confidence = 0.
        price_range = self.max_trading_price - self.min_trading_price
        added_trading_price = max(min(int(confidence * price_range), price_range), 0)
        trading_price = self.min_trading_price + added_trading_price
        return max(int(trading_price / self.environment.get_price()), 1)

    def act(self, action, confidence):
        """Execute ``action`` (downgraded to hold when invalid) and return the
        resulting profit/loss ratio relative to the initial balance."""
        if not self.validate_action(action):
            action = Agent.ACTION_HOLD

        curr_price = self.environment.get_price()

        if action == Agent.ACTION_BUY:
            # Cost of one share including the price adjustment and the fee.
            buy_price = curr_price * (1 + self.TRADING_HOGA) * (1 + self.TRADING_CHARGE)
            trading_unit = self.decide_trading_unit(confidence)
            balance = self.balance - buy_price * trading_unit
            # Not enough cash: buy as many shares as cash allows, capped by
            # the maximum trade amount.
            if balance < 0:
                trading_unit = min(
                    int(self.balance / buy_price),
                    int(self.max_trading_price / curr_price)
                )
            invest_amount = buy_price * trading_unit
            if invest_amount > 0:
                # Weighted average cost over old and newly bought shares.
                self.avg_buy_price = \
                    (self.avg_buy_price * self.num_stocks + buy_price * trading_unit) \
                    / (self.num_stocks + trading_unit)
                self.balance -= invest_amount
                self.num_stocks += trading_unit
                self.num_buy += 1

        elif action == Agent.ACTION_SELL:
            # Proceeds of one share after the price adjustment, tax and fee.
            sell_price = curr_price * (1 - self.TRADING_HOGA) * (1 - (self.TRADING_TAX + self.TRADING_CHARGE))
            # Never sell more shares than are held.
            trading_unit = min(self.decide_trading_unit(confidence), self.num_stocks)
            invest_amount = sell_price * trading_unit
            if invest_amount > 0:
                # Remaining cost basis spread over the remaining shares;
                # reset to 0 once the position is fully closed.
                self.avg_buy_price = \
                    (self.avg_buy_price * self.num_stocks - sell_price * trading_unit) \
                    / (self.num_stocks - trading_unit) \
                    if self.num_stocks > trading_unit else 0
                self.num_stocks -= trading_unit
                self.balance += invest_amount
                self.num_sell += 1

        elif action == Agent.ACTION_HOLD:
            self.num_hold += 1

        # Mark the portfolio to the current price.
        self.portfolio_value = self.balance + curr_price * self.num_stocks
        self.profitloss = self.portfolio_value / self.initial_balance - 1
        return self.profitloss


## 결과 시각화 Visualizer 함수
### 차트, agent, value, policy, pv

In [9]:
class Visualizer:
    """Draws a five-panel summary figure of one training/evaluation epoch.

    Panels (top to bottom): candlestick chart with volume, agent actions and
    held shares, value-network output, policy-network output, portfolio value.
    """

    COLORS = ['r', 'b', 'g']  # buy, sell, hold

    # Serialises all matplotlib access. Owned by the class (like the locks on
    # Network and ReinforcementLearner) instead of relying on a module-level
    # `lock` name being defined somewhere else in the notebook.
    lock = threading.Lock()

    def __init__(self):
        self.canvas = None
        # Matplotlib Figure acting as the canvas
        self.fig = None
        # Matplotlib Axes, one per panel
        self.axes = None
        self.title = ''  # figure title
        self.x = []
        self.xticks = []
        self.xlabels = []

    def prepare(self, chart_data, title):
        """Create the figure and draw the static chart panel.

        Args:
            chart_data: DataFrame whose columns are datetime, open, high, low,
                close, volume (in that order).
            title: first line of the figure title.
        """
        self.title = title
        with self.lock:
            # Fresh canvas with five stacked panels sharing the x axis.
            self.fig, self.axes = plt.subplots(
                nrows=5, ncols=1, facecolor='w', sharex=True, figsize=(50, 15))

            for ax in self.axes:
                # Disable hard-to-read scientific notation.
                ax.get_xaxis().get_major_formatter() \
                    .set_scientific(False)
                ax.get_yaxis().get_major_formatter() \
                    .set_scientific(False)
                # Put the y axis ticks on the right-hand side.
                ax.yaxis.tick_right()
            # Panel 1: price chart
            self.axes[0].set_ylabel('Env.')
            x = np.arange(len(chart_data))
            # Columns: position, open, high, low, close
            ohlc = np.hstack((
                x.reshape(-1, 1), np.array(chart_data)[:, 1:-1]))
            # Rising candles in red, falling candles in blue.
            candlestick_ohlc(self.axes[0], ohlc, colorup='r', colordown='b')
            # Volume bars on a secondary y axis.
            ax = self.axes[0].twinx()
            volume = np.array(chart_data)[:, -1].tolist()
            ax.bar(x, volume, color='b', alpha=0.3)
            # X axis: everything is plotted against row positions 0..n-1, so
            # the ticks must be positions too. Index labels (used before) no
            # longer match positions once the frame has been filtered without
            # resetting its index.
            self.x = x
            self.xticks = self.x[[0, -1]]
            self.xlabels = chart_data.iloc[[0, -1]]['datetime']

    def plot(self, epoch_str=None, num_epoches=None, epsilon=None,
            action_list=None, actions=None, num_stocks=None,
            outvals_value=[], outvals_policy=[], exps=None,
            initial_balance=None, pvs=None):
        """Draw panels 2-5 for one epoch.

        Args:
            epoch_str, num_epoches, epsilon: shown in the figure title.
            action_list: action ids, paired positionally with ``COLORS``.
            actions: action taken at each x position.
            num_stocks: held shares at each x position.
            outvals_value: value-network outputs per position (may be empty).
            outvals_policy: policy-network outputs per position (may be empty).
            exps: x positions at which the action was exploratory.
            initial_balance: starting cash, drawn as the baseline.
            pvs: portfolio value at each x position.

        The list defaults are never mutated; both are copied into arrays.
        """
        with self.lock:
            actions = np.array(actions)
            outvals_value = np.array(outvals_value)
            outvals_policy = np.array(outvals_policy)
            # Baseline of the portfolio panel: the initial balance everywhere.
            pvs_base = np.zeros(len(actions)) + initial_balance

            # Panel 2: agent state (actions as background, held shares as line)
            for action, color in zip(action_list, self.COLORS):
                for i in self.x[actions == action]:
                    self.axes[1].axvline(i, color=color, alpha=0.1)
            self.axes[1].plot(self.x, num_stocks, '-k')

            # Panel 3: value network
            if len(outvals_value) > 0:
                max_actions = np.argmax(outvals_value, axis=1)
                for action, color in zip(action_list, self.COLORS):
                    # Background: action with the highest value.
                    for idx in self.x:
                        if max_actions[idx] == action:
                            self.axes[2].axvline(idx, color=color, alpha=0.1)
                    # Line: value output for this action.
                    self.axes[2].plot(self.x, outvals_value[:, action],
                        color=color, linestyle='-')

            # Panel 4: policy network
            # Exploratory steps get a yellow background.
            for exp_idx in (exps if exps is not None else []):
                self.axes[3].axvline(exp_idx, color='y')
            # Background: preferred action (policy output, else value output).
            _outvals = outvals_policy if len(outvals_policy) > 0 else outvals_value
            for idx, outval in zip(self.x, _outvals):
                color = 'white'
                if np.isnan(outval.max()):
                    continue
                if outval.argmax() == Agent.ACTION_BUY:
                    color = self.COLORS[0]  # buy: red
                elif outval.argmax() == Agent.ACTION_SELL:
                    color = self.COLORS[1]  # sell: blue
                elif outval.argmax() == Agent.ACTION_HOLD:
                    color = self.COLORS[2]  # hold: green
                self.axes[3].axvline(idx, color=color, alpha=0.1)
            # Lines: policy output per action.
            if len(outvals_policy) > 0:
                for action, color in zip(action_list, self.COLORS):
                    self.axes[3].plot(
                        self.x, outvals_policy[:, action],
                        color=color, linestyle='-')

            # Panel 5: portfolio value
            self.axes[4].axhline(
                initial_balance, linestyle='-', color='gray')  # baseline
            self.axes[4].fill_between(self.x, pvs, pvs_base,
                where=pvs > pvs_base, facecolor='r', alpha=0.1)  # profit
            self.axes[4].fill_between(self.x, pvs, pvs_base,
                where=pvs < pvs_base, facecolor='b', alpha=0.1)  # loss
            self.axes[4].plot(self.x, pvs, '-k')
            self.axes[4].xaxis.set_ticks(self.xticks)
            self.axes[4].xaxis.set_ticklabels(self.xlabels)

            # Title with epoch and exploration rate.
            self.fig.suptitle(f'{self.title}\nEPOCH:{epoch_str}/{num_epoches} EPSILON:{epsilon:.2f}')
            self.fig.tight_layout()
            self.fig.subplots_adjust(top=0.85)

    def clear(self, xlim):
        """Wipe panels 2-5 (the price chart is kept) and reset axis settings."""
        with self.lock:
            _axes = self.axes.tolist()
            for ax in _axes[1:]:
                ax.cla()  # erase what was drawn
                ax.relim()  # reset data limits
                ax.autoscale()  # rescale
            # Restore the y labels removed by cla().
            self.axes[1].set_ylabel('Agent')
            self.axes[2].set_ylabel('V')
            self.axes[3].set_ylabel('P')
            self.axes[4].set_ylabel('PV')
            for ax in _axes:
                ax.set_xlim(xlim)
                ax.get_xaxis().get_major_formatter() \
                    .set_scientific(False)
                ax.get_yaxis().get_major_formatter() \
                    .set_scientific(False)
                # Plain tick labels without an offset.
                ax.ticklabel_format(useOffset=False)

    def save(self, path):
        """Write the current figure to ``path``."""
        with self.lock:
            self.fig.savefig(path)


## 신경망 Network 클래스
### train_data+agent_state를 넣어서 policy_pred, value_pred를 구하는 클래스

In [10]:
class Network:
    """Common wrapper around a Keras model shared by the concrete networks.

    Subclasses build ``self.model``; this base class provides locked
    prediction/training and weight persistence.
    """

    lock = threading.Lock()  # one lock shared by every network instance

    def __init__(self, input_dim=0, output_dim=0, lr=0.001,
                shared_network=None, activation='sigmoid', loss='mse'):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.lr = lr
        self.shared_network = shared_network
        self.activation = activation
        self.loss = loss
        self.model = None

    def predict(self, sample):
        """Run the model on ``sample`` and return the output flattened to 1-D."""
        with self.lock:
            return self.model.predict_on_batch(sample).flatten()

    def train_on_batch(self, x, y):
        """Fit the model on ``(x, y)`` for 10 epochs.

        Returns the sum of the per-epoch losses reported by Keras.
        """
        with self.lock:
            history = self.model.fit(x, y, epochs=10, verbose=False)
            total_loss = 0. + np.sum(history.history['loss'])
        return total_loss

    def save_model(self, model_path):
        """Save the model weights to ``model_path``; no-op without path or model."""
        if model_path is None or self.model is None:
            return
        self.model.save_weights(model_path, overwrite=True)

    def load_model(self, model_path):
        """Load model weights from ``model_path``; no-op when the path is ``None``."""
        if model_path is None:
            return
        self.model.load_weights(model_path)

    @classmethod
    def get_shared_network(cls, net='lstm', num_steps=1, input_dim=0, output_dim=0):
        """Build the network head to be shared between value and policy nets.

        Returns ``None`` for any ``net`` other than ``'lstm'`` or ``'cnn'``.
        ``output_dim`` is unused here (only needed by a PyTorch variant).
        """
        if net == 'lstm':
            return LSTMNetwork.get_network_head(Input((num_steps, input_dim)))
        if net == 'cnn':
            return CNN.get_network_head(Input((num_steps, input_dim)))
        return None

class LSTMNetwork(Network):
    """Network whose head is a stack of four LSTM layers.

    The final Dense layer maps the head output to ``output_dim`` values with
    the configured activation. When ``shared_network`` is given, its input
    and output are reused instead of building a new head.
    """

    def __init__(self, *args, num_steps=1, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_steps = num_steps
        if self.shared_network is not None:
            net_in = self.shared_network.input
            net_out = self.shared_network.output
        else:
            net_in = Input((self.num_steps, self.input_dim))
            net_out = self.get_network_head(net_in).output
        out_layer = Dense(
            self.output_dim, activation=self.activation,
            kernel_initializer='random_normal')
        self.model = Model(net_in, out_layer(net_out))
        self.model.compile(
            optimizer=Adam(learning_rate=self.lr), loss=self.loss)

    @staticmethod
    def get_network_head(inp):
        """Return a Model mapping ``inp`` through LSTM(256/128/64/32) layers,
        each followed by batch normalisation."""
        hidden = inp
        # The first three layers emit full sequences for the next LSTM.
        for units in (256, 128, 64):
            hidden = LSTM(units, dropout=0.1, return_sequences=True,
                          kernel_initializer='random_normal')(hidden)
            hidden = BatchNormalization()(hidden)
        # The last layer collapses the sequence to a single vector.
        hidden = LSTM(32, dropout=0.1, kernel_initializer='random_normal')(hidden)
        hidden = BatchNormalization()(hidden)
        return Model(inp, hidden)

    def train_on_batch(self, x, y):
        """Reshape ``x`` to (batch, num_steps, input_dim) and train."""
        batch = np.array(x).reshape((-1, self.num_steps, self.input_dim))
        return super().train_on_batch(batch, y)

    def predict(self, sample):
        """Reshape ``sample`` to a single (1, num_steps, input_dim) batch and predict."""
        single = np.array(sample).reshape((1, self.num_steps, self.input_dim))
        return super().predict(single)


class CNN(Network):
    """Network whose head is a stack of three Conv1D blocks.

    Inputs are 3-D, ``(batch, num_steps, input_dim)``. This is the shape
    that ``Conv1D``/``MaxPooling1D`` operate on and the shape of the shared
    head built by ``Network.get_shared_network``. The previous 4-D input
    ``(num_steps, input_dim, 1)`` was incompatible with ``MaxPooling1D``
    (which requires 3-D input), so a standalone CNN could not be built, and
    it disagreed with the 3-D shared head.
    """

    def __init__(self, *args, num_steps=1, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_steps = num_steps
        inp = None
        output = None
        if self.shared_network is None:
            inp = Input((self.num_steps, self.input_dim))
            output = self.get_network_head(inp).output
        else:
            inp = self.shared_network.input
            output = self.shared_network.output
        output = Dense(
            self.output_dim, activation=self.activation,
            kernel_initializer='random_normal')(output)
        self.model = Model(inp, output)
        self.model.compile(
            optimizer=Adam(learning_rate=self.lr), loss=self.loss)

    @staticmethod
    def get_network_head(inp):
        """Return a Model mapping ``inp`` through three Conv1D blocks
        (256, 64, 32 filters), each followed by batch normalisation, max
        pooling and dropout, and finally flattened."""
        output = Conv1D(256, kernel_size=5,
            padding='same', activation='sigmoid',
            kernel_initializer='random_normal')(inp)
        output = BatchNormalization()(output)
        output = MaxPooling1D(pool_size=2, padding='same')(output)
        output = Dropout(0.1)(output)
        output = Conv1D(64, kernel_size=5,
            padding='same', activation='sigmoid',
            kernel_initializer='random_normal')(output)
        output = BatchNormalization()(output)
        output = MaxPooling1D(pool_size=2, padding='same')(output)
        output = Dropout(0.1)(output)
        output = Conv1D(32, kernel_size=5,
            padding='same', activation='sigmoid',
            kernel_initializer='random_normal')(output)
        output = BatchNormalization()(output)
        output = MaxPooling1D(pool_size=2, padding='same')(output)
        output = Dropout(0.1)(output)
        output = Flatten()(output)
        return Model(inp, output)

    def train_on_batch(self, x, y):
        """Reshape ``x`` to (batch, num_steps, input_dim) and train."""
        x = np.array(x).reshape((-1, self.num_steps, self.input_dim))
        return super().train_on_batch(x, y)

    def predict(self, sample):
        """Reshape ``sample`` to (batch, num_steps, input_dim) and predict."""
        sample = np.array(sample).reshape(
            (-1, self.num_steps, self.input_dim))
        return super().predict(sample)


In [11]:
logger = logging.getLogger(LOGGER_NAME)

class ReinforcementLearner:
    __metaclass__ = abc.ABCMeta
    lock = threading.Lock()

    def __init__(self, rl_method='rl', stock_code=None,
                chart_data=None, training_data=None,
                min_trading_price=100000, max_trading_price=10000000,
                net='lstm', num_steps=1, lr=0.0005,
                discount_factor=0.9, num_epoches=1000,
                balance=100000000, start_epsilon=1,
                value_network=None, policy_network=None,
                value_network_activation='linear', policy_network_activation='softmax',
                output_path='', reuse_models=True, gen_output=True):
        """Set up environment, agent, visualizer and bookkeeping for learning.

        Args:
            rl_method: label of the learning method (used in log output).
            stock_code: identifier of the traded stock.
            chart_data: price/volume frame handed to the Environment.
            training_data: feature frame; one row per step.
            min_trading_price, max_trading_price: per-trade money limits.
            net: network type, ``'lstm'`` or ``'cnn'``.
            num_steps: number of consecutive samples fed to the network.
            lr: learning rate.
            discount_factor: reward discount factor.
            num_epoches: number of training epochs.
            balance: initial cash of the agent.
            start_epsilon: initial exploration rate.
            value_network, policy_network: pre-built networks, if any.
            value_network_activation, policy_network_activation: output
                activations used when the networks are built here.
            output_path: directory for generated output.
            reuse_models: load existing weights when a weight file exists.
            gen_output: whether output files are generated.
        """
        # Argument checks
        assert min_trading_price > 0
        assert max_trading_price > 0
        assert max_trading_price >= min_trading_price
        assert num_steps > 0
        assert lr > 0
        # Reinforcement-learning settings
        self.rl_method = rl_method
        self.discount_factor = discount_factor
        self.num_epoches = num_epoches
        self.start_epsilon = start_epsilon
        # Environment
        self.stock_code = stock_code
        self.chart_data = chart_data
        self.environment = Environment(chart_data)
        # Agent
        self.agent = Agent(self.environment, balance, min_trading_price, max_trading_price)
        # Training data
        self.training_data = training_data
        self.sample = None
        self.training_data_idx = -1
        # Feature vector size = training-data columns + agent state size
        self.num_features = self.agent.STATE_DIM
        if self.training_data is not None:
            self.num_features += self.training_data.shape[1]
        # Network settings
        self.net = net
        self.num_steps = num_steps
        self.lr = lr
        self.value_network = value_network
        self.policy_network = policy_network
        self.reuse_models = reuse_models
        self.value_network_activation = value_network_activation
        self.policy_network_activation = policy_network_activation
        # Weight-file locations are read by init_*_network() and run() but
        # were never initialised here. Default them to None, keeping any
        # value that was already assigned before this constructor ran.
        self.value_network_path = getattr(self, 'value_network_path', None)
        self.policy_network_path = getattr(self, 'policy_network_path', None)
        # Visualisation
        self.visualizer = Visualizer()
        # Per-epoch memory
        self.memory_sample = []
        self.memory_action = []
        self.memory_reward = []
        self.memory_value = []
        self.memory_policy = []
        self.memory_pv = []
        self.memory_num_stocks = []
        self.memory_exp_idx = []
        # Per-epoch statistics
        self.loss = 0.
        self.itr_cnt = 0
        self.exploration_cnt = 0
        self.batch_size = 0
        # Output locations
        self.epoch_summary_dir=None
        self.output_path = output_path
        self.gen_output = gen_output

    def init_value_network(self, shared_network=None, loss='mse'):
        if self.net == 'lstm':
            self.value_network = LSTMNetwork(
                input_dim=self.num_features,
                output_dim=self.agent.NUM_ACTIONS,
                lr=self.lr, num_steps=self.num_steps,
                shared_network=shared_network,
                activation=self.value_network_activation, loss=loss)
        elif self.net == 'cnn':
            self.value_network = CNN(
                input_dim=self.num_features,
                output_dim=self.agent.NUM_ACTIONS,
                lr=self.lr, num_steps=self.num_steps,
                shared_network=shared_network,
                activation=self.value_network_activation, loss=loss)
        if self.reuse_models and os.path.exists(self.value_network_path):
            self.value_network.load_model(model_path=self.value_network_path)

    def init_policy_network(self, shared_network=None, loss='categorical_crossentropy'):
        if self.net == 'lstm':
            self.policy_network = LSTMNetwork(
                input_dim=self.num_features,
                output_dim=self.agent.NUM_ACTIONS,
                lr=self.lr, num_steps=self.num_steps,
                shared_network=shared_network,
                activation=self.policy_network_activation, loss=loss)
        elif self.net == 'cnn':
            self.policy_network = CNN(
                input_dim=self.num_features,
                output_dim=self.agent.NUM_ACTIONS,
                lr=self.lr, num_steps=self.num_steps,
                shared_network=shared_network,
                activation=self.policy_network_activation, loss=loss)
        if self.reuse_models and os.path.exists(self.policy_network_path):
            self.policy_network.load_model(model_path=self.policy_network_path)

    def reset(self):
        self.sample = None
        self.training_data_idx = -1
        # 환경 초기화
        self.environment.reset()
        # 에이전트 초기화
        self.agent.reset()
        # 가시화 초기화
        self.visualizer.clear([0, len(self.chart_data)])
        # 메모리 초기화
        self.memory_sample = []
        self.memory_action = []
        self.memory_reward = []
        self.memory_value = []
        self.memory_policy = []
        self.memory_pv = []
        self.memory_num_stocks = []
        self.memory_exp_idx = []
        # 에포크 관련 정보 초기화
        self.loss = 0.
        self.itr_cnt = 0
        self.exploration_cnt = 0
        self.batch_size = 0

    def build_sample(self):
        self.environment.observe()
        if len(self.training_data) > self.training_data_idx + 1:
            self.training_data_idx += 1
            self.sample = self.training_data.iloc[self.training_data_idx].tolist()
            self.sample.extend(self.agent.get_states())
            return self.sample  #train_data 와 현재 agent의 상태
        return None

    @abc.abstractmethod
    def get_batch(self):
        pass

    def fit(self):
        # 배치 학습 데이터 생성
        x, y_value, y_policy = self.get_batch()
        # 손실 초기화
        self.loss = None
        if len(x) > 0:
            loss = 0
            if y_value is not None:
                # 가치 신경망 갱신
                loss += self.value_network.train_on_batch(x, y_value)
            if y_policy is not None:
                # 정책 신경망 갱신
                loss += self.policy_network.train_on_batch(x, y_policy)
            self.loss = loss

    def visualize(self, epoch_str, num_epoches, epsilon):
        """Plot the finished epoch and save it as a PNG in the summary folder.

        The first ``num_steps - 1`` positions have no recorded step, so every
        memory series is left-padded (hold action, zero shares, NaN network
        outputs, initial balance) before plotting. The padded series replace
        the stored memory lists.
        """
        pad = self.num_steps - 1
        n_outputs = len(Agent.ACTIONS)

        self.memory_action = [Agent.ACTION_HOLD] * pad + self.memory_action
        self.memory_num_stocks = [0] * pad + self.memory_num_stocks
        if self.value_network is not None:
            nan_values = np.full(n_outputs, np.nan)
            self.memory_value = [nan_values] * pad + self.memory_value
        if self.policy_network is not None:
            nan_policy = np.full(n_outputs, np.nan)
            self.memory_policy = [nan_policy] * pad + self.memory_policy
        self.memory_pv = [self.agent.initial_balance] * pad + self.memory_pv

        plot_args = dict(
            epoch_str=epoch_str,
            num_epoches=num_epoches,
            epsilon=epsilon,
            action_list=Agent.ACTIONS,
            actions=self.memory_action,
            num_stocks=self.memory_num_stocks,
            outvals_value=self.memory_value,
            outvals_policy=self.memory_policy,
            exps=self.memory_exp_idx,
            initial_balance=self.agent.initial_balance,
            pvs=self.memory_pv,
        )
        self.visualizer.plot(**plot_args)

        image_path = os.path.join(self.epoch_summary_dir, f'epoch_summary_{epoch_str}.png')
        self.visualizer.save(image_path)

    def run(self, learning=True):
        info = (
            f'[{self.stock_code}] RL:{self.rl_method} NET:{self.net} '
            f'LR:{self.lr} DF:{self.discount_factor} '
        )

        with self.lock:
            logger.debug(info)
        # 시작 시간
        time_start = time.time()

        # 가시화 준비
        # 차트 데이터는 변하지 않으므로 미리 가시화
        print(f'{self.stock_code}__ 차트데이터 만들기')
        self.visualizer.prepare(self.environment.chart_data, info)

        # 가시화 결과 저장할 폴더 준비
        if self.gen_output:
            self.epoch_summary_dir = os.path.join(self.output_path, f'epoch_summary_{self.stock_code}')
            if not os.path.isdir(self.epoch_summary_dir):
                os.makedirs(self.epoch_summary_dir)
            else:
                for f in os.listdir(self.epoch_summary_dir):
                    os.remove(os.path.join(self.epoch_summary_dir, f))
            ##===========================================================================================
            if self.value_network_path is not None:
                self.epoch_value_network_dir = os.path.join(self.value_network_path, f'epoch_summary_{self.stock_code}_value_network')
                if not os.path.isdir(self.epoch_value_network_dir):
                    os.makedirs(self.epoch_value_network_dir)
                else:
                    for f in os.listdir(self.epoch_value_network_dir):
                        os.remove(os.path.join(self.epoch_value_network_dir, f))
            ##===========================================================================================
            if self.policy_network_path is not None:
                self.epoch_policy_network_dir = os.path.join(self.policy_network_path, f'epoch_summary_{self.stock_code}_policy_network')
                if not os.path.isdir(self.epoch_policy_network_dir):
                    os.makedirs(self.epoch_policy_network_dir)
                else:
                    for f in os.listdir(self.epoch_policy_network_dir):
                        os.remove(os.path.join(self.epoch_policy_network_dir, f))


        # 학습에 대한 정보 초기화
        max_portfolio_value = 0
        epoch_win_cnt = 0

        # 에포크 반복
        for epoch in tqdm(range(self.num_epoches)):

            time_start_epoch = time.time()

            # step 샘플을 만들기 위한 큐
            q_sample = collections.deque(maxlen=self.num_steps) #num_step 크기만큼만 데이터를 저장

            # 환경, 에이전트, 신경망, 가시화, 메모리 초기화
            self.reset()

            # 학습을 진행할 수록 탐험 비율 감소
            if learning:
                epsilon = self.start_epsilon * (1 - (epoch / (self.num_epoches - 1)))
            else:
                epsilon = self.start_epsilon

            print(f'stock_code : {self.stock_code}__episode : {epoch}__에이전트 거래 시작')
            for i in tqdm(range(len(self.training_data)), leave=False):
                # 샘플 생성
                next_sample = self.build_sample()  #train_data 와 현재 agent의 상태
                if next_sample is None:
                    break

                # num_steps만큼 샘플 저장
                q_sample.append(next_sample)
                if len(q_sample) < self.num_steps:
                    continue

                # 가치, 정책 신경망 예측
                pred_value = None
                pred_policy = None
                if self.value_network is not None:
                    pred_value = self.value_network.predict(list(q_sample))
                if self.policy_network is not None:
                    pred_policy = self.policy_network.predict(list(q_sample))
                # 신경망 또는 탐험에 의한 행동 결정
                action, confidence, exploration = \
                    self.agent.decide_action(pred_value, pred_policy, epsilon)

                # 결정한 행동을 수행하고 보상 획득
                reward = self.agent.act(action, confidence) #self.profitloss = self.portfolio_value / self.initial_balance - 1

                # 행동 및 행동에 대한 결과를 기억
                self.memory_sample.append(list(q_sample))
                self.memory_action.append(action)   #[매수, 매도, 관망] 중 한개
                self.memory_reward.append(reward)
                if self.value_network is not None:
                    self.memory_value.append(pred_value)
                if self.policy_network is not None:
                    self.memory_policy.append(pred_policy)
                self.memory_pv.append(self.agent.portfolio_value)
                self.memory_num_stocks.append(self.agent.num_stocks)
                if exploration:
                    self.memory_exp_idx.append(self.training_data_idx)

                # 반복에 대한 정보 갱신
                self.batch_size += 1
                self.itr_cnt += 1
                self.exploration_cnt += 1 if exploration else 0

            # 에포크 종료 후 학습
            if learning:
                self.fit()

            # 에포크 관련 정보 로그 기록
            num_epoches_digit = len(str(self.num_epoches))
            epoch_str = str(epoch).rjust(num_epoches_digit, '0')
            time_end_epoch = time.time()
            elapsed_time_epoch = time_end_epoch - time_start_epoch
            logger.debug(f'[{self.stock_code}][Epoch {epoch_str}/{self.num_epoches}] '
                f'Epsilon:{epsilon:.4f} #Expl.:{self.exploration_cnt}/{self.itr_cnt} '
                f'#Buy:{self.agent.num_buy} #Sell:{self.agent.num_sell} #Hold:{self.agent.num_hold} '
                f'#Stocks:{self.agent.num_stocks} PV:{self.agent.portfolio_value:,.0f} '
                f'Loss:{self.loss:.6f} ET:{elapsed_time_epoch:.4f}')

            # 에포크 관련 정보 가시화
            if self.gen_output:
                    self.visualize(epoch_str, self.num_epoches, epsilon)

            # 학습 관련 정보 갱신
            max_portfolio_value = max(
                max_portfolio_value, self.agent.portfolio_value)
            if self.agent.portfolio_value > self.agent.initial_balance:
                epoch_win_cnt += 1

            # 에피소드로 생성된 모델 저장
            if self.value_network is not None and self.value_network_path is not None:
                self.value_network.save_model(os.path.join(self.epoch_value_network_dir ,f'value_network_{epoch}.h5'))
            if self.policy_network is not None and self.policy_network_path is not None:
                self.policy_network.save_model(os.path.join(self.epoch_policy_network_dir ,f'policy_network_{epoch}.h5'))
        #=======================================================================================

        # 종료 시간
        time_end = time.time()
        elapsed_time = time_end - time_start

        # 학습 관련 정보 로그 기록
        with self.lock:
            logger.debug(f'[{self.stock_code}] Elapsed Time:{elapsed_time:.4f} '
                f'Max PV:{max_portfolio_value:,.0f} #Win:{epoch_win_cnt}')

    def save_models(self):
        """Save the value and policy networks to their configured paths.

        A network is written only when both the network object and its
        destination path are set; otherwise it is skipped without error.
        """
        for kind in ('value', 'policy'):
            network = getattr(self, f'{kind}_network')
            if network is None:
                # The path attribute is intentionally not read when there is
                # no network to save.
                continue
            target_path = getattr(self, f'{kind}_network_path')
            if target_path is not None:
                network.save_model(target_path)

    def predict(self):
        """Step through the data once with the current networks and record each decision.

        For every step that has a full window of ``num_steps`` samples the
        method queries the value/policy networks, lets the agent decide and
        execute an action (using ``self.start_epsilon`` as exploration rate),
        stores the outcome in the ``memory_*`` lists and appends one record
        to the result.

        When ``self.gen_output`` is true the run is visualised and the records
        are written to ``pred_<stock_code>.json`` inside ``self.output_path``.

        Returns:
            list[dict]: one JSON-serialisable record per executed step with the
            keys ``idx``, ``action``, ``confidence``, ``exploration``,
            ``pred_value``, ``pred_policy``, ``reward``, ``portfolio_value``
            and ``num_stocks``. Previously this list was never filled, so the
            method always returned (and saved) an empty list.
        """
        def as_plain(obj):
            # numpy scalars/arrays are not JSON serialisable; tolist() turns
            # them into built-in Python values and leaves None and plain
            # Python scalars unchanged.
            return np.asarray(obj).tolist()

        # Reset the agent
        self.agent.reset()
        info = (
            f'[{self.stock_code}] RL:{self.rl_method} NET:{self.net} '
            f'LR:{self.lr} DF:{self.discount_factor} '
        )
        self.epoch_summary_dir = self.output_path
        self.visualizer.prepare(self.environment.chart_data, info)
        # Sliding window that holds the most recent num_steps samples
        q_sample = collections.deque(maxlen=self.num_steps)
        result = []
        while True:
            # Build the next sample; None signals the end of the data
            next_sample = self.build_sample()
            if next_sample is None:
                break

            # Wait until the window holds num_steps samples
            q_sample.append(next_sample)
            if len(q_sample) < self.num_steps:
                continue

            # Value / policy network predictions
            pred_value = None
            pred_policy = None
            if self.value_network is not None:
                pred_value = self.value_network.predict(list(q_sample))
            if self.policy_network is not None:
                pred_policy = self.policy_network.predict(list(q_sample))

            # Decide the action from the networks (or by exploration)
            action, confidence, exploration = self.agent.decide_action(pred_value, pred_policy, self.start_epsilon)

            # Execute the action and obtain the reward
            reward = self.agent.act(action, confidence)

            # Remember the action and its outcome
            self.memory_sample.append(list(q_sample))
            self.memory_action.append(action)
            self.memory_reward.append(reward)
            if self.value_network is not None:
                self.memory_value.append(pred_value)
            if self.policy_network is not None:
                self.memory_policy.append(pred_policy)
            self.memory_pv.append(self.agent.portfolio_value)
            self.memory_num_stocks.append(self.agent.num_stocks)
            if exploration:
                self.memory_exp_idx.append(self.training_data_idx)

            # Record this step so the returned list / JSON file is not empty
            result.append({
                'idx': as_plain(self.training_data_idx),
                'action': as_plain(action),
                'confidence': as_plain(confidence),
                'exploration': as_plain(exploration),
                'pred_value': as_plain(pred_value),
                'pred_policy': as_plain(pred_policy),
                'reward': as_plain(reward),
                'portfolio_value': as_plain(self.agent.portfolio_value),
                'num_stocks': as_plain(self.agent.num_stocks),
            })

        if self.gen_output:
            self.visualize('predict', self.num_epoches, self.start_epsilon)
            with open(os.path.join(self.output_path, f'pred_{self.stock_code}.json'), 'w') as f:
                print(json.dumps(result), file=f)

        return result

In [12]:
class DQNLearner(ReinforcementLearner):
    """Learner that uses only a value network."""

    def __init__(self, *args, value_network_path=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.value_network_path = value_network_path
        self.init_value_network()

    def get_batch(self):
        """Turn the recorded episode into value-network training targets.

        The memories are walked from the newest entry to the oldest. For each
        step the stored prediction is copied as the target, and the entry of
        the action actually taken is overwritten with
        ``(last reward - step reward) + discount_factor * max value of the
        previously processed (i.e. later) step``.

        Returns:
            tuple: ``(x, y_value, None)`` where ``x`` has shape
            ``(len(memory_sample), num_steps, num_features)`` and ``y_value``
            has shape ``(len(memory_sample), NUM_ACTIONS)``.
        """
        batch_len = len(self.memory_sample)
        x = np.zeros((batch_len, self.num_steps, self.num_features))
        y_value = np.zeros((batch_len, self.agent.NUM_ACTIONS))

        # Newest experience first: samples, actions, predicted values, rewards
        newest_first = zip(*map(reversed, (
            self.memory_sample,
            self.memory_action,
            self.memory_value,
            self.memory_reward,
        )))

        later_best_value = 0
        for row, (sample, action, value, reward) in enumerate(newest_first):
            x[row] = sample
            y_value[row] = value
            # Reward gained between this step and the end of the episode
            gain = self.memory_reward[-1] - reward
            y_value[row, action] = gain + self.discount_factor * later_best_value
            later_best_value = value.max()
        return x, y_value, None

In [13]:
class ActorCriticLearner(ReinforcementLearner):
    """Learner that trains a value network and a policy network on one shared network.

    Args:
        shared_network: shared network to build both heads on; when None a
            new one is created with ``Network.get_shared_network``.
        value_network_path: stored on the instance as ``value_network_path``.
        policy_network_path: stored on the instance as ``policy_network_path``.
        *args, **kwargs: forwarded to ``ReinforcementLearner.__init__``.
    """

    def __init__(self, *args, shared_network=None,
        value_network_path=None, policy_network_path=None, **kwargs):
        super().__init__(*args, **kwargs)
        if shared_network is None:
            self.shared_network = Network.get_shared_network(
                net=self.net, num_steps=self.num_steps,
                input_dim=self.num_features,
                output_dim=self.agent.NUM_ACTIONS)
        else:
            self.shared_network = shared_network
        self.value_network_path = value_network_path
        self.policy_network_path = policy_network_path
        # Only create the networks that were not supplied by the caller
        if self.value_network is None:
            self.init_value_network(shared_network=self.shared_network)
        if self.policy_network is None:
            self.init_policy_network(shared_network=self.shared_network)

    def get_batch(self):
        """Build value and policy training targets from the episode memory.

        The memories are processed from the newest entry to the oldest. For
        the action taken at each step the value target becomes
        ``r + discount_factor * max value of the later step`` and the policy
        target becomes ``sigmoid(r)``, where ``r`` is the last recorded reward
        minus the reward of that step.

        Returns:
            tuple: ``(x, y_value, y_policy)``; ``x`` has shape
            ``(len(memory_sample), num_steps, num_features)`` and both target
            arrays have shape ``(len(memory_sample), NUM_ACTIONS)``.
        """
        memory = zip(
            reversed(self.memory_sample),
            reversed(self.memory_action),
            reversed(self.memory_value),
            reversed(self.memory_policy),
            reversed(self.memory_reward),
        )
        x = np.zeros((len(self.memory_sample), self.num_steps, self.num_features))
        y_value = np.zeros((len(self.memory_sample), self.agent.NUM_ACTIONS))
        y_policy = np.zeros((len(self.memory_sample), self.agent.NUM_ACTIONS))
        value_max_next = 0
        for i, (sample, action, value, policy, reward) in enumerate(memory):
            x[i] = sample
            r = self.memory_reward[-1] - reward
            y_value[i, :] = value
            y_value[i, action] = r + self.discount_factor * value_max_next
            y_policy[i, :] = policy
            # softmax() of a single scalar is always 1.0, which made the
            # policy target independent of the reward; sigmoid() maps the
            # scalar reward into (0, 1) instead.
            y_policy[i, action] = sigmoid(r)
            value_max_next = value.max()
        return x, y_value, y_policy


class A2CLearner(ActorCriticLearner):
    """Actor-critic learner whose policy target is based on the advantage."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_batch(self):
        """Build value and advantage-based policy targets from the episode memory.

        The memories are processed from the newest entry to the oldest. For
        the action taken at each step:

        * ``r = reward_next + last_reward - 2 * reward`` where ``reward_next``
          is the reward of the step processed just before (the later step);
        * the value target is ``tanh(r + discount_factor * max value of the
          later step)``;
        * the advantage is that target minus the mean of the step's value
          targets, and the policy target is ``sigmoid(advantage)``.

        Returns:
            tuple: ``(x, y_value, y_policy)``; ``x`` has shape
            ``(len(memory_sample), num_steps, num_features)`` and both target
            arrays have shape ``(len(memory_sample), NUM_ACTIONS)``.
        """
        memory = zip(
            reversed(self.memory_sample),
            reversed(self.memory_action),
            reversed(self.memory_value),
            reversed(self.memory_policy),
            reversed(self.memory_reward),
        )
        x = np.zeros((len(self.memory_sample), self.num_steps, self.num_features))
        y_value = np.zeros((len(self.memory_sample), self.agent.NUM_ACTIONS))
        y_policy = np.zeros((len(self.memory_sample), self.agent.NUM_ACTIONS))
        value_max_next = 0
        reward_next = self.memory_reward[-1]
        for i, (sample, action, value, policy, reward) in enumerate(memory):
            x[i] = sample
            r = reward_next + self.memory_reward[-1] - reward * 2
            reward_next = reward
            y_value[i, :] = value
            y_value[i, action] = np.tanh(r + self.discount_factor * value_max_next)
            advantage = y_value[i, action] - y_value[i].mean()
            y_policy[i, :] = policy
            # softmax() of a single scalar is always 1.0, which discarded the
            # advantage; sigmoid() maps it into (0, 1) so its sign and size
            # reach the policy target.
            y_policy[i, action] = sigmoid(advantage)
            value_max_next = value.max()
        return x, y_value, y_policy


class A3CLearner(ReinforcementLearner):
    """Runs one A2CLearner per stock, each in its own thread.

    All per-stock learners are given this learner's shared network, value
    network and policy network.
    """

    def __init__(self, *args, list_stock_code=None,
        list_chart_data=None, list_training_data=None,
        list_min_trading_price=None, list_max_trading_price=None,
        value_network_path=None, policy_network_path=None,
        **kwargs):
        assert len(list_training_data) > 0
        super().__init__(*args, **kwargs)
        # Extend the feature count by the column count of the first training set
        self.num_features += list_training_data[0].shape[1]

        # Build the shared network
        self.shared_network = Network.get_shared_network(
            net=self.net, num_steps=self.num_steps,
            input_dim=self.num_features,
            output_dim=self.agent.NUM_ACTIONS)
        self.value_network_path = value_network_path
        self.policy_network_path = policy_network_path
        if self.value_network is None:
            self.init_value_network(shared_network=self.shared_network)
        if self.policy_network is None:
            self.init_policy_network(shared_network=self.shared_network)

        # Create one A2CLearner per stock
        self.learners = []
        per_stock_settings = zip(
            list_stock_code, list_chart_data, list_training_data,
            list_min_trading_price, list_max_trading_price,
        )
        for code, chart, features, min_price, max_price in per_stock_settings:
            self.learners.append(A2CLearner(
                *args,
                stock_code=code, chart_data=chart,
                training_data=features,
                min_trading_price=min_price,
                max_trading_price=max_price,
                shared_network=self.shared_network,
                value_network=self.value_network,
                policy_network=self.policy_network, **kwargs))

    @staticmethod
    def _start_and_join(workers):
        """Start every thread, then block until all of them have finished."""
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    def run(self, learning=True):
        """Call ``run(learning=...)`` on every per-stock learner in parallel threads."""
        workers = [
            threading.Thread(
                target=sub_learner.run, daemon=True,
                kwargs={'learning': learning})
            for sub_learner in self.learners
        ]
        self._start_and_join(workers)

    def predict(self):
        """Call ``predict()`` on every per-stock learner in parallel threads."""
        workers = [
            threading.Thread(target=sub_learner.predict, daemon=True)
            for sub_learner in self.learners
        ]
        self._start_and_join(workers)

In [14]:
# Learner parameters
rl='A3C'
net='lstm'
mode = 'train'  # run mode; the checks below also recognise 'test', 'update' and 'predict'
# Read the clock once so the output folder and both model names share one timestamp
# (three separate calls could straddle a second boundary and disagree).
run_timestamp = get_time_str()
output_name = f'{mode}_{run_timestamp}_{rl}_{net}'
learning = mode in ['train', 'update']
# NOTE(review): this check is hard-coded against 'test', so it is always True even
# for a training run; the value is kept unchanged here - confirm it is intended.
reuse_models = 'test' in ['test', 'update', 'predict']
value_network_name = f'{run_timestamp}_{rl}_{net}_value'
policy_network_name = f'{run_timestamp}_{rl}_{net}_policy'
start_epsilon = 0.5 if mode in ['train', 'update'] else 0
num_epoches = 100 if mode in ['train', 'update'] else 1
# NOTE(review): both branches yield 1, so the network type currently has no effect here.
num_steps = 1 if net in ['lstm', 'cnn'] else 1

# Create the output directory
output_path = os.path.join(BASE_DIR, 'output', output_name)
os.makedirs(output_path, exist_ok=True)


# Model paths
value_network_path = os.path.join(BASE_DIR, 'models', value_network_name)
policy_network_path = os.path.join(BASE_DIR, 'models', policy_network_name)

# Logging setup
log_path = os.path.join(output_path, f'{output_name}.log')
logging.basicConfig(format='%(message)s')
logger = logging.getLogger(LOGGER_NAME)
logger.setLevel(logging.DEBUG)
logger.propagate = False
# Re-running this cell must not stack handlers: drop and close the ones left over
# from a previous run so messages are not duplicated and old log files are released.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()
if os.path.exists(log_path):
    os.remove(log_path)
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(logging.INFO)
file_handler = logging.FileHandler(filename=log_path, encoding='utf-8')
file_handler.setLevel(logging.DEBUG)
logger.addHandler(stream_handler)
logger.addHandler(file_handler)

list_stock_code = ['247540','005490']
list_chart_data = []
list_training_data = []
list_min_trading_price = []
list_max_trading_price = []


# Minimum / maximum amount for a single trade
min_trading_price = 5000000
max_trading_price = 50000000

for stock_code in list_stock_code:
    chart_data, training_data = load_data().read_csv(stock_code, start_day='2023-07-03', end_day='2023-07-22')

    assert len(chart_data) >= num_steps

    list_chart_data.append(chart_data)
    list_training_data.append(training_data)
    list_min_trading_price.append(min_trading_price)
    list_max_trading_price.append(max_trading_price)

# Parameters shared by every learner (independent of the stock, so built once)
common_params = {
    'rl_method': rl,
    'net': net, 'num_steps': num_steps, 'lr': 0.001,
    'discount_factor': 0.9,
    'num_epoches': num_epoches,
    'balance': 50000000,
    'start_epsilon': start_epsilon,
    'output_path': output_path,
    'reuse_models': reuse_models,
}

# Create the reinforcement learner
learner_A3C = None

learner_A3C = A3CLearner(
    **common_params,
    list_stock_code=list_stock_code,
    list_chart_data=list_chart_data,
    list_training_data=list_training_data,
    list_min_trading_price=list_min_trading_price,
    list_max_trading_price=list_max_trading_price,
    value_network_path=value_network_path,
    policy_network_path=policy_network_path,
)

assert learner_A3C is not None

In [None]:
# NOTE(review): this module-level lock is not referenced by the two calls below;
# the learner's run() uses `self.lock` instead - confirm whether it is still needed.
lock = threading.Lock()
# Train: A3CLearner.run() starts one thread per stock learner and waits for all of them.
learner_A3C.run()
# Save the networks; presumably the inherited save_models(), which writes to
# value_network_path / policy_network_path - verify against the base class.
learner_A3C.save_models()

247540__ 차트데이터 만들기
005490__ 차트데이터 만들기


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

stock_code : 247540__episode : 0__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 0__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 1__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 1__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 2__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 2__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 3__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 3__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 4__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 4__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 5__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 5__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 6__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 6__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 7__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 7__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 8__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 8__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 9__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 9__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 10__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 10__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 11__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 11__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 12__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 12__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 13__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 13__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 14__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 14__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 15__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 15__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 16__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 16__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 17__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 17__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 18__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 18__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 19__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 19__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]

stock_code : 247540__episode : 20__에이전트 거래 시작


  0%|          | 0/5698 [00:00<?, ?it/s]

stock_code : 005490__episode : 20__에이전트 거래 시작


  0%|          | 0/5699 [00:00<?, ?it/s]