##### Copyright 2020 The TensorFlow Authors.

In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Actor-Critic 방법

Actor-Critic 방법은 가치 함수와 독립적인 정책 함수를 나타내는 Temporal Difference(TD) 학습 방법입니다.

정책 함수(또는 정책)는 에이전트가 주어진 상태에 따라 취할 수 있는 동작에 대한 확률 분포를 반환합니다. 가치 함수는 주어진 상태에서 시작하여 특정 정책에 따라 영원히 동작하는 에이전트의 예상 이익을 결정합니다.

Actor-Critic 방법에서 정책은 주어진 상태에 따라 가능한 일련의 동작을 제안하는 행위자라고 하며, 추정값 함수는 주어진 정책에 따라 행위자가 취한 동작을 평가하는 비평가라고 합니다.

이 튜토리얼에서 행위자*와 *비평가 모두 두 개의 출력이 있는 하나의 신경망을 사용하여 표현됩니다.

actor: 고저점 예측 모델, 고점: 1, 저점: 2, 중립: 0
       학습 및 테스트 데이터: 알고리즘2/2022-12-31/kospi200f_60M_11.csv
       3개의 action(the number of outputs)
critic: 주어진 test 구간에서의 수익률: -1 ~ 1

state: 지수(OHLC) + 거래량 + 파생 변수 + 현재 보유 상태(매수/매도, 수량)

## 설정

필요한 패키지를 가져오고 전역 설정을 구성합니다.


In [2]:
import collections
import numpy as np
import tensorflow as tf
import tqdm
import random

import make_model as tm
import profit

from matplotlib import pyplot as plt
from tensorflow.keras import layers
from typing import Any, List, Sequence, Tuple


모델

행위자와 비평가는 각각 동작 확률과 비평 값을 생성하는 하나의 신경망을 사용하여 모델링됩니다. 모델 하위 클래스화를 사용하여 모델을 
정의합니다.

순방향 전달 중에 모델은 상태를 입력으로 받고 상태 종속 값 함수를 모델링하는 동작 확률과 비평 값  𝑉를 모두 출력합니다. 
목표는 예상 이익을 최대화하는  𝜋 정책을 기반으로 행동을 선택하는 모델을 훈련하는 것입니다.

In [3]:
class ActorCritic(tf.keras.Model):
  """Combined actor-critic network."""

  def __init__(
      self, 
      num_actions: int, 
      num_hidden_units: int):
    """Initialize."""

    super().__init__()
    
    self.common1 = layers.Dense(num_hidden_units, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))
    self.common2 = layers.Dense(int(num_hidden_units / 2), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))
    self.common3 = layers.Dense(int(num_hidden_units / 4), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))

    self.actor = layers.Dense(num_actions, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(0.01))
    self.critic = layers.Dense(1, activation='tanh')

  def  call(self, inputs: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    x1 = self.common1(inputs)
    x2 = layers.Dropout(0.5)(x1)
    x3 = tf.keras.layers.BatchNormalization()(x2)
    return self.actor(x3), self.critic(x3)

In [4]:
num_actions = 3
num_hidden_units = 50

model = ActorCritic(num_actions, num_hidden_units)

훈련
에이전트를 훈련하기 위해 다음 단계를 따릅니다.

1. 코스피200 60분봉 데이터 파일을 이용하여 전처리 및 taget 데이터를 만들어 학습용 파일을 만든다. 
2. 학습용 데이터을 train, test 구간으로 분리한다.
3. 훈련된 actor를 사용하여 train 구간에서 actor의 거래 시그널에 의한 n봉 까지의 실제 수익률과 critic에 의한 예측 수익률을 비교하여
   에이전트를 훈련한다.
4. test 구간에서 actor에 의한 거래 시그널을 critic의 결과에 의해 조정하여 거래를 한 후 수익률 test한다.

In [5]:
# make a normalized input data
tm.start_time = "2022/01/01/09:00"
tm.end_time = "2023/01/20/15:00"
tm.df_pred_path = 'kospi200f_60M_pred.csv'
tm.result_path = 'pred_83_results.csv'

tm.preprocessing()

nothing done! in this preprocessing


In [6]:
import pandas as pd
df0 = pd.read_csv(tm.df0_path, encoding='euc-kr')
df0 = df0.loc[df0['date'] >= tm.start_time].loc[df0['date'] <= tm.end_time]
df = pd.read_csv(tm.df_pred_path, encoding='euc-kr')
df = df.loc[df['date'] >= tm.start_time].loc[df['date'] <= tm.end_time]

### 2. 예상 이익 계산하기

한 에피소드 동안 수집된 각 타임스텝 $t$, ${r_{t}}^{T}*{t=1}$에서 보상의 시퀀스를 예상 이익 ${G*{t}}^{T}_{t=1}$의 시퀀스로 변환합니다. 여기서 보상의 합계는 현재 타임스텝 $t$에서 $T$까지 계산되며, 각 보상에 기하 급수적으로 감소하는 할인 계수 $\gamma$를 곱합니다.

$$G_{t} = \sum^{T}{t'=t} \gamma^{t'-t}r{t'}$$

$\gamma\in(0,1)$ 이후, 현재 타임스텝에서 더 멀리 떨어진 보상에는 더 적은 가중치가 부여됩니다.

직관적으로, 예상 이익은 단순히 지금 보상이 이후 보상보다 낫다는 것을 암시합니다. 이것은 수학적 의미에서 보상의 합이 수렴하도록 하려는 것입니다.

또한, 훈련을 안정화하기 위해 이익의 결과 시퀀스를 표준화합니다(즉, 평균이 0이고 단위 표준 편차를 갖도록 함).


In [7]:
# Small epsilon value for stabilizing division operations
eps = np.finfo(np.float32).eps.item()

def get_expected_return(data, 
                        raw_data, 
                        model: tf.keras.Model,
                        gamma: float, 
                        standardize: bool = True) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    
    actions, values = model(tf.convert_to_tensor(data, dtype=tf.float32))  
    pred = np.argmax(actions, axis=1).reshape(-1)
    
   
    # 종가 검색
    high = raw_data['고가'].values
    low = raw_data['저가'].values
    close = raw_data['종가'].values
    open = raw_data['시가'].values

    #  0: 정상, 1: 급락 2:급등
    pred_results = {'date': raw_data['date'].values, 'result': pred, 'open': open, 'high': high, 'low': low, 'close': close}

    pd.DataFrame(pred_results).to_csv(tm.result_path, index=False, encoding='euc-kr')
    
    profit.result_path = tm.result_path
    
    rewards = tf.convert_to_tensor(profit.calc_profit(), dtype=tf.float32)
    
    n = tf.shape(rewards)[0]
    t_actions = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    t_values = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    returns = tf.TensorArray(dtype=tf.float32, size=n)

    # Start from the end of `rewards` and accumulate reward sums
    # into the `returns` array
    rewards = tf.cast(rewards[::-1], dtype=tf.float32)
    discounted_sum = tf.constant(0.0)
    discounted_sum_shape = discounted_sum.shape
    for i in tf.range(n):
        reward = np.sign(rewards[i])
        discounted_sum = reward + gamma * discounted_sum
        discounted_sum.set_shape(discounted_sum_shape)
        returns = returns.write(i, max(min(discounted_sum, 1), -1))
        t_actions.write(i, actions[i])
        t_values.write(i, values[i])
    returns = returns.stack()[::-1]
        
    #if standardize:
    #    returns = ((returns - tf.math.reduce_mean(returns)) / 
    #             (tf.math.reduce_std(returns) + eps))
    
    return t_actions.stack(), t_values.stack(), returns

### 3. Actor-Critic 손실

여기서는 하이브리드 Actor-Critic 모델을 사용하고 있기 때문에 아래와 같이 훈련을 위해 행위자와 비평가 손실의 조합인 손실 함수를 사용합니다.

$$L = L_{actor} + L_{critic}$$

#### Actor 손실

[비평가가 상태 종속 기준선인 정책 그래디언트](https://www.youtube.com/watch?v=EKqxumCuAAY&t=62m23s)를 기반으로 행위자 손실을 공식화하고 단일 샘플(에피소드별) 추정치를 계산합니다.

$$L_{actor} = -\sum^{T}*{t=1} log\pi*{\theta}(a_{t} | s_{t})[G(s_{t}, a_{t})  - V^{\pi}*{\theta}(s*{t})]$$

여기서:

- $T$: 에피소드별로 달라질 수 있는 에피소드별 타임스텝의 수
- $s_{t}$: $t$ 타임스텝의 상태
- $a_{t}$: $s$ 상태에 따라 $t$ 타임스텝에서 선택된 동작
- $\pi_{\theta}$: $\theta$에 의해 매개변수화된 정책(행위자)
- $V^{\pi}_{\theta}$: 마찬가지로 $\theta$에 의해 매개변수화된 값 함수(비평가)
- $G = G_{t}$: 주어진 상태에 대한 예상 이익, 타임스텝 $t$에서 동작 쌍

결합된 손실을 최소화하여 보상이 더 높은 행동의 확률을 최대화하려고 하므로 합계에 음의 항을 추가합니다.

<br>

##### 이점

$L_{actor}$ 공식에서 $G - V$ 항을 [이점](https://spinningup.openai.com/en/latest/spinningup/rl_intro.html#advantage-functions)이라고 하며, 이는 특정한 상태에서 $\pi$ 정책에 따라 선택된 임의의 동작보다 이 상태에 얼마나 더 나은 동작이 주어지는지를 나타냅니다.

기준선을 제외할 수 있지만 이로 인해 훈련 중에 큰 변동이 발생할 수 있습니다. 그리고 비평가 $V$를 기준선으로 선택할 때의 좋은 점은 가능한 한 $G$에 가깝게 훈련되어 변동이 낮아진다는 것입니다.

또한, 비평가가 없으면 알고리즘이 예상 이익을 바탕으로 특정 상태에서 취하는 행동의 확률을 높이려고 시도할 것이며, 이 때 동작 사이의 상대적 확률이 같게 유지된다면 큰 차이가 생기지 않습니다.

예를 들어, 주어진 상태에서 두 행동의 예상 이익이 같다고 가정합니다. 비평가가 없으면 알고리즘은 목표 $J$에 따라 이들 동작의 확률을 높이려고 합니다. 비평가의 경우, 이점($G - V = 0$)이 없기 때문에 동작의 확률을 높이는 데 따른 이점이 없으며 알고리즘이 그래디언트를 0으로 설정합니다.

<br>

#### 비평가 손실

$V$를 $G$에 최대한 가깝게 훈련하는 것은 다음 손실 함수를 사용한 회귀 문제로 설정할 수 있습니다.

$$L_{critic} = L_{\delta}(G, V^{\pi}_{\theta})$$

여기서 $L_{\delta}$는 [Huber 손실](https://en.wikipedia.org/wiki/Huber_loss)로, 제곱 오차 손실보다 데이터의 이상 값에 덜 민감합니다.


In [8]:
huber_loss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.SUM)

def compute_loss(
    action_probs: tf.Tensor,  
    values: tf.Tensor,  
    returns: tf.Tensor) -> tf.Tensor:
  """Computes the combined actor-critic loss."""

  advantage = returns - values

  action_log_probs = tf.math.log(action_probs)
  actor_loss = -tf.math.reduce_sum(action_log_probs * advantage)

  critic_loss = huber_loss(values, returns)

  return actor_loss + critic_loss

### 4. 매개변수를 업데이트하기 위한 훈련 단계 정의하기

위의 모든 단계를 모든 에피소드에서 실행되는 훈련 단계로 결합합니다. 손실 함수로 이어지는 모든 단계는 `tf.GradientTape` 컨텍스트로 실행되어 자동 미분이 가능합니다.

Adam 옵티마이저를 사용하여 모델 매개변수에 그래디언트를 적용합니다.

또한 이 단계에서 할인되지 않은 보상의 합계인 `episode_reward`를 계산합니다. 이 단계는 나중에 성공 기준을 충족했는지 평가하는 데 사용됩니다.

`tf.function` 컨텍스트를 `train_step` 함수에 적용하여 호출 가능한 TensorFlow 그래프로 컴파일할 수 있고, 그러면 훈련 속도가 10배 빨라질 수 있습니다.


In [9]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)


#@tf.function
def train_step(
    offset: int,
    model: tf.keras.Model, 
    optimizer: tf.keras.optimizers.Optimizer, 
    max_steps: int) -> tf.Tensor:
  """Runs a model training step."""

  with tf.GradientTape() as tape:

    # get the first actions and values of your model given data   
    data = df.values[offset:offset + max_steps, 1:]
    raw_data = df0[offset:offset + max_steps]

    # Calculate expected returns
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)

    # Convert training data to appropriate TF tensor shapes
    actions, values, returns = [
        tf.expand_dims(x, 1) for x in [t_actions, t_values, returns]] 

    # Calculating loss values to update our network
    loss = compute_loss(actions, values, returns)

  # Compute the gradients from the loss
  grads = tape.gradient(loss, model.trainable_variables)

  # Apply the gradients to the model's parameters
  optimizer.apply_gradients(zip(grads, model.trainable_variables))

  return tf.math.reduce_sum(returns)

In [47]:
offset = random.randrange(0, 1000)
max_steps = 100
print(train_step(offset, model, optimizer, max_steps))

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002572BF97B20>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\3195118818.py", line 3, in <module>
    print(train_step(offset, model, optimizer, max_steps))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2621869792.py", line 19, in train_step
    t_actions, t_values, returns = get_expected_return(data, raw_data, model, 0.99, True)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.

tf.Tensor(-66.775665, shape=(), dtype=float32)


In [48]:
# get the first actions and values of your model given data   
data = df.values[offset:offset + max_steps, 1:]
raw_data = df0[offset:offset + max_steps]
print(get_expected_return(data, raw_data, model, 0.99, True))

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000002571ED0CBE0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):  File "C:\Users\qhedge\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2782990522.py", line 4, in <module>
    print(get_expected_return(data, raw_data, model, 0.99, True))  File "C:\Users\qhedge\AppData\Local\Temp\ipykernel_20548\2814228888.py", line 44, in get_expected_return
    t_actions.write(i, actions[i])  File "C:\Users\qhedge\anaconda3\lib\site-packages\tensorflow\python\util\tf_should_use.py", line

(<tf.Tensor: shape=(100, 3), dtype=float32, numpy=
array([[nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, 

훈련된 모델을 test하기 위하여 훈련 데이터 이후의 데이터를 input으로 하여 actor의 action(-trading sinal)에 의한 거래 결과를 보여준다.

In [None]:
def predict(data, raw_data):
    
    actions, values = model(tf.convert_to_tensor(data, dtype=tf.float32))  
    pred = np.argmax(actions, axis=1).reshape(-1)
    
   
    # 종가 검색
    high = raw_data['고가'].values
    low = raw_data['저가'].values
    close = raw_data['종가'].values
    open = raw_data['시가'].values

    #  0: 정상, 1: 급락 2:급등
    pred_results = {'date': raw_data['date'].values, 'result': pred, 'open': open, 'high': high, 'low': low, 'close': close}

    pd.DataFrame(pred_results).to_csv(tm.result_path, index=False, encoding='euc-kr')
    
    profit.result_path = tm.result_path
    
    return profit.calc_profit().sum()

In [None]:
data = df.values[500:, 1:]
raw_data = df0[500:]
print(predict(data, raw_data))

### 5. 훈련 루프 실행하기

성공 기준 또는 최대 에피소드 수에 도달할 때까지 훈련 단계를 실행하는 방식으로 훈련을 실행합니다.

대기열을 사용하여 에피소드 보상의 실행 레코드를 유지합니다. 100회 시도에 도달하면 가장 오래된 보상이 대기열의 왼쪽 (꼬리쪽) 끝에서 제거되고 최근 보상이 머리쪽 (오른쪽)에 추가됩니다. 계산 효율을 높이기 위해 보상의 누적 합계도 유지됩니다.

런타임에 따라 훈련은 1분 이내에 완료될 수 있습니다.

In [None]:
%%time

max_episodes = 1
max_steps = 100

reward_threshold = 0.5
running_reward = 0

with tqdm.trange(max_episodes) as t:
    for i in t:

        reward = train_step(100, model, optimizer, max_steps)
        print(reward)

        if reward > reward_threshold:
            break
        
# get test data 
data = df.values[max_steps:max_steps + max_steps, 1:]
raw_data = df0[max_steps :max_steps + max_steps]

# show the prediction results
r = predict(data, raw_data)

print("손익 합계", r)

10이상 train하면 nan값으로 diverge한다.
이유를 모르겠음

## 다음 단계

이 튜토리얼에서는 Tensorflow를 사용하여 Actor-Critic 방법을 구현하는 방법을 보여주었습니다.

다음 단계로 OpenAI Gym의 다른 환경에서 모델의 훈련을 시도할 수 있습니다.

Actor-Critic 방법 및 Cartpole-v0 문제에 대한 추가 정보는 다음 리소스를 참조하세요.

- [Actor Critic 방법](https://hal.inria.fr/hal-00840470/document)
- [CAL(Actor Critic Lecture)](https://www.youtube.com/watch?v=EKqxumCuAAY&list=PLkFD6_40KJIwhWJpGazJ9VSj9CFMkb79A&index=7&t=0s)
- [카트폴 학습 제어 문제 [Barto 등 1983]](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)

TensorFlow에서 더 많은 강화 학습 예를 보려면 다음 리소스를 확인하세요.

- [강화 학습 코드 예제(keras.io)](https://keras.io/examples/rl/)
- [TF-Agents 강화 학습 라이브러리](https://www.tensorflow.org/agents)
