In [2]:
import numpy as np
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from sklearn.model_selection import train_test_split
from data.coin_load_data import coin_load_data
from network.transformer import TransformerPolicy
from env.envE2 import TradingEnv
from utils.visualization import generate_trading_graph
from network.transformer import TransformerPolicy

In [3]:
# Load the three datasets returned by the project loader.
chart_data, training_data, merged_data = coin_load_data()
# NOTE(review): training_data (without its 'date' column) is not referenced by any
# later cell visible here; the split below uses merged_data — confirm this is intended.
training_data = training_data.drop(columns=['date'])
# Ordered 80/20 split: shuffle=False keeps the original row order, so the test set is
# the trailing part of merged_data. random_state is omitted because it has no effect
# when shuffling is disabled.
train, test = train_test_split(merged_data, test_size=0.2, shuffle=False)

In [4]:
# Training environment: TradingEnv over the training split, wrapped in Monitor and
# then in a single-environment DummyVecEnv as expected by Stable-Baselines3.
# The monitored env gets its own name so the factory lambda does not close over a
# variable that is rebound on the very same line.
# NOTE(review): render_mode="human" is also used for training — confirm that the
# environment does not emit per-step output while learning.
train_env = Monitor(TradingEnv(train, render_mode="human"))
env = DummyVecEnv([lambda: train_env])

# Alternative configurations kept for reference:
# model = PPO("MlpPolicy", env, verbose=1, device="cuda", learning_rate=5e-4, ent_coef= 0.05)
# model = PPO(TransformerPolicy, env, verbose=0, device="cuda")

# A fixed seed makes training repeatable as far as the compute backend allows.
model = PPO("MlpPolicy", env, verbose=0, device="cuda", seed=42)


In [5]:
# Train with a timestep budget equal to the number of rows in the training split.
model.learn(total_timesteps=train.shape[0])

<stable_baselines3.ppo.ppo.PPO at 0x24205df6910>

In [8]:
# Evaluation environment over the held-out split. The raw env keeps its own name so
# the factory lambda does not close over a variable rebound on the same line.
raw_test_env = TradingEnv(test, render_mode="human")
test_env = DummyVecEnv([lambda: raw_test_env])

# Start the evaluation rollout.
obs = test_env.reset()
done = False
total_reward = 0.0

# Action counters. The branches below treat action 2 as buy and action 0 as sell;
# any other value is tallied in nc.
# NOTE(review): TradingEnv's action encoding is not visible here — confirm 2/0.
buy_count = 0  # number of buy actions (action == 2)
sell_count = 0  # number of sell actions (action == 0)
total_actions = 0  # number of actions taken overall
nc = 0  # number of actions that were neither buy nor sell
while not done:
    # Deterministic actions make the evaluation repeatable from run to run.
    action, _states = model.predict(obs, deterministic=True)
    # The vectorised env yields a one-element action array; unpack it to a scalar.
    action_value = np.asarray(action).item()
    if action_value == 2:
        buy_count += 1
    elif action_value == 0:
        sell_count += 1
    else:
        nc += 1
    total_actions += 1

    # Advance the environment. A single-env DummyVecEnv returns length-1 arrays for
    # reward and done, so take element 0 to keep the running totals plain scalars.
    obs, step_rewards, step_dones, _ = test_env.step(action)
    test_env.render()
    total_reward += float(step_rewards[0])
    done = bool(step_dones[0])

# Share of buy / sell actions among all actions taken.
buy_ratio = buy_count / total_actions if total_actions > 0 else 0
sell_ratio = sell_count / total_actions if total_actions > 0 else 0

print(f"매수 비율: {buy_ratio:.2%}")
print(f"매도 비율: {sell_ratio:.2%}")

print(f"Total Reward: {total_reward}")

Step: 1, Price: 26579.99, Position: -1, cum_reward: 0
Step: 2, Price: 26622.46, Position: -1, cum_reward: 0
Step: 3, Price: 26630.0, Position: -1, cum_reward: 0
Step: 4, Price: 26590.32, Position: -1, cum_reward: 0
Step: 5, Price: 26600.8, Position: -1, cum_reward: 0
Step: 6, Price: 26605.87, Position: -1, cum_reward: 0
Step: 7, Price: 26620.87, Position: -1, cum_reward: 0
Step: 8, Price: 26607.57, Position: -1, cum_reward: 0
Step: 9, Price: 26641.5, Position: -1, cum_reward: 0
Step: 10, Price: 26639.98, Position: -1, cum_reward: 0
Step: 11, Price: 26650.01, Position: -1, cum_reward: 0
Step: 12, Price: 26516.0, Position: -1, cum_reward: 0
Step: 13, Price: 26466.01, Position: -1, cum_reward: 0
Step: 14, Price: 26488.01, Position: -1, cum_reward: 0
Step: 15, Price: 26473.74, Position: -1, cum_reward: 0
Step: 16, Price: 26479.99, Position: -1, cum_reward: 0
Step: 17, Price: 26443.27, Position: -1, cum_reward: 0
Step: 18, Price: 26434.05, Position: -1, cum_reward: 0
Step: 19, Price: 26415.

KeyboardInterrupt: 

In [7]:
# Pass the current test environment and the target HTML path to the project's
# visualisation helper.
# NOTE(review): test_env is the DummyVecEnv wrapper at this point — confirm that
# generate_trading_graph expects the wrapper rather than the underlying TradingEnv.
trading_graph_path = "visualized/trading_results.html"
generate_trading_graph(test_env, trading_graph_path)

In [6]:
def _classify_action(action, discrete):
    """Map one predicted action to ``"buy"``, ``"sell"`` or ``"hold"``.

    Args:
        action: Single-element array (or scalar) as returned by ``model.predict``
            for a one-environment vectorised env.
        discrete: True when the environment's action space is discrete.

    Returns:
        ``"buy"``, ``"sell"`` or ``"hold"``.
    """
    value = np.asarray(action).item()
    if discrete:
        # Same encoding as the single-run evaluation cell: 2 = buy, 0 = sell.
        if value == 2:
            return "buy"
        if value == 0:
            return "sell"
        return "hold"
    # Continuous action: clip to [-1, 1] and threshold at +/-0.5.
    clipped = float(np.clip(value, -1, 1))
    if clipped > 0.5:
        return "buy"
    if clipped < -0.5:
        return "sell"
    return "hold"


buy_ratios = []
sell_ratios = []
total_rewards = []

# Evaluation environment over the held-out split. The raw env keeps its own name so
# the factory lambda does not close over a variable rebound on the same line.
raw_test_env = TradingEnv(test, render_mode="human")
test_env = DummyVecEnv([lambda: raw_test_env])

# Discrete action spaces expose ``n``; continuous (Box) spaces do not.
# NOTE(review): confirm which kind of action space TradingEnv declares.
discrete_actions = hasattr(test_env.action_space, "n")

# Number of train-then-evaluate rounds.
num_iterations = 5
for iteration in range(num_iterations):
    # Continue training the same model. reset_num_timesteps=False keeps the global
    # timestep counter (and therefore logging) running across rounds instead of
    # restarting from zero on every call.
    model.learn(train.shape[0], reset_num_timesteps=False)

    # Reset the evaluation state for this round.
    obs = test_env.reset()
    done = False
    total_reward = 0.0
    buy_count = 0
    sell_count = 0
    total_actions = 0

    # Evaluation rollout.
    while not done:
        # Deterministic actions make the rounds comparable with each other.
        action, _states = model.predict(obs, deterministic=True)
        decision = _classify_action(action, discrete_actions)
        if decision == "buy":
            buy_count += 1
        elif decision == "sell":
            sell_count += 1

        total_actions += 1

        # A single-env DummyVecEnv returns length-1 arrays for reward and done;
        # take element 0 so the totals stay plain scalars.
        obs, step_rewards, step_dones, _ = test_env.step(action)
        total_reward += float(step_rewards[0])
        done = bool(step_dones[0])

    # Share of buy / sell actions among all actions taken in this round.
    buy_ratio = buy_count / total_actions if total_actions > 0 else 0
    sell_ratio = sell_count / total_actions if total_actions > 0 else 0

    # Record this round's results.
    buy_ratios.append(buy_ratio)
    sell_ratios.append(sell_ratio)
    total_rewards.append(total_reward)

    print(f"Iteration {iteration + 1}: 매수 비율: {buy_ratio:.2%}, 매도 비율: {sell_ratio:.2%}, Total Reward: {total_reward}")

# Final summary across all rounds.
print("\n학습 결과 변화:")
print("매수 비율:", buy_ratios)
print("매도 비율:", sell_ratios)
print("Total Rewards:", total_rewards)

KeyboardInterrupt: 