In [1]:
# Run the environment notebook before anything else in this notebook.
# NOTE(review): presumably this is what makes TradingEnv available — the
# training cell below instantiates it without importing it. Confirm.
%run ../Environment/environment.ipynb

Notebook ausgeführt


In [9]:
import gym
import numpy as np
import pandas as pd
import joblib
import random
from stable_baselines3 import PPO, A2C, DQN
from stable_baselines3.common.callbacks import CheckpointCallback
from sklearn.preprocessing import StandardScaler
from stable_baselines3.common.vec_env import DummyVecEnv

# -------------------------------
# Seed shared by the environment and the agent
# -------------------------------
seed = 42
SEED = seed % (2**32 - 1)  # reduced modulo 2**32 - 1, so it is always below that value
print(f"SEED: {SEED}")

# -------------------------------
# Load the CSV data and discard the 'datetime' column
# -------------------------------
csv_path = "../Transform_data/stand_data/2023-2018_stand_data.csv"
data = pd.read_csv(csv_path).drop(columns="datetime")

# -------------------------------
# Build the trading environment.
# TradingEnv is not imported in this cell; it has to be defined in the
# kernel already (imported or defined earlier) before this cell runs.
# -------------------------------
env = TradingEnv(
    data=data,
    initial_cash=10_000,
    window_size=14,
    scaler_path="../Transform_data/scaler.pkl",
    default_seed=SEED,
)

# -------------------------------
# Initialise the PPO agent
# -------------------------------
ppo_settings = {
    "verbose": 1,
    "seed": SEED,
    "tensorboard_log": "./tensorboard_log/",
}
model = PPO("MlpPolicy", env, **ppo_settings)

# Optional checkpoint callback for saving intermediate training progress.
# It is only constructed here; it is NOT handed to model.learn() below.
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path="./logs/",
    name_prefix="ppo_trading",
)

# -------------------------------
# Training
# -------------------------------
# To enable checkpointing, additionally pass callback=checkpoint_callback.
model.learn(total_timesteps=10000, log_interval=1)

# Persist the trained model
model.save("ppo_trading_model")

print("Beginn der Vorhersage")



SEED: 42
Seed in the environment: 42
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Seed in the environment: 42
Logging to ./tensorboard_log/PPO_22
-----------------------------
| time/              |      |
|    fps             | 782  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 620         |
|    iterations           | 2           |
|    time_elapsed         | 6           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008870797 |
|    clip_fraction        | 0.0483      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.19       |
|    explained_variance   | -10.984461  |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0314      |
|    n_

In [None]:
# -------------------------------
# Test run: roll out the trained agent for one episode
# -------------------------------
# Upper bound on the rollout length; the loop ends earlier if the episode ends.
MAX_TEST_STEPS = 500

obs, info = env.reset(seed=SEED)
done = False
truncated = False

# Actions chosen by the agent, in step order
action_list = []

for _ in range(MAX_TEST_STEPS):
    # Pick the action deterministically (no sampling from the policy)
    action, _states = model.predict(obs, deterministic=True)
    action = int(action)  # alternatively: action = action.item()
    obs, reward, done, truncated, info = env.step(action)
    action_list.append(action)

    # Stop once the episode is over: stepping an environment after it has
    # reported termination/truncation without a reset is undefined behaviour
    # and would append meaningless actions to action_list.
    if done or truncated:
        break

# Render the final state (e.g. as a plot). The render mode can be adjusted.
env.render(mode='human')
print(action_list)