In [1]:
%run ../../Environment/environment.ipynb

e:\Studienarbeit_GitHub\Studienarbeit\Agents\PPO
Notebook ausgeführt


# 1. Bibliotheken importieren

In [2]:
import numpy as np
import pandas as pd
import optuna
import joblib
import random
from stable_baselines3 import PPO, A2C, DQN
from stable_baselines3.common.callbacks import CheckpointCallback
from sklearn.preprocessing import StandardScaler
from stable_baselines3.common.vec_env import DummyVecEnv
import torch

# 2. Seed setzen

In [3]:
seed = 42
SEED  = seed % (2**32 - 1)
print(f"SEED: {SEED}")

SEED: 42


# 3. Daten einlesen

In [4]:
# -------------------------------
# CSV Datem einlesen
# -------------------------------
train_data = pd.read_csv("../../Transform_data/stand_data/2023-2018_stand_data.csv")
train_data.drop('datetime', axis=1, inplace=True)

test_data = pd.read_csv("../../Transform_data/stand_data/2025-2024_stand_data.csv")
test_data.drop('datetime', axis=1, inplace=True)

if(train_data is not None and test_data is not None):
    print("Daten erfolgreich eingelesen")

Daten erfolgreich eingelesen


# 4. Parallele Umgebungen erstellen für das Training

In [5]:
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.vec_env import VecNormalize

def make_env():
    return TradingEnv(
        data=train_data,
        initial_cash=10_000,
        window_size=336,
        scaler_path="../../Transform_data/scaler.pkl",
        default_seed=SEED
    )

n_envs = 8  # Mehr parallele Umgebungen (8, 16 oder sogar 32 testen!)
env = SubprocVecEnv([make_env for _ in range(n_envs)])

env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)
env.training = True  # Sicherstellen, dass Normalisierung aktiv ist

# 5. Hyperparameter Evaluierung

In [9]:
# Für Hyperparameter-Tuning nutzen wir test_data als Validierungsdatensatz.
valid_data = test_data.copy()

# -------------------------------
# Evaluation Helper Function
# -------------------------------
def evaluate_agent(model, env, n_eval_episodes=5):
    """
    Evaluate the model over a number of episodes.
    Returns the average cumulative reward.
    """
    episode_rewards = []
    for _ in range(n_eval_episodes):
        # Handle reset return for compatibility with gym vs. gymnasium APIs
        reset_result = env.reset()
        if isinstance(reset_result, tuple):
            obs, info = reset_result
        else:
            obs = reset_result
        done = False
        total_reward = 0.0
        while not done:
            action, _states = model.predict(obs, deterministic=True)
            step_result = env.step(action)
            # Check length to handle different API outputs
            if len(step_result) == 5:
                obs, reward, done, truncated, info = step_result
            else:
                obs, reward, done, info = step_result
            total_reward += reward
        episode_rewards.append(total_reward)
    return np.mean(episode_rewards)

In [14]:
# -------------------------------
# Hyperparameter Tuning with Optuna
# -------------------------------
def objective(trial):
    # Sample hyperparameters using updated Optuna functions
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    gamma = trial.suggest_float("gamma", 0.90, 0.9999)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256])
    n_steps = trial.suggest_categorical("n_steps", [128, 256, 512, 1024])
    ent_coef = trial.suggest_float("ent_coef", 1e-6, 0.01, log=True)
    clip_range = trial.suggest_float("clip_range", 0.1, 0.4)
    gae_lambda = trial.suggest_float("gae_lambda", 0.8, 0.99)

    
    # Create the training environment using train_data
    env_train = DummyVecEnv([lambda: TradingEnv(
        data=train_data,
        initial_cash=10_000,
        window_size=336,
        scaler_path="../../Transform_data/scaler.pkl",
        default_seed=SEED
    )])
    
    # Create the validation environment using valid_data
    env_valid = DummyVecEnv([lambda: TradingEnv(
        data=valid_data,
        initial_cash=10_000,
        window_size=336,
        scaler_path="../../Transform_data/scaler.pkl",
        default_seed=SEED
    )])
    
    # Initialize the PPO model with sampled hyperparameters
    model = PPO(
        "MlpPolicy",
        env_train,
        learning_rate=learning_rate,
        gamma=gamma,
        batch_size=batch_size,
        n_steps=n_steps,
        ent_coef=ent_coef,
        clip_range=clip_range,
        gae_lambda=gae_lambda,
        verbose=0,
        seed=SEED,
    )
    
    # Train the model for a fixed number of timesteps
    model.learn(total_timesteps=10000, log_interval=1)
    
    # Evaluate the trained model on the validation environment
    mean_reward = evaluate_agent(model, env_valid, n_eval_episodes=5)
    return mean_reward

In [15]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)
print("Best hyperparameters:", study.best_trial.params)

[I 2025-04-06 17:11:36,961] A new study created in memory with name: no-name-41726fc8-e193-4554-97fc-8523bae310ca


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:13:18,187] Trial 0 finished with value: 0.0 and parameters: {'learning_rate': 0.0008541333189903621, 'gamma': 0.9663429213168511, 'batch_size': 32, 'n_steps': 256, 'ent_coef': 0.00011614601472383898, 'clip_range': 0.31303180109881346, 'gae_lambda': 0.894724532868429}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:14:59,561] Trial 1 finished with value: 0.0 and parameters: {'learning_rate': 0.0018022533807442135, 'gamma': 0.9945711045838953, 'batch_size': 32, 'n_steps': 128, 'ent_coef': 0.00028934493037223826, 'clip_range': 0.3585110258482842, 'gae_lambda': 0.9474647455947949}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:16:24,866] Trial 2 finished with value: -0.475980281829834 and parameters: {'learning_rate': 1.768867576749866e-05, 'gamma': 0.9527647550814637, 'batch_size': 64, 'n_steps': 256, 'ent_coef': 1.2493531452963062e-05, 'clip_range': 0.21319064773669646, 'gae_lambda': 0.8671360447382713}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=128 and n_envs=1)


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:17:44,991] Trial 3 finished with value: 0.0 and parameters: {'learning_rate': 0.0007605906450479201, 'gamma': 0.9618576276986741, 'batch_size': 256, 'n_steps': 128, 'ent_coef': 0.0002201268243094233, 'clip_range': 0.22868380397681987, 'gae_lambda': 0.9206733256430781}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:19:17,032] Trial 4 finished with value: -7.506422519683838 and parameters: {'learning_rate': 0.0005721091816045841, 'gamma': 0.991909421028788, 'batch_size': 64, 'n_steps': 256, 'ent_coef': 5.501349185467948e-05, 'clip_range': 0.27128824158392717, 'gae_lambda': 0.8543505571724992}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:20:55,876] Trial 5 finished with value: -7.453783988952637 and parameters: {'learning_rate': 1.8223798414147823e-05, 'gamma': 0.9062372920252098, 'batch_size': 32, 'n_steps': 256, 'ent_coef': 0.0001684529084633605, 'clip_range': 0.2827801671596349, 'gae_lambda': 0.9611096371160663}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:22:20,620] Trial 6 finished with value: -7.509558200836182 and parameters: {'learning_rate': 0.00011894468328292059, 'gamma': 0.924114754436481, 'batch_size': 64, 'n_steps': 256, 'ent_coef': 0.00010546796413278614, 'clip_range': 0.26178308884824064, 'gae_lambda': 0.928300648202877}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:23:55,770] Trial 7 finished with value: 0.0 and parameters: {'learning_rate': 0.005781459653334748, 'gamma': 0.9458110666019072, 'batch_size': 32, 'n_steps': 1024, 'ent_coef': 7.473324795382942e-05, 'clip_range': 0.268207611402309, 'gae_lambda': 0.926386503463204}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:25:41,277] Trial 8 finished with value: 0.0 and parameters: {'learning_rate': 0.008542894790599886, 'gamma': 0.9443178343281282, 'batch_size': 32, 'n_steps': 512, 'ent_coef': 0.0025760430440199267, 'clip_range': 0.23324879159888748, 'gae_lambda': 0.9173546875986069}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:26:56,340] Trial 9 finished with value: -7.26754093170166 and parameters: {'learning_rate': 3.0501879060355047e-05, 'gamma': 0.9771240952488512, 'batch_size': 256, 'n_steps': 512, 'ent_coef': 6.839553908702882e-06, 'clip_range': 0.3118670579949559, 'gae_lambda': 0.9685839415131362}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:28:11,688] Trial 10 finished with value: 0.0 and parameters: {'learning_rate': 0.00012111486348556782, 'gamma': 0.9706687265458617, 'batch_size': 128, 'n_steps': 1024, 'ent_coef': 1.1940189258368716e-06, 'clip_range': 0.1245301923901522, 'gae_lambda': 0.8024458688932625}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:29:48,182] Trial 11 finished with value: -7.507968902587891 and parameters: {'learning_rate': 0.0012035101896883552, 'gamma': 0.9976597632275905, 'batch_size': 32, 'n_steps': 128, 'ent_coef': 0.001451046364234224, 'clip_range': 0.3747426533135968, 'gae_lambda': 0.9880511338201419}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:31:31,272] Trial 12 finished with value: 0.0 and parameters: {'learning_rate': 0.0023681740927939056, 'gamma': 0.9825410890857094, 'batch_size': 32, 'n_steps': 128, 'ent_coef': 0.0007465504553709364, 'clip_range': 0.393074779489615, 'gae_lambda': 0.8802146479796477}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:33:00,841] Trial 13 finished with value: -7.505629062652588 and parameters: {'learning_rate': 0.002448143150983099, 'gamma': 0.9839031973305361, 'batch_size': 128, 'n_steps': 128, 'ent_coef': 0.0004637468721912732, 'clip_range': 0.3367075517532364, 'gae_lambda': 0.8395661575993324}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:34:42,626] Trial 14 finished with value: -6.640154838562012 and parameters: {'learning_rate': 0.0002744431832185341, 'gamma': 0.9651365992920926, 'batch_size': 32, 'n_steps': 256, 'ent_coef': 0.006642374694581755, 'clip_range': 0.3468480029918305, 'gae_lambda': 0.8872045757443345}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:36:19,340] Trial 15 finished with value: -7.506422519683838 and parameters: {'learning_rate': 0.00197908119475464, 'gamma': 0.9310478775173076, 'batch_size': 32, 'n_steps': 128, 'ent_coef': 2.802147707198394e-05, 'clip_range': 0.17768133596722213, 'gae_lambda': 0.9470060366900392}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:37:57,214] Trial 16 finished with value: 0.0 and parameters: {'learning_rate': 0.000373824088560426, 'gamma': 0.9978251482109382, 'batch_size': 32, 'n_steps': 512, 'ent_coef': 0.0005001744865762369, 'clip_range': 0.31087665407235093, 'gae_lambda': 0.898205582999192}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:39:18,614] Trial 17 finished with value: 0.0 and parameters: {'learning_rate': 0.004575688974760983, 'gamma': 0.9581816168852025, 'batch_size': 128, 'n_steps': 1024, 'ent_coef': 1.5402774478980154e-05, 'clip_range': 0.35648353388238185, 'gae_lambda': 0.8276322454196735}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:40:40,721] Trial 18 finished with value: -7.501458644866943 and parameters: {'learning_rate': 0.00026016362340297187, 'gamma': 0.9733190319963099, 'batch_size': 256, 'n_steps': 256, 'ent_coef': 2.388580936729306e-06, 'clip_range': 0.3098524632844004, 'gae_lambda': 0.9018156394142426}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:42:26,039] Trial 19 finished with value: 0.0 and parameters: {'learning_rate': 0.0011246498820956497, 'gamma': 0.9866967380248721, 'batch_size': 32, 'n_steps': 128, 'ent_coef': 0.0027964583391857007, 'clip_range': 0.3910017264003614, 'gae_lambda': 0.9489492872783829}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:44:21,503] Trial 20 finished with value: -0.4026470184326172 and parameters: {'learning_rate': 0.00013069915247286063, 'gamma': 0.9356641648622713, 'batch_size': 32, 'n_steps': 128, 'ent_coef': 0.0002621732200249227, 'clip_range': 0.32896205614676544, 'gae_lambda': 0.9408127471347928}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:45:47,759] Trial 21 finished with value: -7.505607604980469 and parameters: {'learning_rate': 0.0006569594579628905, 'gamma': 0.9625790157736136, 'batch_size': 256, 'n_steps': 128, 'ent_coef': 0.0002533171218043587, 'clip_range': 0.20010588363826595, 'gae_lambda': 0.9031814229391448}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:47:10,471] Trial 22 finished with value: 0.0 and parameters: {'learning_rate': 0.0010788217594172465, 'gamma': 0.9679422085562995, 'batch_size': 256, 'n_steps': 128, 'ent_coef': 4.17066481842463e-05, 'clip_range': 0.13075313574976033, 'gae_lambda': 0.9142097660484653}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:48:37,449] Trial 23 finished with value: 0.0 and parameters: {'learning_rate': 0.0036758932390028873, 'gamma': 0.9569302073320709, 'batch_size': 256, 'n_steps': 128, 'ent_coef': 0.00013117133667958508, 'clip_range': 0.15912145792372923, 'gae_lambda': 0.9690806162828275}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:50:06,105] Trial 24 finished with value: 0.0 and parameters: {'learning_rate': 0.0006416024965791346, 'gamma': 0.9791542788616138, 'batch_size': 256, 'n_steps': 128, 'ent_coef': 0.0010005745848952624, 'clip_range': 0.24350734233459903, 'gae_lambda': 0.8785600523406388}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:51:23,304] Trial 25 finished with value: 0.0 and parameters: {'learning_rate': 0.001518915393497778, 'gamma': 0.9390777364465377, 'batch_size': 256, 'n_steps': 256, 'ent_coef': 0.000326685476249358, 'clip_range': 0.36318425268584437, 'gae_lambda': 0.9323315559873767}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:53:05,005] Trial 26 finished with value: 0.0 and parameters: {'learning_rate': 0.0004921395485063925, 'gamma': 0.9199779358702649, 'batch_size': 64, 'n_steps': 1024, 'ent_coef': 0.000151396335603709, 'clip_range': 0.2927973766167514, 'gae_lambda': 0.9176211853195694}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:54:40,055] Trial 27 finished with value: -7.486629486083984 and parameters: {'learning_rate': 0.00020727144422685265, 'gamma': 0.9899164782170523, 'batch_size': 128, 'n_steps': 512, 'ent_coef': 3.6496099660293714e-05, 'clip_range': 0.23033140910554595, 'gae_lambda': 0.9884099597955309}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:56:36,930] Trial 28 finished with value: -7.5526275634765625 and parameters: {'learning_rate': 5.1738730305190716e-05, 'gamma': 0.9515272703999916, 'batch_size': 32, 'n_steps': 128, 'ent_coef': 0.001625803888500342, 'clip_range': 0.2963080776479776, 'gae_lambda': 0.9547862275519493}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:58:25,783] Trial 29 finished with value: -7.506422519683838 and parameters: {'learning_rate': 0.0031879654299477116, 'gamma': 0.9586535284872408, 'batch_size': 64, 'n_steps': 256, 'ent_coef': 6.751926143817871e-06, 'clip_range': 0.22384451433008282, 'gae_lambda': 0.8649106795124023}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 17:59:57,759] Trial 30 finished with value: -7.47726583480835 and parameters: {'learning_rate': 0.0008969460175625516, 'gamma': 0.9721548113723878, 'batch_size': 256, 'n_steps': 256, 'ent_coef': 1.9546866687086226e-05, 'clip_range': 0.205976398078368, 'gae_lambda': 0.911503953260385}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:01:42,810] Trial 31 finished with value: 0.0 and parameters: {'learning_rate': 0.005505916098052546, 'gamma': 0.9467134234049951, 'batch_size': 32, 'n_steps': 1024, 'ent_coef': 7.45211924432363e-05, 'clip_range': 0.25844112602027475, 'gae_lambda': 0.9289637826741876}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:03:25,737] Trial 32 finished with value: -7.506422519683838 and parameters: {'learning_rate': 0.009763246054893057, 'gamma': 0.9510419756148495, 'batch_size': 32, 'n_steps': 1024, 'ent_coef': 7.186468923776083e-05, 'clip_range': 0.2734225908065413, 'gae_lambda': 0.9399049833335482}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:05:00,665] Trial 33 finished with value: 0.0 and parameters: {'learning_rate': 0.0059142464524945225, 'gamma': 0.9421197152658102, 'batch_size': 32, 'n_steps': 1024, 'ent_coef': 0.0001849388538582497, 'clip_range': 0.32514196058801337, 'gae_lambda': 0.8859552144193312}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:06:42,563] Trial 34 finished with value: 0.0 and parameters: {'learning_rate': 0.001905754027050823, 'gamma': 0.9285982614724523, 'batch_size': 32, 'n_steps': 1024, 'ent_coef': 0.00048517950316696287, 'clip_range': 0.2808807453099079, 'gae_lambda': 0.9254362339201707}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:08:11,875] Trial 35 finished with value: -7.506422519683838 and parameters: {'learning_rate': 0.0004144200022655539, 'gamma': 0.9122829103746104, 'batch_size': 64, 'n_steps': 256, 'ent_coef': 8.929897568736141e-05, 'clip_range': 0.25113456337626316, 'gae_lambda': 0.9620859680899041}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:09:42,202] Trial 36 finished with value: 0.0 and parameters: {'learning_rate': 0.0007959600160478686, 'gamma': 0.9544785063059852, 'batch_size': 32, 'n_steps': 1024, 'ent_coef': 5.1016896791571315e-05, 'clip_range': 0.1808744393959104, 'gae_lambda': 0.8688170582221487}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:11:15,650] Trial 37 finished with value: 0.0 and parameters: {'learning_rate': 0.006688125335985584, 'gamma': 0.9628290600959524, 'batch_size': 32, 'n_steps': 512, 'ent_coef': 0.0001157139220339777, 'clip_range': 0.3740490462945398, 'gae_lambda': 0.9043346413479183}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:12:51,980] Trial 38 finished with value: 0.0 and parameters: {'learning_rate': 0.0034457240270067283, 'gamma': 0.99418181206585, 'batch_size': 32, 'n_steps': 256, 'ent_coef': 0.00021548034338080685, 'clip_range': 0.29382434793973033, 'gae_lambda': 0.9753477710042355}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:14:17,253] Trial 39 finished with value: -7.505629062652588 and parameters: {'learning_rate': 0.0014910406811100346, 'gamma': 0.9473639519329189, 'batch_size': 64, 'n_steps': 128, 'ent_coef': 1.0269953735058842e-05, 'clip_range': 0.26442465834131884, 'gae_lambda': 0.9347504541942437}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:15:35,924] Trial 40 finished with value: -0.36982694268226624 and parameters: {'learning_rate': 1.4782322170533173e-05, 'gamma': 0.9016290191728101, 'batch_size': 128, 'n_steps': 128, 'ent_coef': 2.38105306144612e-05, 'clip_range': 0.10148813618277716, 'gae_lambda': 0.8895159786496457}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:17:10,495] Trial 41 finished with value: 0.0 and parameters: {'learning_rate': 0.008679174615858569, 'gamma': 0.943755486569027, 'batch_size': 32, 'n_steps': 512, 'ent_coef': 0.008622103688848207, 'clip_range': 0.23147173473970267, 'gae_lambda': 0.9213652299625211}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:18:44,356] Trial 42 finished with value: 0.0 and parameters: {'learning_rate': 0.0027505160738640474, 'gamma': 0.9378310980428224, 'batch_size': 32, 'n_steps': 512, 'ent_coef': 0.0025115991786251015, 'clip_range': 0.2428725551669319, 'gae_lambda': 0.9129192868633201}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:20:22,783] Trial 43 finished with value: 0.0 and parameters: {'learning_rate': 0.00766263402709702, 'gamma': 0.9760123129628115, 'batch_size': 32, 'n_steps': 512, 'ent_coef': 0.0038256782979376493, 'clip_range': 0.21072245379364848, 'gae_lambda': 0.923754010918574}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:22:05,621] Trial 44 finished with value: 0.0 and parameters: {'learning_rate': 0.0040161861097041205, 'gamma': 0.9311455496063519, 'batch_size': 32, 'n_steps': 512, 'ent_coef': 0.0006516485626807613, 'clip_range': 0.3416642307703015, 'gae_lambda': 0.9502982241267396}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:23:43,990] Trial 45 finished with value: 0.0 and parameters: {'learning_rate': 0.0020675775802117984, 'gamma': 0.9681755282167619, 'batch_size': 32, 'n_steps': 512, 'ent_coef': 0.0013359821168559072, 'clip_range': 0.3199498813363229, 'gae_lambda': 0.8952637161989229}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:25:18,776] Trial 46 finished with value: 0.0 and parameters: {'learning_rate': 0.004817141008092972, 'gamma': 0.9465076843462565, 'batch_size': 32, 'n_steps': 256, 'ent_coef': 0.0003758748766671967, 'clip_range': 0.19508539777731698, 'gae_lambda': 0.9084140807492402}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:26:31,848] Trial 47 finished with value: -4.259600639343262 and parameters: {'learning_rate': 0.0013203361045479652, 'gamma': 0.9795858096044442, 'batch_size': 256, 'n_steps': 1024, 'ent_coef': 0.0009224619100730801, 'clip_range': 0.22055119609877294, 'gae_lambda': 0.958353561576492}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:27:51,601] Trial 48 finished with value: -7.498857021331787 and parameters: {'learning_rate': 0.00018221383852095463, 'gamma': 0.9613084670291221, 'batch_size': 128, 'n_steps': 128, 'ent_coef': 0.005355701723210471, 'clip_range': 0.3058243957907378, 'gae_lambda': 0.9446250663941415}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42
Seed in the environment: 42


[I 2025-04-06 18:29:28,017] Trial 49 finished with value: -7.507968902587891 and parameters: {'learning_rate': 0.001738987496241959, 'gamma': 0.999883694576279, 'batch_size': 32, 'n_steps': 128, 'ent_coef': 5.834324105722164e-05, 'clip_range': 0.27381516451977883, 'gae_lambda': 0.9353431271065653}. Best is trial 0 with value: 0.0.


Seed in the environment: 42
Best hyperparameters: {'learning_rate': 0.0008541333189903621, 'gamma': 0.9663429213168511, 'batch_size': 32, 'n_steps': 256, 'ent_coef': 0.00011614601472383898, 'clip_range': 0.31303180109881346, 'gae_lambda': 0.894724532868429}


Best hyperparameters: {'learning_rate': 0.0008541333189903621, 'gamma': 0.9663429213168511, 'batch_size': 32, 'n_steps': 256, 'ent_coef': 0.00011614601472383898, 'clip_range': 0.31303180109881346, 'gae_lambda': 0.894724532868429}

# 6. Erstellen des Agenten

In [None]:
from torch import nn  # Für die Netzwerkarchitektur

# Definiere das neuronale Netz
policy_kwargs = dict(
    net_arch=[dict(pi=[128, 128], vf=[128, 128])],  # Zwei Layer mit 128 Neuronen
    activation_fn=nn.ReLU,  # Verwende ReLU als Aktivierungsfunktion
)

# Erstelle den PPO-Agenten ohne Hyperparametern
model_without = PPO("MlpPolicy", env, seed=SEED, verbose=1)


# Erstelle den PPO-Agenten mit Optuna-Hyperparametern
model_optuna = PPO(
    "MlpPolicy",
    env,
    learning_rate=0.0008541333189903621,
    gamma=0.9663429213168511,
    batch_size=32,
    n_steps=256,
    ent_coef=0.00011614601472383898,
    clip_range=0.31303180109881346,
    gae_lambda=0.894724532868429,
    policy_kwargs=policy_kwargs,
    verbose=1,
    seed=SEED,
    device="cuda",
)

# 7. Modell trainieren und speichern

In [None]:
model.learn(total_timesteps=10000)

model.save("ppo_trading_model")