In [4]:
import gym
import numpy as np
from sklearn.model_selection import train_test_split
from stable_baselines3 import A2C

class SimpleStockEnv(gym.Env):
    """Minimal supervised-style environment over a tabular DataFrame.

    Every row of ``df`` is one time step. The observation is the row without
    its ``'Target'`` column; the agent receives a reward of 1 when its action
    equals the row's ``'Target'`` value and 0 otherwise. One episode walks the
    frame once, from the first row to the last.

    The class follows the classic Gym API: ``reset()`` returns only the
    observation and ``step()`` returns ``(obs, reward, done, info)``.
    """

    def __init__(self, df):
        """Define the spaces and cache the frame's contents as NumPy arrays.

        Args:
            df: DataFrame containing a ``'Target'`` column (the correct action
                per row) plus the feature columns. The feature columns must be
                convertible to ``float32``.

        Raises:
            KeyError: if ``df`` has no ``'Target'`` column.
            ValueError / TypeError: if a feature column cannot be converted to
                ``float32``.
        """
        super(SimpleStockEnv, self).__init__()
        self.df = df
        self.current_step = 0
        self.action_space = gym.spaces.Discrete(2)  # 0: do not buy, 1: buy
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(df.shape[1] - 1,), dtype=np.float32)

        # Cache features and targets once. This avoids two pandas row lookups
        # per step and guarantees observations have the float32 dtype declared
        # by observation_space. Note: later mutations of `df` are not seen.
        feature_mask = df.columns != 'Target'
        self._features = df.loc[:, feature_mask].to_numpy(dtype=np.float32)
        self._targets = df['Target'].to_numpy()

    def step(self, action):
        """Score ``action`` against the current row and advance one row.

        Args:
            action: the chosen action (0 or 1), as a Python scalar or a
                single-element NumPy array.

        Returns:
            Tuple ``(obs, reward, done, info)``. ``obs`` is the next row's
            features as float32, or an all-zero vector once the episode is
            over. ``reward`` is 1.0 or 0.0. ``done`` is True after the last
            row has been scored. ``info`` is an empty dict.

        Raises:
            IndexError: if called again after the episode has finished
                without an intervening ``reset()``.
        """
        correct_action = self._targets[self.current_step]
        # Collapse a 0-d or 1-element array to a plain scalar so the
        # comparison yields a single boolean.
        chosen_action = np.asarray(action).item()
        reward = 1.0 if chosen_action == correct_action else 0.0

        self.current_step += 1
        done = self.current_step >= len(self._targets)
        if done:
            # No next row exists; return a placeholder of the right shape/dtype.
            obs = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            # Copy so callers cannot modify the cached feature matrix.
            obs = self._features[self.current_step].copy()
        return obs, reward, done, {}

    def reset(self):
        """Rewind to the first row and return its features as float32."""
        self.current_step = 0
        return self._features[0].copy()

    def render(self, mode='human'):
        """No-op: this environment has no visual representation."""
        pass



In [5]:
from ml.features.preprocessing import get_data
# Load the dataset via the project helper without persisting it (save_data=False).
# `stock_data` is the frame used for training/testing in the cells below.
# NOTE(review): the repeated progress-bar output suggests get_data downloads
# several tickers on every call — confirm, and consider caching the result.
stock_data, last_day_df = get_data(save_data=False)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [8]:
# Fixed seed so that training is reproducible on Restart & Run All.
SEED = 42

# Split the DataFrame into training and test data. shuffle=False keeps the
# original row order, so the last 5% of the rows become the test set.
# random_state is omitted on purpose: it has no effect when shuffle=False.
train_df, test_df = train_test_split(stock_data, test_size=0.05, shuffle=False)

# Create the RL model and train it on the training set.
# One pass over the training rows: total_timesteps equals the number of rows.
train_env = SimpleStockEnv(train_df)
model = A2C("MlpPolicy", train_env, verbose=1, seed=SEED)
model.learn(total_timesteps=len(train_df))

# Evaluate the model on the held-out rows. Each reward is 1 for a correct
# action and 0 otherwise, so the sum is the number of correct predictions.
test_env = SimpleStockEnv(test_df)
obs = test_env.reset()
total_rewards = 0
for step in range(len(test_df)):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = test_env.step(action)
    total_rewards += reward
    if done:
        break



Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




[ 1.45990002e+03  1.02490000e+09  1.47965101e+03  1.47592905e+03
  1.48201623e+03  1.47180693e+03 -4.71799316e+01  1.49673062e+03
  4.32099464e+01 -3.13055266e+00 -7.96075804e+01  7.59500000e+08
 -2.08119299e+00  7.76985450e+00 -9.85104749e+00 -1.42730431e+02
 -7.53392246e+08  9.24323849e+01  4.31349298e+01  1.47008282e+01
  2.83552660e+01  2.39153008e+01  1.53335390e+03  1.49437100e+03
  1.45538810e+03  2.71899994e+02  3.65000000e+01  1.19650137e+00
  1.71859694e+00  1.06949997e+02  8.27680016e+00  3.86563989e+03
  1.07892900e+04  5.23309998e+02  6.40350000e+03  6.93774023e+03
  6.52995020e+03  1.61241904e+04  1.56772002e+04  4.26433984e+03
  6.63452979e+03  3.23669995e+03  1.10655000e+04  4.88299990e+00
  9.21500027e-01  5.36999989e+00  5.84999990e+00  5.96999979e+00
  5.90700006e+00  5.91400003e+00  1.45968750e+02  4.28333321e+01]
[ 1.45133997e+03  1.10400000e+09  1.47556001e+03  1.47145831e+03
  1.47909468e+03  1.46665946e+03 -4.09100342e+01  1.48905714e+03
  4.04665549e+01 -2.7415

In [9]:
# Accuracy is the accumulated reward divided by the number of test rows.
n_test_rows = len(test_df)
accuracy = total_rewards / n_test_rows
print(f"Genauigkeit: {accuracy}")

Genauigkeit: 0.6232876712328768
