In [1]:
import itertools
import os
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.interpolate import griddata
from stable_baselines3 import SAC

from energy_env import EnergyMarketEnv
warnings.filterwarnings('ignore')

In [2]:
def evaluate_model(model, env, episodes=1000, max_steps=20):
    """Return the mean undiscounted episode return of ``model`` on ``env``.

    Every episode starts with ``env.reset()`` and follows the model's
    deterministic action until the environment reports ``done`` or
    ``max_steps`` steps have been taken, whichever happens first.

    Args:
        model: Object exposing ``predict(state, deterministic=True)`` that
            returns an ``(action, extra)`` pair.
        env: Environment whose ``reset()`` returns the initial state and whose
            ``step(action)`` returns ``(next_state, reward, done, info)``.
        episodes: Number of evaluation episodes to average over.
        max_steps: Upper bound on the number of steps per episode. Defaults
            to 20, the horizon that used to be hard-coded.

    Returns:
        The mean of the per-episode reward sums, as computed by ``np.mean``.
    """
    rewards_per_episode = []
    for _ in range(episodes):
        state = env.reset()
        score = 0
        for _ in range(max_steps):
            # Pure evaluation: deterministic=True disables action sampling.
            action, _ = model.predict(state, deterministic=True)
            state, reward, done, _ = env.step(action)
            score += reward
            if done:
                break
        rewards_per_episode.append(score)
    return np.mean(rewards_per_episode)

### Instance
| Month       | λ     | k     | μ_WS  | σ_WS  | κ_P  | μ_P   | σ_P   |
|------------|------|------|------|------|------|------|------|
| January   | 0.127 | 1.430 | 7.145 | 5.072 | 1.035 | 40.712 | 12.693 |
| February  | 0.135 | 1.499 | 6.663 | 4.527 | 0.899 | 44.357 | 11.300 |
| March     | 0.143 | 1.598 | 6.289 | 4.030 | 1.150 | 39.112 | 18.475 |
| April     | 0.132 | 1.712 | 6.757 | 4.065 | 1.114 | 38.993 | 17.593 |
| May       | 0.129 | 1.677 | 6.943 | 4.257 | 1.441 | 34.981 | 16.075 |
| June      | 0.150 | 1.632 | 5.981 | 3.758 | 1.244 | 30.401 | 17.036 |
| July      | 0.148 | 1.653 | 6.052 | 3.760 | 1.700 | 38.202 | 16.199 |
| August    | 0.165 | 1.553 | 5.438 | 3.576 | 1.433 | 35.824 | 15.015 |

### Maximum Storage Level
| Level | Value  |
|--------|--------|
| 1      | 0.00   |
| 2      | 1.25   |
| 3      | 2.50   |
| 4      | 3.75   |
| 5      | 5.00   |
| 6      | 6.25   |
| 7      | 7.50   |

### Efficiency Charge / Efficiency Discharge  
| Efficiency Charge | Efficiency Discharge | Product (Charge * Discharge) |
|------------------|--------------------|-----------------------------|
| X              | Y                  | 0.5                         |
| X              | Y                  | 0.6                         |
| X              | Y                  | 0.7                         |
| X              | Y                  | 0.8                         |
| X              | Y                  | 0.9                         |
| X              | Y                  | 1.0                         |


In [12]:
# Storage configuration for this single run: largest capacity from the
# "Maximum Storage Level" table and the lowest charge*discharge efficiency.
storage_level_max = 7.5
efficiency = 0.5

# January values from the instance table (λ, k, κ_P, μ_P, σ_P).
lamda = 0.127
kappa = 1.43
ou_kappa = 1.035
ou_mu = 40.712
sigma_p = 12.693

env = EnergyMarketEnv(storage_level_max, efficiency, lamda, kappa, ou_kappa, ou_mu, sigma_p)
model = SAC("MlpPolicy", env, learning_rate=3e-4)
model.learn(total_timesteps=10000)

<stable_baselines3.sac.sac.SAC at 0x2501dccc310>

In [14]:
# Persist the trained agent under models/, tagged with the storage capacity
# and efficiency it was trained with.
save_path = f"models/SAC_jan_{storage_level_max}_{efficiency}_model"
model.save(save_path)

In [13]:
# Average the agent's episode return over n_episodes rollouts and report it.
n_episodes = 10000
mean_daily_reward = evaluate_model(model, env, n_episodes)
print(f"Durschnittliche Belohnung an einem Tag: {mean_daily_reward: .2f}€ über {n_episodes} Szenarien")

Durschnittliche Belohnung an einem Tag:  1173.14€ über 10000 Szenarien


#### Training der Modelle in Abhängigkeit von den Speicherparametern (Wetterbedingungen und Energiepreise für Januar)

In [5]:
# Sweep grid: one SAC agent is trained per (efficiency, storage capacity) pair.
efficient_values = [0.5, 0.6, 0.7, 0.8, 0.9, 1]
storage_level_max_values = [0, 1.25, 2.5, 3.75, 5, 6.25, 7.5]

# Efficiency is the outer loop and storage capacity the inner one.
for efficient in efficient_values:
    for storage_level_max in storage_level_max_values:
        # The trailing arguments are the January values (λ, k, κ_P, μ_P, σ_P)
        # from the instance table.
        env = EnergyMarketEnv(storage_level_max, efficient, 0.127, 1.43, 1.035, 40.712, 12.693)
        model = SAC("MlpPolicy", env, learning_rate=3e-3)
        model.learn(total_timesteps=10000)
        model.save(f"models/SAC_jan_{storage_level_max}_{efficient}_model")
        env.close()

#### Testen der trainierten Modelle über 1000 Szenarien je Konfiguration

In [None]:
# Evaluate every trained agent of the sweep and collect the average profit
# per (storage capacity, efficiency) configuration.
efficient_values = [0.5, 0.6, 0.7, 0.8, 0.9, 1]
storage_level_max_values = [0, 1.25, 2.5, 3.75, 5, 6.25, 7.5]
profit = []
x_axis = []  # storage capacity of each evaluated configuration
y_axis = []  # efficiency of each evaluated configuration
n_episodes = 1000
for efficient, storage_level_max in itertools.product(efficient_values, storage_level_max_values):
    model_path = f"models/SAC_jan_{storage_level_max}_{efficient}_model"
    # Depending on the file name, stable-baselines3 may have stored the model
    # with or without a ".zip" suffix, so both candidates are checked.
    saved_file = next(
        (path for path in (model_path, f"{model_path}.zip") if os.path.isfile(path)),
        None,
    )
    if saved_file is None:
        # Fail loudly: falling through would silently re-evaluate the model
        # left over from the previous iteration and record wrong profits.
        raise FileNotFoundError(
            f"No trained model found at '{model_path}' or '{model_path}.zip'"
        )

    env = EnergyMarketEnv(storage_level_max, efficient, 0.127, 1.43, 1.035, 40.712, 12.693)
    try:
        model = SAC.load(saved_file, env=env)
        avg_profit = evaluate_model(model, env, n_episodes)
    finally:
        # Release the environment even if loading or evaluation fails.
        env.close()

    profit.append(avg_profit)
    x_axis.append(storage_level_max)
    y_axis.append(efficient)
    print(f"Durschnittliche Belohnung an einem Tag: {avg_profit:.2f}€ über {n_episodes} Szenarien")

### Testergebnisse als DataFrame speichern

In [25]:
# Collect the sweep results column-wise and write them to disk as CSV
# (without the index column).
result_columns = {
    "Effiency": y_axis,
    "Storage Level": x_axis,
    "Profit": profit,
}
df = pd.DataFrame(result_columns)
df.to_csv('experiment.csv', index=False)

#### Ergebnisse visualisieren

In [24]:
# Scattered sweep results as NumPy arrays.
x = np.array(x_axis)  # max storage level
y = np.array(y_axis)  # efficiency
z = np.array(profit)  # average profit

# Regular 30 x 30 grid spanning the evaluated parameter range.
x_lin = np.linspace(x.min(), x.max(), 30)
y_lin = np.linspace(y.min(), y.max(), 30)
x_grid, y_grid = np.meshgrid(x_lin, y_lin)

# Interpolate the measured profits onto the grid (cubic interpolation).
z_grid = griddata((x, y), z, (x_grid, y_grid), method='cubic')

# 3D surface of profit over storage level and efficiency.
fig = go.Figure(data=[go.Surface(x=x_grid, y=y_grid, z=z_grid, colorscale="Viridis")])

# Titles and figure size.
fig.update_layout(
    title="Profit als Funktion von Storage Level und Efficiency",
    scene=dict(
        xaxis_title="Max Storage Level",
        yaxis_title="Efficiency",
        zaxis_title="Profit (€)"
    ),
    width=1000,  # width in pixels
    height=800   # height in pixels
)


fig.show()
