In [1]:
import numpy as np
import pandas as pd
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
from EVChargingEnv import EVChargingEnv

In [2]:
# --- Synthetic inputs for the charging environment ---
# Seed NumPy's legacy global RNG so the sampled prices are repeatable.
np.random.seed(456)

# Electricity price series (c_t): 100 time steps drawn uniformly from [20, 100).
price_data = np.random.uniform(20, 100, 100)

# EV usage probability series (p_t): a sine wave evaluated at 100 evenly spaced
# points on [0, 10], then shifted and scaled from [-1, 1] into [0, 1].
phase = np.linspace(0, 10, 100)
usage_prob_data = (np.sin(phase) + 1) / 2

# Bundle both series into a DataFrame (optional) and preview the first rows.
data = pd.DataFrame({"Price": price_data, "Usage_Prob": usage_prob_data})
print(data.head())


       Price  Usage_Prob
0  39.900473    0.500000
1  33.045343    0.550419
2  82.691460    0.600324
3  84.681871    0.649207
4  70.050274    0.696568


In [3]:
# Build the environment from the example price and usage-probability series
env = EVChargingEnv(price_data=price_data, usage_prob_data=usage_prob_data)

# Validate that the environment follows the Gym/Gymnasium API that
# Stable-Baselines3 expects; warn=True also reports non-fatal issues.
check_env(env, warn=True)

# Seed for the agent. Without it, network initialisation and exploration
# differ on every run, so training results cannot be reproduced even though
# the input data above is seeded.
TRAIN_SEED = 456

# Train the DQN agent with the default MLP policy
model = DQN("MlpPolicy", env, verbose=1, seed=TRAIN_SEED)
model.learn(total_timesteps=10000)

# Save the trained model so later cells can reload it
model.save("ev_charging_dqn")


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 99       |
|    ep_rew_mean      | 2.68e+03 |
|    exploration_rate | 0.624    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1043     |
|    time_elapsed     | 0        |
|    total_timesteps  | 396      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 56       |
|    n_updates        | 73       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 99       |
|    ep_rew_mean      | 3.41e+03 |
|    exploration_rate | 0.248    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 965      |
|    time_elapsed     | 0        |
|    total_timesteps  | 792      |
| train/              |        

In [5]:
# Load the trained model from the file written by the training cell
model = DQN.load("ev_charging_dqn")

# Number of environment steps to roll out in this demonstration
N_EVAL_STEPS = 100

# Roll out the trained policy, starting from a fresh episode.
# The reset info dict is not used here.
state, _reset_info = env.reset()

for _step in range(N_EVAL_STEPS):
    # Ask for the greedy action. With the default deterministic=False, DQN's
    # predict() may still return a random exploratory action, which would
    # make this rollout a noisy picture of what the agent actually learned.
    action, _states = model.predict(state, deterministic=True)

    # Apply the action and advance the environment by one step
    state, reward, done, truncated, info = env.step(action)

    # Print/draw the environment's current status
    env.render()

    # Start a new episode once the current one has ended
    if done or truncated:
        state, _reset_info = env.reset()

Time: 1, Battery Level: 0.4, Price: 33.04534265490852
Time: 2, Battery Level: 0.30000000000000004, Price: 82.69146045129821
Time: 3, Battery Level: 0.20000000000000004, Price: 84.6818711073183
Time: 4, Battery Level: 0.2, Price: 70.05027422586366
Time: 5, Battery Level: 0.2, Price: 68.3290906620339
Time: 6, Battery Level: 0.2, Price: 90.85615224919309
Time: 7, Battery Level: 0.2, Price: 80.72939742663866
Time: 8, Battery Level: 0.2, Price: 34.48840449129953
Time: 9, Battery Level: 0.2, Price: 32.013545291427114
Time: 10, Battery Level: 0.30000000000000004, Price: 54.85434969151666
Time: 11, Battery Level: 0.20000000000000004, Price: 50.82180421200717
Time: 12, Battery Level: 0.2, Price: 66.05678958461993
Time: 13, Battery Level: 0.2, Price: 31.68725555653066
Time: 14, Battery Level: 0.2, Price: 74.9274414964124
Time: 15, Battery Level: 0.2, Price: 57.5043204048983
Time: 16, Battery Level: 0.2, Price: 65.59993181327715
Time: 17, Battery Level: 0.2, Price: 71.65608118438828
Time: 18, Bat