In [2]:
import gym
from gym import spaces
import numpy as np

In [None]:
class SmartChargeEnv(gym.Env):
    """
    Gym environment for smart charging of an electric taxi.

    The taxi arrives home at 14:00 (step 0) and leaves at 16:00, i.e. right
    after the decision taken at the last step (step 7). At each 15-minute
    interval the agent chooses one of ``num_bins`` evenly spaced charging
    powers in ``[0, p_max]``.

    Observation: ``[time_step, energy]`` as a float32 array.
    Reward: ``-exp(alpha[t]) * power`` at every step. On the final step a
    demand is sampled from ``Normal(mu, sigma)`` and ``fail_penalty`` is
    subtracted if the stored energy is below that demand.

    ``reset()`` returns the observation only and ``step()`` returns the
    4-tuple ``(obs, reward, done, info)``.
    """

    def __init__(self,
                 E_max=50.0,            # Maximum battery capacity in kWh
                 p_max=22.0,            # Maximum charging power in kW
                 num_bins=23,           # Discrete power levels (including zero)
                 mu=30.0, sigma=5.0,    # Demand distribution parameters (kWh)
                 fail_penalty=1000.0,   # Penalty if energy < demand at departure
                 alpha=None             # Time-based cost coefficients
                 ):
        """
        Build the action/observation spaces and store the parameters.

        Raises:
            ValueError: if ``alpha`` is given but does not contain exactly
                one coefficient per time step.
        """
        super().__init__()

        # 1) Environment parameters
        self.E_max = E_max
        self.p_max = p_max
        self.num_steps = int((16 - 14) * 4)  # 8 time steps of 15 minutes
        self.dt = 0.25  # 15 minutes = 0.25 hours
        self.mu = mu
        self.sigma = sigma
        self.fail_penalty = fail_penalty

        # Default time-varying cost coefficients if not provided
        if alpha is None:
            # Coefficient rises linearly from 0.1 (step 0, 14:00) to 0.4
            # (last step), so charging early is cheaper than charging late.
            self.alpha = np.linspace(0.1, 0.4, self.num_steps)
        else:
            # Explicit check instead of `assert`, which is removed under -O.
            if len(alpha) != self.num_steps:
                raise ValueError(
                    f"alpha must have {self.num_steps} entries, "
                    f"got {len(alpha)}"
                )
            self.alpha = np.array(alpha, dtype=float)

        # 2) Action space: discrete power levels from 0 to p_max
        self.num_bins = num_bins
        self.action_space = spaces.Discrete(self.num_bins)
        # Map integer action -> actual power
        self.power_levels = np.linspace(0, self.p_max, self.num_bins)

        # 3) Observation space: [time_step, energy]
        low = np.array([0.0, 0.0], dtype=np.float32)
        high = np.array([self.num_steps - 1, self.E_max], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # 4) Initialize state
        self.state = None  # will hold [time_step, energy] after reset()
        self.done = False

    def reset(self):
        """
        Reset the environment to its initial state and return the observation:
          - time_step = 0 (14:00)
          - energy = 0.0 kWh (the battery starts empty each day)
        """
        t0 = 0
        E0 = 0.0
        self.state = np.array([t0, E0], dtype=np.float32)
        self.done = False
        return self.state

    def step(self, action):
        """
        Execute one time step in the environment:
        1) Map action index to charging power
        2) Update energy with charging, respecting E_max
        3) Compute cost = -exp(alpha[t]) * power
        4) If final step, sample demand and apply penalty if needed
        5) Increment time, check done flag

        Returns:
            (state, reward, done, info) where ``info["energy"]`` is the
            battery energy after this step.
        """
        t, energy = self.state
        t = int(t)
        # The state is stored as float32; do the arithmetic on plain Python
        # floats so the result type does not depend on NumPy promotion rules.
        energy = float(energy)

        # 1) Get charging power from action index
        power = float(self.power_levels[action])

        # 2) Update energy, capped at the battery capacity
        energy_next = min(energy + power * self.dt, self.E_max)

        # 3) Charging cost (negative reward)
        reward = float(-np.exp(self.alpha[t]) * power)

        # 4) Check terminal condition at final time step
        if t == self.num_steps - 1:
            # Sample stochastic demand at departure. This draws from NumPy's
            # global RNG, so seed it with np.random.seed for reproducibility.
            demand = np.random.normal(self.mu, self.sigma)
            # Apply high penalty if battery < demand
            if energy_next < demand:
                reward -= self.fail_penalty
            self.done = True

        # 5) Advance time; the step index is frozen once the episode is over
        #    so the observation stays inside observation_space.
        t_next = t if self.done else t + 1
        self.state = np.array([t_next, energy_next], dtype=np.float32)

        info = {"energy": energy_next}
        return self.state, reward, self.done, info

    def render(self, mode='human'):
        """Print the current time step and stored energy."""
        t, E = self.state
        print(f"Time Step: {int(t)}/{self.num_steps}, Energy: {E:.2f} kWh")





In [None]:
if __name__ == "__main__":
    # Smoke test: play a single episode with uniformly random actions and
    # report the accumulated reward.
    env = SmartChargeEnv()
    obs, done, total_reward = env.reset(), False, 0.0
    while True:
        if done:
            break
        action = env.action_space.sample()  # random policy
        obs, r, done, info = env.step(action)
        total_reward = total_reward + r
        env.render()
    # Every step has a non-positive reward, so the total is expected to be
    # negative (and strongly so when the departure penalty is applied).
    print("Total Reward:", total_reward)