In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%cd ..

/Users/victorli/Documents/CalTech/research/sustain-gym/github_repo/sustaingym


In [73]:
import numpy as np

from sustaingym.envs.evcharging.ev_charging import EVChargingEnv

period = 5
recompute_freq = 2
env = EVChargingEnv(site='caltech', date_range=["2018-11-07", "2018-11-07"], real_traces=True, period=5, recompute_freq=2)

In [46]:
observation = env.reset()

In [51]:
observation.keys()

dict_keys(['arrivals', 'est_departures', 'constraint_matrix', 'magnitudes', 'demands', 'phases', 'timestep'])

In [57]:
observation['demands']

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.], dtype=float32)

In [77]:
from typing import Any

from sustaingym.envs.evcharging.actions import to_schedule, ACTION_DISCRETIZATION_FACTOR
from copy import deepcopy

MIN_ACTION = 0
MAX_ACTION = 4


class BaseOnlineAlgorithm:
    """
    Abstract base class meant to be inherited from to implement new algorithms.

    Subclasses must implement the schedule method.

    Attributes:
        env: EV Charging Environment
    """

    def __init__(self, env: EVChargingEnv) -> None:
        self.env = deepcopy(env)

    def get_action(self, observation: dict[str, Any]) -> np.ndarray:
        """
        TODO comments
        Create a schedule of charging rates for each ev in the active_evs list.
        NOT IMPLEMENTED IN BaseAlgorithm. This method MUST be implemented in all
        subclasses.

        This method returns a schedule of charging rates for each
        Args:
            active_sessions (List[SessionInfo]): List of SessionInfo objects
                which are currently ready to be charged and not finished
                charging.
        Returns:
            Dict[str, List[float]]: Dictionary mapping a station_id to a list of
                charging rates. Each charging rate is valid for one period measured
                relative to the current period, i.e. schedule['abc'][0] is the
                charging rate for station 'abc' during the current period and
                schedule['abc'][1] is the charging rate for the next period,
                and so on. If an algorithm only produces charging rates for the
                current time period, the length of each list should be 1. If this is
                the case, make sure to also set the maximum resolve period to be 1
                period so that the algorithm will be called each period. An
                alternative is to repeat the charging rate a number of times equal to
                the max recompute period.
        """
        raise NotImplementedError
    
    def scale_demand(self, observation) -> None:
        """
        Scale observation demand by the action discretization factor.

        Normalize demands so that action outputs in {0, 1, 2, 3, 4}
        can have the same magnitude impact on decreasing demands.
        """
        observation['demands'] /= ACTION_DISCRETIZATION_FACTOR

    def run(self):
        """
        Runs the scheduling algorithm for the current period and returns the
        resulting reward.

        Returns:
            See schedule.
        """
        # if verbose:
        #     self.env
        done = False
        options = {"verbose": 1}
        obs = self.env.reset(options=options)
        self.scale_demand(obs)
        acc_reward = 0.0

        while not done:
            action = self.get_action(obs)
            obs, reward, done, info = self.env.step(action)
            self.scale_demand(obs)
            acc_reward += reward
        return acc_reward



class UncontrolledChargingAlgorithm(BaseOnlineAlgorithm):
    """
    Algortihm that charges at the maximum rate allowed.
    """
    def get_action(self, observation: dict[str, Any]) -> np.ndarray:
        """
        Charge at the maximum allowed rate by the gym for cars that
        have positive demands.
        """
        return np.where(observation["demands"] > 0, 4, 0)


class NoChargingAlgorithm(BaseOnlineAlgorithm):
    """
    Algortihm that charges at the maximum rate allowed by environment.
    """
    def get_action(self, observation: dict[str, Any]) -> np.ndarray:
        """
        Charge at the maximum allowed rate by the gym for cars that
        have positive demands.
        """
        return np.zeros(shape=observation["demands"].shape)


class DumbAlgorithm(BaseOnlineAlgorithm):
    def get_action(self, observation: dict[str, Any]) -> np.ndarray:
        return np.ones(shape=observation["demands"].shape) * MAX_ACTION

class RandomAlgorithm(BaseOnlineAlgorithm):
    def get_action(self, observation: dict[str, Any]) -> np.ndarray:
        return np.random.randint(5, size=observation["demands"].shape)

class GreedyAlgorithm(BaseOnlineAlgorithm):
    """
    Algorithm that charges at the maximum rate respecting network
    constraints.
    """
    def get_action(self, observation: dict[str, Any]) -> np.ndarray:
        """
        
        """

uncontrolled = UncontrolledChargingAlgorithm(env)
print("uncontrolled reward: ", uncontrolled.run())

no_charging = NoChargingAlgorithm(env)
print("no charging reward: ", no_charging.run())

dumb = DumbAlgorithm(env)
print("dumb reward: ", dumb.run())

random = RandomAlgorithm(env)
print("random reward: ", uncontrolled.run())


Simulating day 2018-11-07 with 36 plug in events. 
uncontrolled reward:  6099.594443284654
Simulating day 2018-11-07 with 36 plug in events. 
no charging reward:  -107.61111111111113
Simulating day 2018-11-07 with 36 plug in events. 
dumb reward:  -9644.884865405555
Simulating day 2018-11-07 with 36 plug in events. 
uncontrolled reward:  6099.594443284654


In [None]:
from acnportal.acnsim.network.sites import caltech_acn

acn = caltech_acn()
constraint_

    current_sum = np.real(simulator.network.constraint_current(schedule, linear=False))
    magnitudes = simulator.network.magnitudes
    over_current = np.maximum(current_sum - magnitudes, 0)
    constraint_punishment = - CONSTRAINT_VIOLATION_WEIGHT * sum(over_current) * timestamp_diff


In [48]:
from acnportal.acnsim.events import RecomputeEvent, PluginEvent
from acnportal.acnsim.models.ev import EV

from sustaingym.envs.evcharging.actions import ACTION_DISCRETIZATION_FACTOR

# def extract_total_info(env: EVChargingEnv):
#     num_stations, max_timestamp = env.num_stations, env.max_timestamp
#     queue = env.events._queue

#     arrivals = np.zeros(shape=(num_stations, max_timestamp), dtype=np.int32)



#     obs = {
#         "arrivals": arrivals,
#         "est_departures": est_departures,
#         "constraint_matrix": constraint_matrix,
#         "magnitudes": magnitudes,
#         "demands": demands,
#         "phases": phases,
#         "timestep": np_timestep,
#     }

for event in env.events._queue:
    if type(event[1]) == PluginEvent:
        ev: EV = event[1].ev
        print(ev.arrival, ev.departure, ev.requested_energy, ev.station_id)
        ap = env.interface._convert_to_amp_periods(ev.requested_energy, ev.station_id)
        print("amp period: ", ap)
        # convert requested energy to A * period


68 175 52.2 CA-309
amp period:  3011.5384615384614
78 222 18.0 CA-327
amp period:  1038.4615384615386
85 168 21.84 CA-321
amp period:  1260.0
86 146 20.02 CA-494
amp period:  1155.0
95 208 20.0 CA-303
amp period:  1153.8461538461538
96 230 11.5 CA-311
amp period:  663.4615384615385
100 195 8.0 CA-496
amp period:  461.53846153846155
116 216 16.0 CA-490
amp period:  923.0769230769231
100 211 20.0 CA-313
amp period:  1153.8461538461538
122 141 40.0 CA-324
amp period:  2307.6923076923076
82 193 12.52 CA-489
amp period:  722.3076923076923
128 206 27.5 CA-319
amp period:  1586.5384615384614
100 223 30.0 CA-307
amp period:  1730.769230769231
133 164 8.0 CA-500
amp period:  461.53846153846155
147 266 16.0 CA-324
amp period:  923.0769230769231
101 210 9.39 CA-304
amp period:  541.7307692307693
150 230 5.0 CA-494
amp period:  288.46153846153845
158 205 15.0 CA-503
amp period:  865.3846153846155
101 215 35.0 CA-314
amp period:  2019.230769230769
158 205 12.75 CA-149
amp period:  735.5769230769231

In [14]:
observation = env.reset()

done = False
i = 0
action = np.ones(shape=(54,), ) * j
while not done:
    observation, reward, done, info = env.step(action)
    # for x in ["charging_cost", "charging_reward", "constraint_punishment", "remaining_amp_periods_punishment"]:
        # d[x].append(info[x])
    # d["reward"].append(reward)
    print(observation['demands'][3])
    i += 1
# for k, v in d.items():
#     print(k)
#     d[k] = np.array(v)
#     print(d[k].min(), d[k].max(), d[k].mean(), d[k].sum())

print()
print()

In [15]:
obs2 = env.reset()

In [20]:
for key in obs1:
    print(np.all(obs1[key] == obs2[key]))

True
True
True
True
True
True
True
