In [1]:
import matplotlib.pyplot as plt
import numpy as np
import gym
import random

In [2]:
# Create the 'electric-v0' environment; the 'ElectricCars:' prefix names the module gym imports to find it.
env = gym.make('ElectricCars:electric-v0')

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


# Q-Learning Agent with all the actions

In [3]:
# Draw one random action to see what an action value looks like.
env.action_space.sample()

8

In [4]:
# Number of discrete actions; this sizes the last axis of the Q-table created further down.
env.action_space.n

70

In [5]:
# Draw one random observation to inspect its layout (two float components).
env.observation_space.sample()

array([45.63666 , 20.678097], dtype=float32)

In [6]:
def create_bins(num_bins_per_obs=60):
    """Build the bin edges used to discretize the two observation components.

    Row 0 holds ``num_bins_per_obs`` evenly spaced edges over [59.8, 60.001]
    (tank size) and row 1 holds the same number of edges over [0.0, 60.001]
    (energy capacity).

    Args:
        num_bins_per_obs: Number of edges generated for each component.

    Returns:
        NumPy array of shape ``(2, num_bins_per_obs)``: tank-size edges
        first, energy-capacity edges second.
    """
    tank_size_edges = np.linspace(59.8, 60.001, num_bins_per_obs)
    energy_capacity_edges = np.linspace(0.0, 60.001, num_bins_per_obs)
    return np.stack((tank_size_edges, energy_capacity_edges))

In [7]:
# Use the same number of bin edges for both observation components.
NUM_BINS = 60
BINS = create_bins(NUM_BINS)
# Echo the edges so they can be checked visually.
BINS

array([[59.8       , 59.80340678, 59.80681356, 59.81022034, 59.81362712,
        59.8170339 , 59.82044068, 59.82384746, 59.82725424, 59.83066102,
        59.8340678 , 59.83747458, 59.84088136, 59.84428814, 59.84769492,
        59.85110169, 59.85450847, 59.85791525, 59.86132203, 59.86472881,
        59.86813559, 59.87154237, 59.87494915, 59.87835593, 59.88176271,
        59.88516949, 59.88857627, 59.89198305, 59.89538983, 59.89879661,
        59.90220339, 59.90561017, 59.90901695, 59.91242373, 59.91583051,
        59.91923729, 59.92264407, 59.92605085, 59.92945763, 59.93286441,
        59.93627119, 59.93967797, 59.94308475, 59.94649153, 59.94989831,
        59.95330508, 59.95671186, 59.96011864, 59.96352542, 59.9669322 ,
        59.97033898, 59.97374576, 59.97715254, 59.98055932, 59.9839661 ,
        59.98737288, 59.99077966, 59.99418644, 59.99759322, 60.001     ],
       [ 0.        ,  1.0169661 ,  2.0339322 ,  3.05089831,  4.06786441,
         5.08483051,  6.10179661,  7.11876271,  8.

In [8]:
def discretize_obs(observations, bins):
    """Map each continuous observation component to a discrete bin index.

    Args:
        observations: Sequence of observation values, one per component.
        bins: Indexable collection where ``bins[i]`` is the increasing array
            of bin edges for ``observations[i]``.

    Returns:
        Tuple with one index per component. Every index lies in
        ``[0, len(bins[i]) - 1]``, so it is always a valid position in a
        table that has ``len(bins[i])`` slots along that axis.
    """
    binned_observations = []

    for i, obs in enumerate(observations):
        edges = bins[i]
        # np.digitize returns len(edges) for values at or above the last edge,
        # which would index one past the end of the table; clamp it to the
        # last valid slot instead.
        discretized_obs = np.minimum(np.digitize(obs, edges), len(edges) - 1)
        binned_observations.append(discretized_obs)
    return tuple(binned_observations)

In [68]:
# Sanity check: discretize one sample (tank size, charge) observation.
discretize_obs([59.9970,48], BINS)

(58, 48)

In [69]:
# Q-table with one axis per discretized observation component and a final axis over actions,
# initialised to zero.
q_table_shape = (NUM_BINS,NUM_BINS, env.action_space.n)
q_table = np.zeros(q_table_shape)
q_table.shape

(60, 60, 70)

In [70]:
def epsilon_greedy_action_selection(epsilon, q_table, discrete_state):
    """Pick an action with the epsilon-greedy rule.

    A uniform random number is drawn; if it exceeds ``epsilon`` the action
    with the largest Q-value for ``discrete_state`` is returned
    (exploitation), otherwise a random action is sampled from the global
    ``env`` action space (exploration).

    Args:
        epsilon: Exploration threshold compared against the random draw.
        q_table: Array indexed as ``[state_0, state_1, action]``.
        discrete_state: Pair of bin indices identifying the current state.

    Returns:
        The index of the selected action.
    """
    if random.random() > epsilon:
        # Exploit: index of the best-valued action for this state.
        first_index, second_index = discrete_state[0], discrete_state[1]
        return np.argmax(q_table[first_index, second_index, 0:])

    # Explore: let the environment pick any action at random.
    return env.action_space.sample()

In [8]:
def compute_next_q_value(old_q_value, reward, next_optimal_q_value):
    """Apply one Q-learning update and return the new Q-value.

    Uses the module-level learning rate ``ALPHA`` and discount ``GAMMA``:
    ``old + ALPHA * (reward + GAMMA * next_optimal - old)``.

    Args:
        old_q_value: Current estimate for the visited state-action pair.
        reward: Reward received for the transition.
        next_optimal_q_value: Largest Q-value available in the next state.

    Returns:
        The updated Q-value for the visited state-action pair.
    """
    td_target = reward + GAMMA*next_optimal_q_value
    td_error = td_target - old_q_value
    return old_q_value + ALPHA*td_error

In [73]:
def reduce_epsilon(epsilon, epoch):
    """Return the exploration rate scheduled for ``epoch``.

    The value decays exponentially from the module-level ``max_epsilon``
    toward ``min_epsilon`` at rate ``decay_rate``. The ``epsilon`` argument
    is accepted but not used in the computation.
    """
    span = max_epsilon-min_epsilon
    decay = np.exp(-decay_rate*epoch)
    return min_epsilon + span*decay

In [74]:
def reduce_alpha(epoch):
    """Return the learning rate scheduled for ``epoch``.

    The value decays exponentially from the module-level ``max_alpha``
    toward ``min_alpha`` at rate ``decay_rate_alpha``.
    """
    span = max_alpha-min_alpha
    decay = np.exp(-decay_rate_alpha*epoch)
    return min_alpha + span*decay

In [75]:
def reduce_gamma(epoch):
    """Return the discount factor scheduled for ``epoch``.

    The value starts at 0.5 for ``epoch == 0`` and decays exponentially
    toward 0.1 at the module-level rate ``decay_rate_gamma``.
    """
    floor = 0.1
    span = (0.4)
    return floor + span*np.exp(-decay_rate_gamma*epoch)

In [78]:
EPOCHS = 60000 # Number of training episodes; each episode is one pass over the one-year cycle, i.e. it runs until the environment reports done.
# Bounds and decay constant of the exponential learning-rate schedule (read by reduce_alpha).
min_alpha = 0.001
max_alpha = 0.2
decay_rate_alpha = 0.00002
# Decay constant of the exponential discount-factor schedule (read by reduce_gamma).
decay_rate_gamma = 0.001
ALPHA = 0.2 # Initial learning rate; the training loop reassigns it from reduce_alpha after every episode.
GAMMA = 0.5 # Initial discount factor (future rewards are weighted gamma, gamma^2, gamma^3, ...); the training loop reassigns it.

In [79]:
# Exploration schedule: epsilon starts at 1.0 and is recomputed after every episode by
# reduce_epsilon, which reads max_epsilon, min_epsilon and decay_rate.
epsilon = 1.0 
max_epsilon = 1.0
min_epsilon = 0.01
decay_rate = 0.0001
# NOTE(review): EPSILON_REDUCE is not referenced by any code visible in this notebook.
EPSILON_REDUCE = 0.001

# Exploration vs. exploitation: epsilon decays over time, so the agent explores less as training proceeds.
# As it gets closer to a good policy it exploits what it has already learned instead of trying new actions.

In [80]:
# Tabular Q-learning training loop: run EPOCHS episodes, updating q_table in place.
env.reset()
rewards = []  # Total reward of every episode, in episode order.


for episode in range(EPOCHS):

    initial_state = env.reset()
    discretized_state = discretize_obs(initial_state, BINS)
    done = False
    total_rewards = 0
    total_cost = 0
    # Take the scheduled discount factor once, before the first step, so that
    # every update within the episode uses the same value.
    GAMMA = reduce_gamma(episode)

    # Go over the cycle until the environment reports the episode is finished.
    while not done:

        action = epsilon_greedy_action_selection(epsilon, q_table, discretized_state)

        next_state, reward, done, info = env.step(action)

        next_state_discretized = discretize_obs(next_state, BINS)

        old_q_value = q_table[discretized_state + (action,)]

        # NOTE(review): this bootstraps from next_state even when done is True;
        # confirm whether terminal transitions should use a zero next-state value.
        next_optimal_q_value = np.max(q_table[next_state_discretized])

        next_q_value = compute_next_q_value(old_q_value, reward, next_optimal_q_value)

        q_table[discretized_state + (action,)] = next_q_value

        total_rewards = total_rewards + reward
        total_cost = total_cost + info['Cost']

        discretized_state = next_state_discretized

    # Keep the per-episode return so learning progress can be inspected afterwards.
    rewards.append(total_rewards)

    # Advance the exploration and learning-rate schedules for the next episode.
    epsilon = reduce_epsilon(epsilon, episode)
    ALPHA = reduce_alpha(episode)

    # Progress report every 100 episodes (shows the last state of the episode).
    if episode % 100 == 0:
        print(f'State:{next_state}; Total rewards: {total_rewards}, info:{info}, total_cost:{total_cost}, eps:{epsilon}, alpha: {ALPHA}, episode:{episode}')
        

      

State:[59.911026  5.248458]; Total rewards: 23996.45741763903, info:{'Day': 358, 'ChargType': 8, 'OrAct': 8, 'ChargAftCharging': 5.248457925005752, 'Cost': 0}, total_cost:993.4861823609933, eps:1.0, alpha: 0.2, episode:0
State:[59.9976   27.590403]; Total rewards: 739.3691663342008, info:{'Day': 12, 'ChargType': 41, 'OrAct': 41, 'ChargAftCharging': 29.490401671608403, 'Cost': 0.247}, total_cost:30.62943366579919, eps:0.9901493354116764, alpha: 0.1996023977347993, episode:100
State:[59.9976   27.590403]; Total rewards: 722.6559906891284, info:{'Day': 12, 'ChargType': 19, 'OrAct': 21, 'ChargAftCharging': 38.390401671128416, 'Cost': 1.4040000000000001}, total_cost:47.34380931087152, eps:0.9803966865736877, alpha: 0.1992055898794543, episode:200
State:[59.91522    3.5858023]; Total rewards: 23129.801885706293, info:{'Day': 345, 'ChargType': 5, 'OrAct': 5, 'ChargAftCharging': 3.585802322121584, 'Cost': 0}, total_cost:950.1427142937216, eps:0.970741078213023, alpha: 0.19880957484673314, epis

State:[59.99988    1.7141042]; Total rewards: 139.0646, info:{'Day': 3, 'ChargType': 15, 'OrAct': 15, 'ChargAftCharging': 5.314104165347793, 'Cost': 0.468}, total_cost:0.936, eps:0.7217344960976069, alpha: 0.18729004199403215, episode:3300
State:[59.954536   6.5628157]; Total rewards: 14449.009327021908, info:{'Day': 215, 'ChargType': 7, 'OrAct': 7, 'ChargAftCharging': 6.56281559320772, 'Cost': 0}, total_cost:530.9738729781143, eps:0.7146526195349836, alpha: 0.1869178342418655, episode:3400
State:[59.913063   3.8393438]; Total rewards: 24078.819268047097, info:{'Day': 359, 'ChargType': 8, 'OrAct': 8, 'ChargAftCharging': 3.8393437386081644, 'Cost': 0}, total_cost:981.126731952922, eps:0.7076412088215263, alpha: 0.18654637016128373, episode:3500
State:[59.96329  23.567507]; Total rewards: 12307.140591802565, info:{'Day': 183, 'ChargType': 4, 'OrAct': 4, 'ChargAftCharging': 23.56750587573784, 'Cost': 0}, total_cost:432.8512081974505, eps:0.7006995628103208, alpha: 0.18617564826642996, epi

State:[59.99856    1.0377475]; Total rewards: 740.2095202465734, info:{'Day': 12, 'ChargType': 19, 'OrAct': 19, 'ChargAftCharging': 11.837747451358094, 'Cost': 1.4040000000000001}, total_cost:29.791879753426464, eps:0.5216828211467822, alpha: 0.1753918580207953, episode:6600
State:[59.979603 10.648677]; Total rewards: 8279.553439950458, info:{'Day': 123, 'ChargType': 26, 'OrAct': 26, 'ChargAftCharging': 11.54867642037967, 'Cost': 0.117}, total_cost:260.4603600495503, eps:0.516591492008677, alpha: 0.17504342285606347, episode:6700
State:[59.99016   5.977373]; Total rewards: 4409.99763393349, info:{'Day': 66, 'ChargType': 6, 'OrAct': 48, 'ChargAftCharging': 5.977373271847192, 'Cost': 0}, total_cost:140.01296606651277, eps:0.5115508224419336, alpha: 0.17469568386525514, episode:6800
State:[60.       11.841029]; Total rewards: 70.00200000000001, info:{'Day': 2, 'ChargType': 1, 'OrAct': 1, 'ChargAftCharging': 11.841029369524094, 'Cost': 0}, total_cost:0, eps:0.5065603083753949, alpha: 0.174

State:[60.       11.841029]; Total rewards: 70.00200000000001, info:{'Day': 2, 'ChargType': 7, 'OrAct': 7, 'ChargAftCharging': 11.841029369524094, 'Cost': 0}, total_cost:0, eps:0.38155798786288553, alpha: 0.16458043470095912, episode:9800
State:[59.97025   9.342971]; Total rewards: 11988.359098312616, info:{'Day': 178, 'ChargType': 6, 'OrAct': 6, 'ChargAftCharging': 9.342970610982796, 'Cost': 0}, total_cost:401.66530168740184, eps:0.3778609241118252, alpha: 0.16425360077442838, episode:9900
State:[59.970726  13.8922615]; Total rewards: 12185.247498418805, info:{'Day': 181, 'ChargType': 1, 'OrAct': 29, 'ChargAftCharging': 13.892261753137456, 'Cost': 0}, total_cost:414.77970158121224, eps:0.3742006467597279, alpha: 0.16392741986251838, episode:10000
State:[59.9982   27.591002]; Total rewards: 740.4921578153155, info:{'Day': 12, 'ChargType': 19, 'OrAct': 21, 'ChargAftCharging': 38.39100165456858, 'Cost': 1.4040000000000001}, total_cost:29.508642184684337, eps:0.3705767897758081, alpha: 0.

State:[59.978043 11.700497]; Total rewards: 10616.9970635011, info:{'Day': 157, 'ChargType': 3, 'OrAct': 3, 'ChargAftCharging': 11.700496259719879, 'Cost': 0}, total_cost:303.0363364989133, eps:0.27980647510367246, alpha: 0.1544392655749097, episode:13000
State:[59.98368    3.4116805]; Total rewards: 8063.811760587097, info:{'Day': 120, 'ChargType': 19, 'OrAct': 19, 'ChargAftCharging': 14.211680510601019, 'Cost': 1.4040000000000001}, total_cost:266.2128394129103, eps:0.2771218558208399, alpha: 0.1541326937178076, episode:13100
State:[59.99856  14.050702]; Total rewards: 749.3089361745249, info:{'Day': 12, 'ChargType': 4, 'OrAct': 4, 'ChargAftCharging': 14.050702112408054, 'Cost': 0}, total_cost:20.694663825474983, eps:0.27446394894619186, alpha: 0.15382673439168454, episode:13200
State:[59.931038 48.591503]; Total rewards: 24661.460678349875, info:{'Day': 365, 'ChargType': 8, 'OrAct': 64, 'ChargAftCharging': 48.59150275551474, 'Cost': 0}, total_cost:888.5343216501315, eps:0.27183248868

State:[59.98524   9.129788]; Total rewards: 7747.863367878557, info:{'Day': 114, 'ChargType': 5, 'OrAct': 5, 'ChargAftCharging': 9.129788332170728, 'Cost': 0}, total_cost:162.1580321214535, eps:0.2059197120927785, alpha: 0.14492679823358864, episode:16200
State:[59.99868  14.050822]; Total rewards: 748.3182468146043, info:{'Day': 12, 'ChargType': 12, 'OrAct': 12, 'ChargAftCharging': 14.05082211000807, 'Cost': 0}, total_cost:21.685553185395626, eps:0.20397027838564025, alpha: 0.14463923229891146, episode:16300
State:[59.9934     5.5631676]; Total rewards: 2969.9152283062817, info:{'Day': 45, 'ChargType': 27, 'OrAct': 27, 'ChargAftCharging': 6.013167411857723, 'Cost': 0.0585}, total_cost:110.08757169371786, eps:0.20204024186798297, alpha: 0.14435224092135499, episode:16400
State:[59.99892  14.051062]; Total rewards: 755.5712491566428, info:{'Day': 12, 'ChargType': 3, 'OrAct': 3, 'ChargAftCharging': 14.051062105928104, 'Cost': 0}, total_cost:14.433950843357117, eps:0.20012940953454655, al

State:[59.9934   11.422896]; Total rewards: 3262.376178439928, info:{'Day': 49, 'ChargType': 2, 'OrAct': 16, 'ChargAftCharging': 11.422896651450579, 'Cost': 0}, total_cost:97.63182156007328, eps:0.15226691027992587, alpha: 0.13600405630955673, episode:19400
State:[59.9946    9.335486]; Total rewards: 2293.0991338408903, info:{'Day': 35, 'ChargType': 3, 'OrAct': 3, 'ChargAftCharging': 9.335486844626615, 'Cost': 0}, total_cost:86.90786615910986, eps:0.15085133087064842, alpha: 0.13573431802513478, episode:19500
State:[59.975166 13.325739]; Total rewards: 12889.76522547335, info:{'Day': 190, 'ChargType': 4, 'OrAct': 18, 'ChargAftCharging': 13.325739224079777, 'Cost': 0}, total_cost:340.26637452666637, eps:0.14944983671183454, alpha: 0.13546511867816458, episode:19600
State:[59.976723   7.8368073]; Total rewards: 12903.418835606033, info:{'Day': 190, 'ChargType': 11, 'OrAct': 53, 'ChargAftCharging': 7.8368073665072036, 'Cost': 0}, total_cost:326.62736439397696, eps:0.14806228765290042, alp

State:[59.99376  11.524449]; Total rewards: 3281.96316941939, info:{'Day': 49, 'ChargType': 6, 'OrAct': 20, 'ChargAftCharging': 11.524449035178307, 'Cost': 0}, total_cost:78.04443058061048, eps:0.11330697990721733, alpha: 0.12763447977529233, episode:22600
State:[59.99796   8.869956]; Total rewards: 819.0056, info:{'Day': 13, 'ChargType': 5, 'OrAct': 9, 'ChargAftCharging': 8.86995572463941, 'Cost': 0}, total_cost:20.994999999999997, eps:0.1122790582822692, alpha: 0.1273814639159397, episode:22700
State:[59.958973 12.520511]; Total rewards: 20808.963732476044, info:{'Day': 306, 'ChargType': 9, 'OrAct': 9, 'ChargAftCharging': 12.52051051606782, 'Cost': 0}, total_cost:541.0798675239774, eps:0.11126136464838206, alpha: 0.12712895358261125, episode:22800
State:[59.977203   5.9370475]; Total rewards: 12918.144849186156, info:{'Day': 190, 'ChargType': 11, 'OrAct': 25, 'ChargAftCharging': 5.937047277003709, 'Cost': 0}, total_cost:311.90115081385375, eps:0.11025379723534455, alpha: 0.1268769477

State:[59.977802 15.228618]; Total rewards: 12954.358514949807, info:{'Day': 190, 'ChargType': 4, 'OrAct': 18, 'ChargAftCharging': 15.228618117890258, 'Cost': 0}, total_cost:275.6794850502071, eps:0.08501626398261702, alpha: 0.11978377514219725, episode:25800
State:[59.96977   9.400591]; Total rewards: 16920.188824563393, info:{'Day': 249, 'ChargType': 16, 'OrAct': 16, 'ChargAftCharging': 14.80059055028706, 'Cost': 0.7020000000000001}, total_cost:439.8665754366232, eps:0.08426983968447366, alpha: 0.11954644500116393, episode:25900
State:[59.9988   27.591602]; Total rewards: 753.5614258165988, info:{'Day': 12, 'ChargType': 41, 'OrAct': 41, 'ChargAftCharging': 29.491601641848696, 'Cost': 0.247}, total_cost:16.441174183401195, eps:0.08353084243219053, alpha: 0.11930958904606868, episode:26000
State:[59.947582 48.608047]; Total rewards: 24878.724177734985, info:{'Day': 365, 'ChargType': 7, 'OrAct': 35, 'ChargAftCharging': 48.60804609878353, 'Cost': 0}, total_cost:671.3084222650325, eps:0.0

State:[59.99856  14.050702]; Total rewards: 754.1625115559566, info:{'Day': 12, 'ChargType': 19, 'OrAct': 23, 'ChargAftCharging': 24.850702113128033, 'Cost': 1.4040000000000001}, total_cost:15.837888444043513, eps:0.06447298785584314, alpha: 0.11241977494651502, episode:29000
State:[59.94998  48.610443]; Total rewards: 24936.30841733542, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 48.61044406108465, 'Cost': 0}, total_cost:613.7351826646119, eps:0.06393097257049796, alpha: 0.1121971580876864, episode:29100
State:[59.981163 17.952818]; Total rewards: 12349.779429453622, info:{'Day': 181, 'ChargType': 12, 'OrAct': 26, 'ChargAftCharging': 17.95281765161691, 'Cost': 0}, total_cost:250.26717054639664, eps:0.06339435042735246, alpha: 0.11197498601763844, episode:29200
State:[59.973846   8.2492285]; Total rewards: 15033.719990755908, info:{'Day': 221, 'ChargType': 9, 'OrAct': 9, 'ChargAftCharging': 8.249228436439392, 'Cost': 0}, total_cost:366.3284092441157, eps:0.062863

State:[59.978645   6.3511353]; Total rewards: 12989.381899623202, info:{'Day': 191, 'ChargType': 15, 'OrAct': 15, 'ChargAftCharging': 9.951135213811037, 'Cost': 0.468}, total_cost:310.65930037682637, eps:0.04955550767804736, alpha: 0.10551230594641979, episode:32200
State:[59.99388   7.924569]; Total rewards: 3288.949192780664, info:{'Day': 49, 'ChargType': 1, 'OrAct': 29, 'ChargAftCharging': 7.924569024138805, 'Cost': 0}, total_cost:71.05760721933744, eps:0.04916192380051474, alpha: 0.10530349021985876, episode:32300
State:[59.99688  25.400003]; Total rewards: 1433.2470368281358, info:{'Day': 22, 'ChargType': 26, 'OrAct': 26, 'ChargAftCharging': 26.300002578840147, 'Cost': 0.117}, total_cost:36.754563171864056, eps:0.0487722561479972, alpha: 0.10509509170739766, episode:32400
State:[59.96605    9.3968725]; Total rewards: 16922.950298482207, info:{'Day': 249, 'ChargType': 28, 'OrAct': 28, 'ChargAftCharging': 16.99687252952438, 'Cost': 0.988}, total_cost:437.08470151781165, eps:0.048386

State:[59.99316    3.9617307]; Total rewards: 3283.6508190124623, info:{'Day': 49, 'ChargType': 1, 'OrAct': 29, 'ChargAftCharging': 3.9617307122671512, 'Cost': 0}, total_cost:76.35678098753999, eps:0.03872319381137508, alpha: 0.09903306549022696, episode:35400
State:[59.9501   48.610565]; Total rewards: 24894.639301154144, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 48.610563952892846, 'Cost': 0}, total_cost:655.4214988458858, eps:0.03843739325769703, alpha: 0.09883719529473205, episode:35500
State:[59.946865 48.607327]; Total rewards: 24874.759219180098, info:{'Day': 365, 'ChargType': 7, 'OrAct': 35, 'ChargAftCharging': 48.60732672850946, 'Cost': 0}, total_cost:675.2771808199374, eps:0.03815443646704266, alpha: 0.09864171644814879, episode:35600
State:[59.95274 48.6132 ]; Total rewards: 24965.1916761895, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 48.61320183549782, 'Cost': 0}, total_cost:584.8575238105333, eps:0.03787429514349709, alpha: 

State:[59.993042   2.3055623]; Total rewards: 4462.38320258449, info:{'Day': 66, 'ChargType': 11, 'OrAct': 67, 'ChargAftCharging': 2.305562285517823, 'Cost': 0}, total_cost:87.63419741551378, eps:0.03085731952781101, alpha: 0.09295550554915631, episode:38600
State:[59.99484  11.569182]; Total rewards: 3292.380008619289, info:{'Day': 49, 'ChargType': 6, 'OrAct': 34, 'ChargAftCharging': 11.569182551332815, 'Cost': 0}, total_cost:67.63139138071169, eps:0.030649785730962567, alpha: 0.09277177832652303, episode:38700
State:[59.980682 19.752459]; Total rewards: 12352.63441085977, info:{'Day': 181, 'ChargType': 10, 'OrAct': 24, 'ChargAftCharging': 19.752457762480027, 'Cost': 0}, total_cost:247.406389140249, eps:0.03044431692989543, alpha: 0.09258841819112543, episode:38800
State:[59.9982   27.591002]; Total rewards: 747.5499219759446, info:{'Day': 12, 'ChargType': 5, 'OrAct': 7, 'ChargAftCharging': 27.591001654808586, 'Cost': 0}, total_cost:22.45207802405523, eps:0.030240892577558277, alpha: 

State:[59.950222 43.561764]; Total rewards: 24900.469861043355, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 43.56176374691192, 'Cost': 0}, total_cost:649.5821389566794, eps:0.02514552249105825, alpha: 0.08725472393948436, episode:41800
State:[59.9501   48.610565]; Total rewards: 24895.73544577061, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 48.61056395888791, 'Cost': 0}, total_cost:654.3073542294039, eps:0.02499482202431651, alpha: 0.08708238688610445, episode:41900
State:[59.9531  43.56464]; Total rewards: 24928.6500622307, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 43.56464144167019, 'Cost': 0}, total_cost:621.403737769329, eps:0.024845621052272927, alpha: 0.08691039416238686, episode:42000
State:[59.961014   2.5595512]; Total rewards: 21200.899749887718, info:{'Day': 311, 'ChargType': 20, 'OrAct': 20, 'ChargAftCharging': 15.159551284741031, 'Cost': 1.6380000000000001}, total_cost:499.15005011230335, eps:0.02469790

State:[59.99784   9.199027]; Total rewards: 807.1863314231401, info:{'Day': 13, 'ChargType': 5, 'OrAct': 5, 'ChargAftCharging': 9.199026848026328, 'Cost': 0}, total_cost:32.815068576859716, eps:0.02099790657285988, alpha: 0.08190736228837923, episode:45000
State:[59.99388   9.724569]; Total rewards: 3291.752331773309, info:{'Day': 49, 'ChargType': 9, 'OrAct': 23, 'ChargAftCharging': 9.724569023658844, 'Cost': 0}, total_cost:68.25746822669302, eps:0.020888475574048812, alpha: 0.08174570927070447, episode:45100
State:[59.991722    0.51321805]; Total rewards: 5278.591654806549, info:{'Day': 78, 'ChargType': 7, 'OrAct': 8, 'ChargAftCharging': 0.5132180228186591, 'Cost': 0}, total_cost:111.4335451934577, eps:0.020780133431868894, alpha: 0.08158437923597447, episode:45200
State:[59.9513   43.562843]; Total rewards: 24929.123752337364, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 43.56284287438265, 'Cost': 0}, total_cost:620.9200476626767, eps:0.020672869312015653, alpha

State:[59.99412   2.524689]; Total rewards: 3292.4352080077197, info:{'Day': 49, 'ChargType': 5, 'OrAct': 5, 'ChargAftCharging': 2.5246890118993868, 'Cost': 0}, total_cost:67.5779919922818, eps:0.01798611926770862, alpha: 0.07689151032534372, episode:48200
State:[59.99412   2.524689]; Total rewards: 3292.4352080077197, info:{'Day': 49, 'ChargType': 4, 'OrAct': 4, 'ChargAftCharging': 2.5246890118993868, 'Cost': 0}, total_cost:67.5779919922818, eps:0.017906656053295947, alpha: 0.07673987898657557, episode:48300
State:[59.95442 48.61488]; Total rewards: 24955.41216571695, info:{'Day': 365, 'ChargType': 12, 'OrAct': 40, 'ChargAftCharging': 48.61488053975512, 'Cost': 0}, total_cost:594.6468342830702, eps:0.017827983511077505, alpha: 0.07658855060742437, episode:48400
State:[59.993042   2.3055623]; Total rewards: 4458.740582369552, info:{'Day': 66, 'ChargType': 4, 'OrAct': 32, 'ChargAftCharging': 2.305562283837972, 'Cost': 0}, total_cost:91.27981763045305, eps:0.01775009377373351, alpha: 0.0

State:[59.993042  8.738759]; Total rewards: 4466.272595842998, info:{'Day': 66, 'ChargType': 2, 'OrAct': 16, 'ChargAftCharging': 8.738758905919322, 'Cost': 0}, total_cost:83.74280415700362, eps:0.015799112816202263, alpha: 0.07218661610711039, episode:51400
State:[59.99856    7.8997436]; Total rewards: 756.3683361768757, info:{'Day': 12, 'ChargType': 1, 'OrAct': 1, 'ChargAftCharging': 7.899743490671888, 'Cost': 0}, total_cost:13.634263823124215, eps:0.01574141067957372, alpha: 0.07204438515326034, episode:51500
State:[59.993042  8.738759]; Total rewards: 4465.4448402239295, info:{'Day': 66, 'ChargType': 5, 'OrAct': 5, 'ChargAftCharging': 8.738758905919322, 'Cost': 0}, total_cost:84.57055977607226, eps:0.01568428268879766, alpha: 0.07190243837704562, episode:51600
State:[59.993042  8.738759]; Total rewards: 4466.272595842998, info:{'Day': 66, 'ChargType': 0, 'OrAct': 28, 'ChargAftCharging': 8.738758905919322, 'Cost': 0}, total_cost:83.74280415700362, eps:0.0156277231310274, alpha: 0.071

State:[59.99352   8.739239]; Total rewards: 4465.444671424056, info:{'Day': 66, 'ChargType': 4, 'OrAct': 18, 'ChargAftCharging': 8.739238858641553, 'Cost': 0}, total_cost:84.57052857594758, eps:0.014211020187366974, alpha: 0.0677734018081443, episode:54600
State:[59.9934    8.739119]; Total rewards: 4459.283520037215, info:{'Day': 66, 'ChargType': 2, 'OrAct': 2, 'ChargAftCharging': 8.739118869201132, 'Cost': 0}, total_cost:90.73347996278805, eps:0.014169119836417061, alpha: 0.06763998846234491, episode:54700
State:[59.954178 48.61464 ]; Total rewards: 24913.767665289884, info:{'Day': 365, 'ChargType': 12, 'OrAct': 40, 'ChargAftCharging': 48.61464071776945, 'Cost': 0}, total_cost:636.2899347101484, eps:0.01412763640092507, alpha: 0.06750684167658824, episode:54800
State:[59.94974  48.610203]; Total rewards: 24885.009396555786, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 48.610204259003424, 'Cost': 0}, total_cost:665.0308034442567, eps:0.014086565732512882, alpha: 

State:[59.99556 10.05062]; Total rewards: 2400.3210512404794, info:{'Day': 36, 'ChargType': 8, 'OrAct': 8, 'ChargAftCharging': 10.050620033940183, 'Cost': 0}, total_cost:49.68554875952066, eps:0.013057828254154401, alpha: 0.06363378473171359, episode:57800
State:[59.952618 25.171839]; Total rewards: 24947.582865483288, info:{'Day': 365, 'ChargType': 3, 'OrAct': 59, 'ChargAftCharging': 25.17183962616802, 'Cost': 0}, total_cost:602.472134516734, eps:0.013027402354659073, alpha: 0.06350864234634966, episode:57900
State:[59.95178  48.612244]; Total rewards: 24925.91737033167, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 48.61224259288055, 'Cost': 0}, total_cost:624.1362296683403, eps:0.012997279197922054, alpha: 0.06338374999563845, episode:58000
State:[59.94998  48.610443]; Total rewards: 24905.82403653182, info:{'Day': 365, 'ChargType': 2, 'OrAct': 30, 'ChargAftCharging': 48.61044406084474, 'Cost': 0}, total_cost:644.223563468206, eps:0.012967455771602571, alpha: 0.

In [60]:
# Inspect the learned Q-values of every action for one discretized state
# (index 55 on the first state axis, 48 on the second).
state_row = q_table[55,48,0:]
state_row

array([246.60573439, 249.05498245, 248.24125839, 243.46390599,
       247.2942367 , 248.84351556, 248.09622033, 248.85229673,
       248.07946648, 246.59606655, 246.61243612, 236.76488683,
       249.39803816, 246.63299698, 247.28079232, 248.89047897,
       248.93606911, 249.17984646, 249.19300269, 249.26943578,
       249.28383526, 244.63118452, 249.24851556, 249.28605822,
       247.90063871, 248.82540982, 242.8216473 , 248.03931803,
       248.4038174 , 244.60824177, 249.19833241, 249.27091468,
       249.530321  , 249.27184943, 249.23967073, 248.9416698 ,
       249.14884874, 248.94493703, 249.23551299, 249.14553344,
       248.89484868, 248.49708775, 249.1912005 , 249.15850401,
       248.82532792, 249.19999077, 248.7349416 , 248.85240545,
       248.27730978, 249.23148024, 248.46396634, 248.65988326,
       248.71795892, 249.32807711, 249.28211926, 248.56513152,
       248.97456381, 246.91659672, 246.85798676, 241.26274212,
       242.27845809, 248.22677091, 235.19274109, 241.66

In [26]:
#q_table

In [9]:
# Persist the learned Q-table to disk; np.save appends the '.npy' extension to the given name.
# NOTE(review): the recorded output below is a NameError, i.e. this cell was last run in a
# kernel where q_table did not exist yet — it needs the training cell to have run first.
np.save('q_table110823FinalFinal',q_table)

NameError: name 'q_table' is not defined

In [10]:
# Reload the previously saved Q-table so the evaluation below can run without retraining.
q_table = np.load('q_table110823FinalFinal.npy')

In [11]:
# Greedy rollout: always take the highest-valued action from q_table, for at most 365 steps,
# and log every step so the decisions can be analysed as a DataFrame afterwards.
observation = env.reset()
rewards = 0
tank_size_list = []
# NOTE(review): charge_list is created here but never appended to in this cell.
charge_list = []
# Per-step log; each key collects one value per step.
my_dict = {'Charger Type': [], 'Charging Time': [], 'Original Charger Type': [], 'Original Charging Time':[],
           'Cost': [], 'Charge Before Charging':[], 
          'Battery Size Before Charging': [], 'Charge After Charging':[], 'Reward':[]}

total_cost = 0
total_cost_list = []  # Running (cumulative) cost after each step.

for _ in range(365):
    discrete_state = discretize_obs(observation, BINS)  # Bin indices of the current observation.
    action = np.argmax(q_table[discrete_state])# Greedy choice: the action with the largest Q-value for this state.
    observation, reward, done, info = env.step(action) # Apply the action; observation now holds the value returned by this step.
    rewards = rewards + reward
    my_dict['Reward'].append(reward)
    my_dict['Charge After Charging'].append(info['ChargAftCharging'])
    # NOTE(review): the two "Before Charging" columns are read from the observation returned by
    # this step, not from the one the action was chosen from — confirm this matches the environment's semantics.
    my_dict['Battery Size Before Charging'].append(observation[0])
    my_dict['Charge Before Charging'].append(observation[1])
    my_dict['Cost'].append(info['Cost'])
    # The action code is split into a type code (integer part of code / 14) and a time code (code % 14).
    my_dict['Original Charger Type'].append(int(info['OrAct']/14))
    my_dict['Original Charging Time'].append(info['OrAct'] % 14)
    my_dict['Charger Type'].append(int(info['ChargType']/14))
    my_dict['Charging Time'].append(info['ChargType'] % 14)
    total_cost += info['Cost']
    total_cost_list.append(total_cost)
    tank_size_list.append(observation[0])
    
        
    print(f'State:{observation}; Total rewards: {rewards}; info:{info}, done:{done}, total_cost: {total_cost}')
    # Stop early if the environment ends the episode before 365 steps.
    if done:
        print(f"You got {rewards} points!")
        break
env.close()

State:[60.       24.802422]; Total rewards: 70.001; info:{'Day': 1, 'ChargType': 1, 'OrAct': 1, 'ChargAftCharging': 24.80242222146822, 'Cost': 0}, done:False, total_cost: 0
State:[60.       11.841029]; Total rewards: 139.5328; info:{'Day': 2, 'ChargType': 15, 'OrAct': 15, 'ChargAftCharging': 15.441029369524093, 'Cost': 0.468}, done:False, total_cost: 0.468
State:[59.99988    1.7141042]; Total rewards: 207.5564; info:{'Day': 3, 'ChargType': 29, 'OrAct': 29, 'ChargAftCharging': 16.914104165347794, 'Cost': 1.976}, done:False, total_cost: 2.444
State:[59.99964    2.8180835]; Total rewards: 275.6842; info:{'Day': 4, 'ChargType': 21, 'OrAct': 23, 'ChargAftCharging': 17.218083524700667, 'Cost': 1.872}, done:False, total_cost: 4.316
State:[59.99952  14.661438]; Total rewards: 340.74379999999996; info:{'Day': 5, 'ChargType': 32, 'OrAct': 32, 'ChargAftCharging': 52.6614381533751, 'Cost': 4.9399999999999995}, done:False, total_cost: 9.256
State:[59.99928  43.082325]; Total rewards: 408.5441956615

  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):


In [12]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [84]:
# Lookup tables that turn the numeric type / time codes into readable values.
charger_type = dict(enumerate(['No Charge', 'L1', 'L2', 'DC', 'xDC']))
charging_time = {**{code: code + 1 for code in range(12)}, 12: 0.5, 13: 0.25}

df = pd.DataFrame(data=my_dict)

# Decode the charger-type columns, then the charging-time columns.
for column in ('Charger Type', 'Original Charger Type'):
    df[column] = df[column].map(charger_type)
for column in ('Charging Time', 'Original Charging Time'):
    df[column] = df[column].map(charging_time)
df

Unnamed: 0,Charger Type,Charging Time,Original Charger Type,Original Charging Time,Cost,Charge Before Charging,Battery Size Before Charging,Charge After Charging,Reward
0,No Charge,2.0,No Charge,2.0,0.000,24.802422,60.000000,24.802422,70.0010
1,L1,2.0,L1,2.0,0.468,11.841029,60.000000,15.441029,69.5318
2,L2,2.0,L2,2.0,1.976,1.714104,59.999882,16.914104,68.0236
3,L1,8.0,L1,10.0,1.872,2.818084,59.999641,17.218084,68.1278
4,L2,5.0,L2,5.0,4.940,14.661438,59.999519,52.661438,65.0596
...,...,...,...,...,...,...,...,...,...
360,No Charge,2.0,No Charge,2.0,0.000,35.943741,59.952259,35.943740,70.0010
361,L1,4.0,L1,4.0,0.936,20.128796,59.952259,27.328796,69.0638
362,L2,1.0,L2,1.0,0.988,21.813232,59.952141,29.413233,69.0116
363,L1,9.0,L1,11.0,2.106,24.364313,59.951900,40.564313,67.8938


In [85]:
# Which charger types appear in the logged decisions.
df['Charger Type'].unique()

array(['No Charge', 'L1', 'L2', 'DC', 'xDC'], dtype=object)

In [90]:
# Number of logged steps whose charger type is 'DC'.
len(df[df['Charger Type'] == 'DC'])

23

In [87]:
# Number of logged steps whose charger type is 'No Charge'.
len(df[df['Charger Type'] == 'No Charge'])

127

In [39]:
# Which charging-time values appear in the logged decisions.
df['Charging Time'].unique()

array([ 7.  ,  8.  ,  3.  ,  2.  ,  6.  ,  5.  ,  1.  ,  4.  ,  0.25,
        0.5 ,  9.  , 10.  , 11.  ])

In [91]:
# Export the decoded decision log; index=False leaves the row index out of the file.
df.to_csv('QLearningDecision11-08_FinalFinal.csv',index=False)