In [24]:
from __future__ import print_function
import pandapower as pp
import pandapower.networks as pn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import stable_baselines
import pickle
from stable_baselines import DDPG
from scipy.stats import wilcoxon
import sys
import copy
sys.path.append('C:\\Users\\vegar\\Dropbox\\Master\\thesis.git')
from  gym_power.envs.active_network_env import ActiveEnv
import seaborn as sns
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
%matplotlib notebook


# I need to see the actions of the agent
The agent activates flexibility to help the net. We need to plot the actions of the agent together with solar irradiance and demand at each load.


In [3]:
def stack_columns(df,columns):
    """Stack several columns of ``df`` on top of each other in long format.

    :param df: DataFrame that holds the columns to stack.
    :param columns: iterable of column names present in ``df``.
    :return: DataFrame with a 'Reward' column containing the values of each
        requested column, one column after the other, and a '' (empty-name)
        column containing the name of the column every value came from.
    """
    rows_per_column = len(df)
    stacked_values = []
    source_names = []
    for name in columns:
        stacked_values.extend(list(df[name]))
        source_names.extend([name] * rows_per_column)

    stacked = pd.DataFrame()
    stacked['Reward'] = stacked_values
    stacked[''] = source_names
    return stacked
    

In [4]:
def pickle_savefig(fig,figname):
    """Store ``fig`` under ``figs/`` both as a pickle and as a PNG image.

    :param fig: figure object; it is pickled and its ``savefig`` method is called.
    :param figname: base file name; the pickle is written to
        ``figs/<figname>_plt.p`` and the image to ``figs/<figname>.png``.
    """
    base_path = 'figs/' + figname
    with open(base_path + '_plt.p','wb') as pickle_file:
        pickle.dump(fig,pickle_file)
    fig.savefig(base_path +'.png')
    
    
    
    

In [5]:
def read_pickle_fig(figname):
    """Load the object pickled at ``figs/<figname>_plt.p`` and return it.

    :param figname: base file name, without the ``figs/`` prefix and the
        ``_plt.p`` suffix.
    :return: whatever object the pickle file contains.
    """
    pickle_path = 'figs/'+figname +'_plt.p'
    # Unpickling executes code stored in the file, so only open pickles you wrote yourself.
    with open(pickle_path, 'rb') as pickle_file:
        return pickle.load(pickle_file)

In [6]:
def find_load_names(sol_bus):
    """Build a display name for every entry of ``sol_bus``.

    :param sol_bus: indexable sequence of truthy/falsy flags, addressed by
        position ``0 .. len(sol_bus) - 1``.
    :return: list of names; truthy entries become 'sun 1', 'sun 2', ... and
        falsy entries become 'load 1', 'load 2', ..., each numbered separately.
    """
    sun_count = 0
    other_count = 0
    load_names = []
    for k in range(len(sol_bus)):
        if not sol_bus[k]:
            other_count += 1
            load_names.append('load {}'.format(other_count))
            continue
        sun_count += 1
        load_names.append('sun {}'.format(sun_count))
    return load_names

In [7]:
def calc_hour(start_hour,time_step):
    """Return ``start_hour + time_step`` wrapped into the range of a 24-hour clock.

    :param start_hour: hour at which counting starts.
    :param time_step: number of steps elapsed since ``start_hour``
        (assumes one step corresponds to one hour — TODO confirm).
    :return: the sum of both arguments modulo 24.
    """
    elapsed = start_hour + time_step
    return elapsed % 24

In [8]:
def simulate_day2(env,model, show_imbalance=False, show_solar=True, show_action=True,
                  show_demand=False,period=25):
    """Roll out ``model`` in ``env`` and collect the selected series in long format.

    For every step ``1 .. period - 1`` the model predicts an action from the
    latest observation and the environment is stepped. Each enabled series
    then contributes one row per element of the action vector.

    :param env: environment exposing ``powergrid``, ``reset``, ``step``,
        ``solar_forecasts``, ``demand_forecasts`` and, when
        ``show_imbalance`` is set, ``calc_balance`` or ``calc_imbalance``.
    :param model: object whose ``predict(obs)`` returns ``(action, state)``.
    :param show_imbalance: record the imbalance divided by 30000 (label 'imbalance').
    :param show_solar: record ``solar_forecasts[t_step - 1]`` (label 'sun').
    :param show_action: record the action itself (label 'action').
    :param show_demand: record ``demand_forecasts[0][t_step - 1]`` (label 'demand').
    :param period: exclusive upper bound of the step counter.
    :return: DataFrame with columns 'actions' (recorded value), 'steps',
        'load' (name from ``find_load_names``) and '' (series label).
    """
    net = env.powergrid
    # True for loads that sit on a bus which also has a static generator.
    sol_bus = net.load['bus'].isin(net.sgen['bus'])
    values, steps, load_labels, series_labels = [], [], [], []
    obs = env.reset()
    sol = env.solar_forecasts
    demand = env.demand_forecasts[0]
    names = find_load_names(sol_bus)

    def _record(series, label, width, step_column):
        """Append one series (one value per action element) for the current step."""
        values.extend(list(series))
        series_labels.extend([label] * width)
        steps.extend(step_column)
        load_labels.extend(names)

    for t_step in range(1,period):
        action,_ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)

        # Scalars are broadcast to one value per action element.
        ones = np.ones_like(action)
        width = len(action)
        step_column = list(t_step*ones)

        if show_action:
            _record(action, 'action', width, step_column)

        if show_solar:
            _record(sol[t_step-1]*ones, 'sun', width, step_column)

        if show_imbalance:
            # The environment API offers the imbalance under one of two names.
            try:
                imbalance = env.calc_balance()/30000
            except AttributeError:
                imbalance = env.calc_imbalance()/30000
            _record(imbalance*ones, 'imbalance', width, step_column)

        if show_demand:
            _record(demand[t_step-1]*ones, 'demand', width, step_column)

    df = pd.DataFrame()
    df['actions'] = values
    df['steps'] = steps
    df['load'] = load_labels
    df[''] = series_labels
    return df

In [9]:
def data_from_subplot(ax, imbalance=False):
    """Extract the y-data of three lines of ``ax`` into a DataFrame.

    Lines 3, 4 and 5 of ``ax.lines`` are always read, so the axes must hold
    at least six lines.

    :param ax: object with a ``lines`` sequence whose items offer ``get_ydata()``.
    :param imbalance: when true, line 5 is included as 'Energy imbalance'.
    :return: DataFrame with columns ['Energy imbalance' (optional),
        'Solar irradiance' (line 4), 'Action' (line 3)].
    """
    action_y, sun_y, balance_y = [ax.lines[idx].get_ydata() for idx in (3, 4, 5)]

    columns = [('Solar irradiance', sun_y), ('Action', action_y)]
    if imbalance:
        columns.insert(0, ('Energy imbalance', balance_y))

    oneplot = pd.DataFrame()
    for title, ydata in columns:
        oneplot[title] = ydata
    return oneplot

In [10]:
def change_hours(x,start=155):
    """Shift ``x`` so that ``start`` becomes zero.

    :param x: value to shift (used on the hour columns in this notebook).
    :param start: offset that is subtracted from ``x``.
    :return: ``x - start``.
    """
    shifted = x - start
    return shifted
def change_legend(x):
    """Rename the legend label 'No agent' to 'No action'; pass anything else through.

    :param x: legend label.
    :return: 'No action' when ``x == 'No agent'``, otherwise ``x`` unchanged.
    """
    return 'No action' if x == 'No agent' else x

## Import models

In [11]:
def load_env(model_name='flexible_load_first',seed=9):
    """Load a saved DDPG model together with an ``ActiveEnv`` using its saved parameters.

    The model is read from ``models/<model_name>`` and the environment
    parameters from the pickle ``models/<model_name>_params.p``.
    Model names noted by the author: flexible_load_first, overnight,
    larger_margin_cost, discount_06, flex50.

    :param model_name: base name of the saved model.
    :param seed: seed passed to ``ActiveEnv``.
    :return: tuple ``(model, env)`` with ``env`` already attached to ``model``.
    """
    model_path = 'models/'+model_name
    params_path = 'models/' + model_name +'_params.p'

    model = DDPG.load(model_path)
    env = ActiveEnv(seed=seed)
    with open(params_path,'rb') as params_file:
        params = pickle.load(params_file)

    env.set_parameters(params)
    model.set_env(env)
    return model, env

In [13]:
model, env = load_env()

Loading a model without an environment, this model cannot be trained until it has a valid environment.
setting up target updates ...
  target/pi/fc0/kernel:0 <- model/pi/fc0/kernel:0
  target/pi/fc0/bias:0 <- model/pi/fc0/bias:0
  target/pi/LayerNorm/beta:0 <- model/pi/LayerNorm/beta:0
  target/pi/LayerNorm/gamma:0 <- model/pi/LayerNorm/gamma:0
  target/pi/fc1/kernel:0 <- model/pi/fc1/kernel:0
  target/pi/fc1/bias:0 <- model/pi/fc1/bias:0
  target/pi/LayerNorm_1/beta:0 <- model/pi/LayerNorm_1/beta:0
  target/pi/LayerNorm_1/gamma:0 <- model/pi/LayerNorm_1/gamma:0
  target/pi/pi/kernel:0 <- model/pi/pi/kernel:0
  target/pi/pi/bias:0 <- model/pi/pi/bias:0
  target/qf/fc0/kernel:0 <- model/qf/fc0/kernel:0
  target/qf/fc0/bias:0 <- model/qf/fc0/bias:0
  target/qf/LayerNorm/beta:0 <- model/qf/LayerNorm/beta:0
  target/qf/LayerNorm/gamma:0 <- model/qf/LayerNorm/gamma:0
  target/qf/fc1/kernel:0 <- model/qf/fc1/kernel:0
  target/qf/fc1/bias:0 <- model/qf/fc1/bias:0
  target/qf/LayerNorm_1/beta:

In [14]:
# Roll the agent out for steps 1..198 and collect the action, sun and demand series per load.
df = simulate_day2(env,model, show_demand=True, period=199)
sns.set(style="ticks")

# One small panel per load; the '' column (series label) selects the line colour.
grid = sns.FacetGrid(df, col="load", hue="",
                     col_wrap=6, height=1.5)

# Dotted zero reference line; mapped before the data, so these lines are added to each axes first.
grid.map(plt.axhline, y=0, ls=":", c=".5")

grid.map(plt.plot, "steps", "actions")
grid.add_legend()



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x125b4e86780>

In [221]:
@interact
def plot_load(ax_nr1=range(18),ax_nr2=range(18),period=(0,200)):
    """Compare the series of two panels of the global ``grid`` in a two-row figure.

    :param ax_nr1: index into ``grid.axes`` shown in the upper subplot.
    :param ax_nr2: index into ``grid.axes`` shown in the lower subplot.
    :param period: number of leading rows to plot.
    """
    # NOTE: `grid` is looked up at call time, so this uses whichever FacetGrid
    # was assigned to that name most recently.
    selections = [data_from_subplot(grid.axes[nr],imbalance=True)
                  for nr in (ax_nr1, ax_nr2)]

    fig, axes = plt.subplots(2)
    axes[0].set_xlabel('steps')
    for axis in axes:
        axis.axhline(0,c=".5",ls='--')
    for axis, frame in zip(axes, selections):
        frame[:period].plot(ax=axis)
    plt.tight_layout()


interactive(children=(Dropdown(description='ax_nr1', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14…

## See resulting voltages etc

In [42]:
env.powergrid.res_bus

Unnamed: 0,vm_pu,va_degree,p_kw,q_kvar
0,1.03,0.0,-29900.456517,-9086.961999
1,1.009357,-3.664185,11333.733241,2933.480999
2,0.996422,-4.237402,0.0,0.0
3,0.975807,-5.144488,330.646468,144.530961
4,0.974814,-5.196491,322.639727,68.400152
5,0.974195,-5.227244,361.806933,115.281155
6,0.973481,-5.262905,262.167977,86.845137
7,0.972465,-5.277896,63.411203,34.208192
8,0.97266,-5.268847,385.352659,92.993465
9,0.97212,-5.286005,311.028317,256.561444


## Max/Min activation in all hours
See the effect of letting the agent control reactive power as well.

In [175]:
period=199
#model, env = load_env(seed=9)
# Three copies of one seeded environment are stepped with the same constant
# action (-1 for all 18 action elements) so their bus voltages can be compared.
env1 = ActiveEnv(seed=9)
net = env1.powergrid
actions = []
t_steps = []
flex_loads = []
obs = env1.reset()
sol = env1.solar_forecasts
hues = []
env1.set_parameters({'flexibility':0.25,
                   'solar_scale':1.2,
                   'reactive_power':False})
# env2: same state as env1 but with 'reactive_power' switched on; the
# forecasts are re-assigned so both environments share the same objects.
env2 = copy.deepcopy(env1)
env2.set_parameters({'reactive_power':True})
env2.solar_forecasts = env1.solar_forecasts
env2.demand_forecasts = env1.demand_forecasts
# env3: copy with do_action disabled (presumably the action is then ignored
# by step() — confirm in ActiveEnv).
env3 = copy.deepcopy(env1)
env3.do_action = False
legend_map = {0:'Active',1:'- 25 % demand', 2: 'No action'}


action = -np.ones(18)
for t_step in range(1,period):
    # NOTE: the loop variable rebinds the notebook-level name `env`; after this
    # cell `env` refers to env3.
    for i, env in enumerate([env1, env2, env3]):
        obs, rewards, dones, info = env.step(action)
        voltage = env.powergrid.res_bus.vm_pu
        actions += list(voltage)
        hues += [legend_map[i] for _ in range(len(voltage))]
        # Fixed: these two lines referenced `v_active`, which is never defined
        # in the notebook and raised NameError on a fresh kernel.
        t_steps += list(t_step*np.ones_like(voltage))
        flex_loads += list(range(len(voltage)))

# Long format: one row per (step, environment, bus).
df = pd.DataFrame()
df['Voltage [pu]'] = actions
df['Hour'] = t_steps
df['Load'] = flex_loads
df[''] = hues

In [176]:
sns.set(style="ticks")
# One panel per value of the 'Load' column, coloured by the '' (scenario) column.
# NOTE: this rebinds the notebook-level name `grid`, which plot_load reads at call time.
grid = sns.FacetGrid(df, col="Load", hue="",
                     col_wrap=6, height=1.5)


# Dotted reference line at 1.0 on the 'Voltage [pu]' axis.
grid.map(plt.axhline, y=1, ls=":", c=".5")

grid.map(plt.plot, "Hour", "Voltage [pu]")
grid.add_legend()



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x26b432a6d30>

In [177]:
@interact
def plot_voltage(bus_nr=list(range(15))[::-1]):
    """Plot 'Voltage [pu]' over 'Hour' for one bus, one line per scenario label.

    :param bus_nr: value of the 'Load' column of the global ``df`` to show.
    """
    bus_rows = df.loc[df['Load'] == bus_nr]
    fig, ax = plt.subplots()
    sns.lineplot(x="Hour", y="Voltage [pu]", hue='', data=bus_rows, ax=ax)
    

interactive(children=(Dropdown(description='bus_nr', options=(14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0…

In [178]:
# Select bus 8, the two scenarios of interest and hours 156..182 in one mask,
# then copy so the column assignments below write to an independent frame
# (assigning on a filtered slice triggers pandas' SettingWithCopyWarning).
selected = (
    (df['Load'] == 8)
    & df[''].isin(['No action', '- 25 % demand'])
    & df['Hour'].isin(range(156,183))
)
data = df[selected].copy()
data[''] = data[''].apply(change_legend)
# Re-base the hour axis relative to hour 157.
data['Hour'] = data['Hour'].apply(lambda x:change_hours(x,start=157))
fig, ax = plt.subplots()
sns.lineplot(x="Hour", y="Voltage [pu]", data=data, ax=ax, hue='')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x26b5a205fd0>

In [180]:
#pickle_savefig(fig,'decrease_demand_voltage')

## Line capacity effect

In [164]:
period=199
#model, env = load_env(seed=9)
# Three copies of one seeded environment are stepped with the same constant
# action (+1 for all 18 action elements) so their line loadings can be compared.
env1 = ActiveEnv(seed=9)
net = env1.powergrid
actions = []
t_steps = []
flex_loads = []
obs = env1.reset()
sol = env1.solar_forecasts
hues = []
env1.set_parameters({'flexibility':0.25,
                   'solar_scale':1,
                   'reactive_power':False})
# env2: copy of env1 with 'reactive_power' switched on and the same forecast objects.
env2 = copy.deepcopy(env1)
env2.set_parameters({'reactive_power':True})
env2.solar_forecasts = env1.solar_forecasts 
env2.demand_forecasts = env1.demand_forecasts
# env3: copy with do_action disabled (presumably step() then ignores the action — confirm).
env3 = copy.deepcopy(env1)
env3.do_action = False
legend_map = {0:'Active',1:'+25 % demand', 2: 'No action'}


action = np.ones(18)
#action[[0,10]] = -1

for t_step in range(1,period):

    #action,_ = model.predict(obs)
    # NOTE(review): action2 is rebuilt on every iteration but never passed to
    # env.step below, which always receives `action`.
    action2 = np.ones(18)
    action2[[0,10]] = -1
    
    # NOTE: the loop variable rebinds the notebook-level name `env`.
    for i, env in enumerate([env1, env2, env3]):
        obs, rewards, dones, info = env.step(action)
        current = env.powergrid.res_line.loading_percent
        actions += list(current)
        hues += [legend_map[i] for _ in range(len(current))]
        t_steps += list(t_step*np.ones_like(current))
        flex_loads += list(range(len(current)))


# Long format: one row per (step, environment, line).
df = pd.DataFrame()
df['Line capacity [%]'] = actions
df['Hour'] = t_steps
df['Line'] = flex_loads
df[''] = hues

In [165]:
sns.set(style="ticks")
# One panel per value of the 'Line' column, coloured by the '' (scenario) column.
grid = sns.FacetGrid(df, col="Line", hue="",
                     col_wrap=6, height=1.5)


# NOTE(review): the reference line sits at y=1 while the plotted column is
# 'Line capacity [%]' (res_line.loading_percent); if the values are
# percentages, y=100 may have been intended — confirm.
grid.map(plt.axhline, y=1, ls=":", c=".5")

grid.map(plt.plot, "Hour", "Line capacity [%]")
grid.add_legend()



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x26b4cbdf400>

In [166]:
@interact
def plot_current(line_nr=list(range(15))):
    data = df[df['Line']== line_nr]
    fig, ax = plt.subplots()
    sns.lineplot(x="Hour", y="Line capacity [%]", data=data, ax=ax, hue='')
    


interactive(children=(Dropdown(description='line_nr', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1…

In [167]:
# Select line 0, the two scenarios of interest and hours 40..63 in one mask,
# then copy so the 'Hour' assignment below writes to an independent frame
# (assigning on a filtered slice triggers pandas' SettingWithCopyWarning).
selected = (
    (df['Line'] == 0)
    & df[''].isin(['No action','+25 % demand'])
    & df['Hour'].isin(range(40,64))
)
data = df[selected].copy()
# Re-base the hour axis relative to hour 37.
data['Hour'] = data['Hour'].apply(lambda x:change_hours(x, start=37))
fig, ax = plt.subplots()
sns.lineplot(x="Hour", y="Line capacity [%]", data=data, ax=ax, hue='')
#fig.savefig('figs/increase_demand_current.png')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x26b573d3160>

In [181]:
#pickle_savefig(fig,'increase_demand_current')

In [35]:
print(env.solar_forecasts[:5])
print(env2.solar_forecasts[:5])

[4.31223344e-17 4.31223344e-17 1.18536244e-02 8.62527254e-02
 2.02174315e-01]
[4.31223344e-17 4.31223344e-17 1.18536244e-02 8.62527254e-02
 2.02174315e-01]


In [36]:
print(env.demand_forecasts[0][:5])
print(env2.demand_forecasts[0][:5])

[0.07263449 0.19695374 0.35796542 0.41880813 0.45589402]
[0.07263449 0.19695374 0.35796542 0.41880813 0.45589402]


## See the difference in reward between agent and no-agent

In [59]:
# Hour of the day the environment is currently at. The step is read from the
# environment itself rather than from the loop variable `current_step`, which
# only exists after the rollout cell further down has been run.
calc_hour(env._episode_start_hour, env._current_step)

7

In [418]:
for reward in ['current']:
    period=100000
    model, env = load_env('flexible_load_first',seed=9) #seed 5: heavy sun, 9: weak sun
    # Restrict the reward to the single term under study.
    env.set_parameters({'reward_terms': [reward]})
    rewards, t_steps, hues, hours = [], [], [], []
    obs = env.reset()

    # Twin environment copied after reset; do_action is disabled on it so it
    # serves as the 'No agent' baseline (presumably step() then ignores the
    # action — confirm in ActiveEnv).
    env2 = copy.deepcopy(env)
    env2.do_action = False
    sol = env.solar_forecasts
    demand = env.demand_forecasts[0]

    show_sun, show_demand = True, True
    for t_step in range(1,period):

        action,_ = model.predict(obs)
        obs1, reward1, dones1, info1 = env.step(action)
        obs2, reward2, dones2, info2 = env2.step(action)
        # Fixed: the observation was never refreshed, so the policy predicted
        # from the initial reset observation on every step. Feed the agent's
        # latest observation back, as simulate_day2/simulate_day3 do.
        obs = obs1

        current_step = env._current_step
        hour = calc_hour(env._episode_start_hour,current_step)

        # A step counter of 0 marks a new episode: pick up its forecasts.
        if current_step == 0:
            sol = env.solar_forecasts
            demand = env.demand_forecasts[0]

        # One row per recorded series for this step.
        samples = [(reward1, 'Agent'), (reward2, 'No agent')]
        if show_sun:
            samples.append((sol[current_step-1], 'Sun'))
        if show_demand:
            samples.append((demand[current_step-1], 'Demand'))

        for value, label in samples:
            rewards.append(value)
            hues.append(label)
            t_steps.append(t_step)
            hours.append(hour)

    df = pd.DataFrame()
    df['Reward'] = rewards
    df['Hours'] = t_steps
    df['Hour in the day'] = hours
    df[''] = hues
    #df.to_csv('data/config_hour_{}.csv'.format(reward),index=False)

Loading a model without an environment, this model cannot be trained until it has a valid environment.
setting up target updates ...
  target/pi/fc0/kernel:0 <- model/pi/fc0/kernel:0
  target/pi/fc0/bias:0 <- model/pi/fc0/bias:0
  target/pi/LayerNorm/beta:0 <- model/pi/LayerNorm/beta:0
  target/pi/LayerNorm/gamma:0 <- model/pi/LayerNorm/gamma:0
  target/pi/fc1/kernel:0 <- model/pi/fc1/kernel:0
  target/pi/fc1/bias:0 <- model/pi/fc1/bias:0
  target/pi/LayerNorm_1/beta:0 <- model/pi/LayerNorm_1/beta:0
  target/pi/LayerNorm_1/gamma:0 <- model/pi/LayerNorm_1/gamma:0
  target/pi/pi/kernel:0 <- model/pi/pi/kernel:0
  target/pi/pi/bias:0 <- model/pi/pi/bias:0
  target/qf/fc0/kernel:0 <- model/qf/fc0/kernel:0
  target/qf/fc0/bias:0 <- model/qf/fc0/bias:0
  target/qf/LayerNorm/beta:0 <- model/qf/LayerNorm/beta:0
  target/qf/LayerNorm/gamma:0 <- model/qf/LayerNorm/gamma:0
  target/qf/fc1/kernel:0 <- model/qf/fc1/kernel:0
  target/qf/fc1/bias:0 <- model/qf/fc1/bias:0
  target/qf/LayerNorm_1/beta:

In [86]:
#df.to_csv('data/confi2g_current.csv',index=False)

'current'

In [329]:
# Reload a stored rollout; the empty-named series-label column comes back from
# the CSV as 'Unnamed: 3' and is renamed to '' again.
df = pd.read_csv('data/config1_hour_current.csv').rename(columns={'Unnamed: 3':''})
#fig,ax = plt.subplots()
#sns.lineplot(x='Hours',y='Reward',data=df,hue='', ax=ax)

In [347]:
# Keep only the 'Agent' / 'No agent' rows that carry a penalty (negative reward)
# and summarise the reward per group.
rewards = df[df[''].isin(['Agent','No agent']) & (df['Reward'] < 0)]
stats = rewards[['','Reward']].groupby(['']).describe()['Reward']
stats
#stats.iloc[:,:-1].to_csv('data/stats_config1_current.csv',float_format='%.3f')

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
,,,,,,,,
Agent,1538.0,-0.166596,0.140616,-0.584875,-0.258477,-0.129204,-0.053929,-0.002494
No agent,1300.0,-0.168243,0.134652,-0.563625,-0.256932,-0.156176,-0.044891,-0.00033


In [249]:
cat1 = df[df[''] == 'Agent']['Reward'].values
cat2 = df[df[''] == 'No agent']['Reward'].values
wilcoxon(cat1, cat2)

WilcoxonResult(statistic=10896814.0, pvalue=2.905272384475525e-283)

In [348]:
stats['count']*stats['mean']


Agent      -256.224468
No agent   -218.716002
dtype: float64

In [367]:
mean_agent =(505.9/9022)
mean_noagent = (522.5/9823) 
mean_agent/mean_noagent

1.0541919751718023

In [250]:
fig, ax = plt.subplots()
sns.boxplot(x="", y="Reward", data=rewards, ax=ax)
ax.set_ylabel('Current reward')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Current reward')

In [186]:
#pickle_savefig(fig,'config1_100ep_boxplot_current')

## Period the agent is good

In [251]:
rewards = df[df[''].isin(['Agent','No agent'])] #voltage: (15600,16000)
# Copy the 400-hour window so the 'Hours' assignment below writes to an
# independent frame (assigning on a filtered slice triggers pandas'
# SettingWithCopyWarning).
data = rewards[rewards['Hours'].isin(range(15600,16000))].copy()
# Re-base the hour axis so the window starts at 0.
data['Hours'] = data['Hours'].apply(lambda x:change_hours(x, start=15600))
fig, ax = plt.subplots()
sns.lineplot(x="Hours", y="Reward", data=data, ax=ax, hue='')
ax.set_ylabel('Voltage reward')
ax.set_xlabel('Hour')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'Hour')

In [199]:
#pickle_savefig(fig,'config1_400hour_good_voltage')

In [252]:
#stats = data[data['Reward'] < 0].groupby('')['Reward'].describe()
stats  = data[['','Reward']].groupby([''])['Reward'].describe()
stats
#stats.iloc[:,:-1].to_csv('data/config1_400hour_good_voltage.csv',float_format='%.3f')


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
,,,,,,,,
Agent,400.0,-0.002516,0.012521,-0.103773,0.0,0.0,0.0,0.0
No agent,400.0,-0.003809,0.0169,-0.127999,0.0,0.0,0.0,0.0


## Periods the agent is bad in terms of voltage
The agent has not learned to cope with high solar power production. Periods with high solar production have been found manually.

In [253]:
rewards = df[df[''].isin(['Agent','Sun'])] #current (14570,14640), voltage (9800,9950)
# Copy the selected window so the column assignments below write to an
# independent frame (assigning on a filtered slice triggers pandas'
# SettingWithCopyWarning).
data = rewards[rewards['Hours'].isin(range(14570,14640))].copy()
# Re-base the hour axis relative to hour 14568.
data['Hours'] = data['Hours'].apply(lambda x:change_hours(x, start=14568))
data[''] = data[''].apply(lambda x: 'Current reward' if x == 'Agent' else x)
fig, ax = plt.subplots()
sns.lineplot(x="Hours", y="Reward", data=data, ax=ax, hue='')
ax.set_ylabel('Current reward')
ax.set_xlabel('Hour')
# NOTE(review): the twin axis only receives a label here; both series are
# drawn on `ax` by the lineplot call above.
ax2 = plt.twinx()
ax2.set_ylabel('Solar irradiance')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Solar irradiance')

In [193]:
#pickle_savefig(fig,'config1_bad_current')


In [254]:
stats = data[data['Reward'] < 0].groupby('')['Reward'].describe()
stats
#stats.iloc[:,:-1].to_csv('data/config1_150hour_bad_voltage.csv',float_format='%.3f')

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
,,,,,,,,
Current reward,14.0,-0.039984,0.032464,-0.096699,-0.066309,-0.032039,-0.009327,-0.000107


## Distribution in critical hours

In [332]:

agent = df[df[''] == 'Agent']

normal = df[df[''] == 'No agent']
# Critical hours: steps where the run without an agent received a penalty.
bad_normal = normal[normal['Reward'] < 0]
bad_hours = bad_normal['Hours']
# Agent rows for those same steps; copied so the column assignments below
# write to an independent frame (avoids pandas' SettingWithCopyWarning).
bad_agent = agent[agent['Hours'].isin(bad_hours.values)].copy()
bad_agent['No agent'] = bad_normal['Reward'].values
bad_agent['normal_hours'] = bad_hours.values
bad_agent = bad_agent.rename(columns={'Reward':'Agent'})
# The positional pairing above is only valid if both frames list the same steps in the same order.
assert all(bad_agent['normal_hours'] == bad_agent['Hours'])

In [334]:
stacked = stack_columns(bad_agent,['Agent', 'No agent'])
fig, ax = plt.subplots()
sns.boxplot(x='', y="Reward", data=stacked, ax=ax)
ax.set_ylabel('Current reward')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Current reward')

In [335]:
#pickle_savefig(fig, 'config1_current_boxplot')

In [43]:
 (118.3-115.7) /118.3

0.02197802197802193

In [292]:
stats = stacked.groupby('')['Reward'].describe()
stats
#stats.iloc[:,:-1].to_csv('data/config1_voltage_critical.csv',float_format='%.3f')


## Wilcoxon signed-rank test to find the difference

In [258]:
cat1 = bad_agent['Agent']
cat2 = bad_agent['No agent']
wilcoxon(cat1, cat2)

WilcoxonResult(statistic=9979077.0, pvalue=7.212953509263093e-298)

## Sort rewards of the agent

In [336]:
fig, ax = plt.subplots()
# Order the paired rows by the 'No agent' reward, highest (least negative) first.
sorted_agent = bad_agent.sort_values(by='No agent',ascending=False)
# reset_index gives a 0..n-1 index, so the x axis below is the rank in that ordering.
sorted_agent = sorted_agent.reset_index()
sorted_agent.plot(y=['Agent','No agent'],ax=ax)
ax.set_xlabel('Current violation')
ax.set_ylabel('Current reward')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Current reward')

In [337]:
#pickle_savefig(fig,'config1_sorted_current')

In [58]:
data.quantile(0.86)

Reward              0.424817
Hours              61.540000
Hour in the day    20.000000
Name: 0.86, dtype: float64

In [338]:
data = sorted_agent[['Agent','No agent']]
data[data['Agent'] > data['No agent']].describe()

Unnamed: 0,Agent,No agent
count,48.0,48.0
mean,0.0,-0.009607
std,0.0,0.005349
min,-0.0,-0.016965
25%,-0.0,-0.016965
50%,-0.0,-0.00712
75%,-0.0,-0.004737
max,-0.0,-0.004737


In [307]:
1- 0.022934/0.036768

0.37625108790252393

In [339]:
(data['Agent'] < data['No agent']).mean()

0.963076923076923

In [61]:
data.cumsum().plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ad817ac8d0>

## Trained agent in non-critical periods

In [262]:
agent = df[df[''] == 'Agent']

normal = df[df[''] == 'No agent']
# Build a wide frame with one row per step and the two rewards side by side.
data = agent[['Hours']].reset_index(drop=True)
data['Agent'] = agent['Reward'].values
data['No agent'] = normal['Reward'].values
# The positional pairing above is only valid if both frames list the same steps in the same order.
assert all(data['Hours'].values == normal['Hours'].values)

# Steps where the agent's reward is lower than the reward without an agent.
worse = data[data['Agent'] < data['No agent']]
worse.plot(x='Hours')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ada09bd908>

In [263]:
(normal['Reward'] == 0).mean()

0.9140691406914069

## Non-critical hours when normal operation gives no penalty
Most of the time the safety margins are not violated. How is the trained agent behaving in these hours?

In [317]:

agent = df[df[''] == 'Agent']

normal = df[df[''] == 'No agent']
# NOTE: despite the `bad_` prefix, this cell selects the NON-critical steps,
# i.e. those where the run without an agent has a reward of exactly 0.
bad_normal = normal[normal['Reward'] == 0]
bad_hours = bad_normal['Hours']
# Agent rows for those same steps; copied so the column assignments below
# write to an independent frame (avoids pandas' SettingWithCopyWarning).
bad_agent = agent[agent['Hours'].isin(bad_hours.values)].copy()
bad_agent['No agent'] = bad_normal['Reward'].values
bad_agent['normal_hours'] = bad_hours.values
bad_agent = bad_agent.rename(columns={'Reward':'Agent'})
# The positional pairing above is only valid if both frames list the same steps in the same order.
assert all(bad_agent['normal_hours'] == bad_agent['Hours'])
data = bad_agent[['Agent','No agent']]

In [319]:
fig, ax = plt.subplots()
data[data['Agent'] < data['No agent']]['Agent'].sort_values().reset_index(drop=True).plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2adb082b2e8>

In [314]:
# Describe the 'No agent' rewards separately for steps with and without a
# penalty. Uses `normal` (the 'No agent' rows selected in the previous section)
# instead of `no_agent`, which is only defined by a later cell and made this
# cell fail when the notebook is run top to bottom.
normal.groupby(normal['Reward'] < 0)['Reward'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Reward,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
False,91406.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
True,8593.0,-0.035439,0.036929,-0.185599,-0.053629,-0.022419,-0.005713,-1.6e-05


In [316]:
1-8593/(91406 + 8593)

0.9140691406914069

In [82]:
bad_agent[['Agent','No agent']].sort_values(by='Agent').reset_index().plot(y='Agent')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x16fd5b7e4a8>

In [320]:
bad_agent[bad_agent['Agent'] < 0].describe()

Unnamed: 0,Agent,Hours,Hour in the day,No agent,normal_hours
count,243.0,243.0,243.0,243.0,243.0
mean,-0.000324,54596.868313,12.522634,0.0,54596.868313
std,0.000228,29564.12066,1.420955,0.0,29564.12066
min,-0.000708,22.0,10.0,0.0,22.0
25%,-0.000469,29649.5,11.0,0.0,29649.5
50%,-0.000233,58135.0,13.0,0.0,58135.0
75%,-0.000159,80715.0,13.0,0.0,80715.0
max,-3.3e-05,99567.0,15.0,0.0,99567.0


In [321]:
243/91406

0.0026584688094873424

## Plot error by hour of the day
Make a boxplot for each hour of the day that shows the error distribution.

In [341]:
# Split the rollout into the agent rows and the rows without an agent.
agent = df[df[''] =='Agent']
no_agent = df[df[''] =='No agent']
# Steps where the run without an agent received a penalty (negative reward) ...
bad_normal = no_agent[no_agent['Reward'] < 0]
bad_hours = bad_normal['Hours']
# ... and the agent rows for those same steps.
bad_agent = agent[agent['Hours'].isin(bad_hours.values)]

In [343]:
# Both frames must list the same steps in the same order for the positional
# subtraction below to pair the right rows.
assert all(bad_agent['Hours'].values == bad_normal['Hours'].values)
diff = pd.DataFrame()
# Positive values mean the agent's reward was higher than without an agent.
diff['Improvement'] = bad_agent['Reward'].values - bad_normal['Reward'].values
diff['Hour'] = bad_agent['Hour in the day'].values

# Add a zero-improvement placeholder row for every hour 7..23 without
# observations so that each of those hours gets a box position in the plot.
observed_hours = set(diff['Hour'])
zero_data = [{'Hour':h,'Improvement':0}
             for h in range(7,24) if h not in observed_hours]

if len(zero_data) > 0:
    # DataFrame.append was removed in pandas 2.0; pd.concat with
    # ignore_index=True also avoids the duplicate index labels append produced.
    diff = pd.concat([diff, pd.DataFrame(zero_data)], ignore_index=True)


fig, axes = plt.subplots()
sns.boxplot(x='Hour', y="Improvement", data=diff, ax=axes, color='grey')
axes.axhline(0,c=".5",ls='--')
axes.set_xlim(3) #V: 6.4,21.6
axes.set_xlabel('Hour of the day')
axes.set_ylabel('Current reward improvement')


<IPython.core.display.Javascript object>

Text(0, 0.5, 'Current reward improvement')

In [344]:
diff.groupby(by='Hour').describe()

Unnamed: 0_level_0,Improvement,Improvement,Improvement,Improvement,Improvement,Improvement,Improvement,Improvement
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Hour,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0,1.0,-0.026502,,-0.026502,-0.026502,-0.026502,-0.026502,-0.026502
5,1.0,-0.024438,,-0.024438,-0.024438,-0.024438,-0.024438,-0.024438
7,2.0,-0.024779,0.00499,-0.028308,-0.026543,-0.024779,-0.023015,-0.02125
8,1.0,0.0,,0.0,0.0,0.0,0.0,0.0
9,1.0,0.0,,0.0,0.0,0.0,0.0,0.0
10,121.0,-0.022702,0.001981,-0.025444,-0.023828,-0.023318,-0.020486,-0.019998
11,226.0,-0.025116,0.002682,-0.032427,-0.02624,-0.025299,-0.023012,-0.021587
12,307.0,-0.027388,0.002989,-0.034216,-0.028999,-0.026856,-0.026514,-0.02125
13,365.0,-0.02888,0.003432,-0.037879,-0.031136,-0.028598,-0.025618,-0.024438
14,208.0,-0.031032,0.003428,-0.03739,-0.033205,-0.031149,-0.0291,-0.023643


In [345]:
#pickle_savefig(fig,'config1_improvement_current')

## Nominal load values
See the difference in nominal load between the buses.

In [465]:
fig,ax = plt.subplots()
net = env.powergrid
nominal_load = net.load[['name','bus','sn_kva']]
# Sum the load rows per bus.
nominal_load = nominal_load.groupby('bus').sum()
# Share of each bus in the total, in percent (computed before the unit change below).
nominal_load['Load share'] = nominal_load['sn_kva'] / nominal_load['sn_kva'].sum() * 100
# Truncate to whole kVA, then divide by 1000 (plotted as MVA below).
nominal_load['sn_kva'] = nominal_load['sn_kva'].astype('int')
nominal_load['sn_kva'] /= 1000
nominal_load['sn_kva'].plot(kind='bar', ax=ax)
ax.set_ylabel('Nominal load [MVA]')
ax.set_xlabel('Bus')

#nominal_load.to_csv('data/nominal_load.csv',float_format='%.1f')
#pickle_savefig(fig,'nominal_load')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'Bus')

## Nominal sgen values
See the difference between nominal solar production and nominal consumption at each bus.

In [477]:
fig,ax = plt.subplots()
net = env.powergrid
nominal_sgen = net.sgen[['name','bus','sn_kva']]
# Sum the static-generator rows per bus.
nominal_sgen = nominal_sgen.groupby('bus').sum()
# Share of each bus in the total generator rating, in percent (the column keeps the name 'Load share').
nominal_sgen['Load share'] = nominal_sgen['sn_kva'] / nominal_sgen['sn_kva'].sum() * 100
# Truncate to whole kVA, then divide by 1000 (plotted as MVA below).
nominal_sgen['Solar production'] = nominal_sgen['sn_kva'].astype('int')
nominal_sgen['Solar production'] /= 1000
# Relies on `nominal_load` from the nominal-load cell, whose 'sn_kva' column was already divided by 1000 there.
nominal_sgen['Consumption'] = nominal_load['sn_kva']
nominal_sgen[['Solar production','Consumption']].plot(kind='bar', ax=ax)
ax.set_ylabel('Nominal apparent power [MVA]')
ax.set_xlabel('Bus')

#nominal_load.to_csv('data/nominal_sgen.csv',float_format='%.1f')
#pickle_savefig(fig,'nominal_sgen')

<IPython.core.display.Javascript object>

## See actions of agent at buses with a high impact

In [181]:
def simulate_day3(env, model, show_imbalance=False, show_solar=True,
                  show_action=True,
                  show_demand=False, period=25):
    """
    Simulate the grid and record, in long format, one row per load and per
    enabled series for every time step, together with the hour of the day.

    For every step ``1 .. period - 1`` the model predicts an action from the
    latest observation and the environment is stepped. When the environment
    reports ``_current_step == 0`` the forecasts are read again.

    :param env: environment exposing ``powergrid``, ``reset``, ``step``,
        ``solar_forecasts``, ``demand_forecasts``, ``_current_step``,
        ``_episode_start_hour`` and, when ``show_imbalance`` is set,
        ``calc_balance`` or ``calc_imbalance``.
    :param model: object whose ``predict(obs)`` returns ``(action, state)``.
    :param show_imbalance: record the imbalance divided by 30000 (label 'imbalance').
    :param show_solar: record ``solar_forecasts[_current_step - 1]`` (label 'sun').
    :param show_action: record the action itself (label 'action').
    :param show_demand: record ``demand_forecasts[0][_current_step - 1]`` (label 'demand').
    :param period: exclusive upper bound of the step counter.
    :return: DataFrame with columns 'actions' (recorded value), 'steps',
        'load' (index of ``env.powergrid.load``), 'hour' and '' (series label).
    """
    net = env.powergrid
    actions, t_steps, flex_loads, hours, hues = [], [], [], [], []

    obs = env.reset()
    sol = env.solar_forecasts
    demand = env.demand_forecasts[0]
    for t_step in range(1, period):

        action, _ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)

        # A step counter of 0 marks a new episode: pick up its forecasts.
        if env._current_step == 0:
            sol = env.solar_forecasts
            demand = env.demand_forecasts[0]

        hour = calc_hour(env._episode_start_hour, env._current_step)

        # Scalars are broadcast to one value per action element.
        ones = np.ones_like(action)
        width = len(action)
        step_column = list(t_step * ones)
        load_ids = list(net.load.index)

        # Collect the enabled series for this step as (label, values) pairs.
        series = []
        if show_action:
            series.append(('action', list(action)))
        if show_solar:
            series.append(('sun', list(sol[env._current_step - 1] * ones)))
        if show_imbalance:
            # The environment API offers the imbalance under one of two names.
            try:
                imbalance = env.calc_balance() / 30000
            except AttributeError:
                imbalance = env.calc_imbalance() / 30000
            series.append(('imbalance', list(imbalance * ones)))
        if show_demand:
            series.append(('demand', list(demand[env._current_step - 1] * ones)))

        for label, values in series:
            actions += values
            hues += [label] * width
            t_steps += step_column
            flex_loads += load_ids
            # Fixed: the hour column used to be padded with a hard-coded
            # len(action)*3 entries per step, which only matched the other
            # columns when exactly three series were enabled.
            hours += [hour] * width

    df = pd.DataFrame()
    df['actions'] = actions
    df['steps'] = t_steps
    df['load'] = flex_loads
    df['hour'] = hours
    df[''] = hues
    return df

In [188]:
model, env = load_env(seed=9)
df = simulate_day3(env,model, show_demand=True, period=100000)

Loading a model without an environment, this model cannot be trained until it has a valid environment.
setting up target updates ...
  target/pi/fc0/kernel:0 <- model/pi/fc0/kernel:0
  target/pi/fc0/bias:0 <- model/pi/fc0/bias:0
  target/pi/LayerNorm/beta:0 <- model/pi/LayerNorm/beta:0
  target/pi/LayerNorm/gamma:0 <- model/pi/LayerNorm/gamma:0
  target/pi/fc1/kernel:0 <- model/pi/fc1/kernel:0
  target/pi/fc1/bias:0 <- model/pi/fc1/bias:0
  target/pi/LayerNorm_1/beta:0 <- model/pi/LayerNorm_1/beta:0
  target/pi/LayerNorm_1/gamma:0 <- model/pi/LayerNorm_1/gamma:0
  target/pi/pi/kernel:0 <- model/pi/pi/kernel:0
  target/pi/pi/bias:0 <- model/pi/pi/bias:0
  target/qf/fc0/kernel:0 <- model/qf/fc0/kernel:0
  target/qf/fc0/bias:0 <- model/qf/fc0/bias:0
  target/qf/LayerNorm/beta:0 <- model/qf/LayerNorm/beta:0
  target/qf/LayerNorm/gamma:0 <- model/qf/LayerNorm/gamma:0
  target/qf/fc1/kernel:0 <- model/qf/fc1/kernel:0
  target/qf/fc1/bias:0 <- model/qf/fc1/bias:0
  target/qf/LayerNorm_1/beta:

In [189]:
df.to_csv('data/config1_actions.csv',index=False)

In [149]:
df[(df['load'] == 0) & (df['']=='action')].head()

Unnamed: 0,actions,steps,load,hour,Unnamed: 5
0,-0.980185,1.0,0,19,action
54,-0.984327,2.0,0,20,action
108,-0.977257,3.0,0,21,action
162,-0.99344,4.0,0,22,action
216,-0.986921,5.0,0,23,action


In [107]:
#df['actions'] * df['load'].apply(lambda x: net.load[net.load.bus == x]['sn_kva'].sum())

In [190]:
# Map each load index to the bus it is connected to.
# NOTE(review): `net` is whatever grid was last assigned to that name in an
# earlier cell — confirm it matches the environment that produced `df`.
load_bus = dict(zip(net.load.index,net.load.bus))


# Copy the action rows so the column assignments below write to an independent
# frame (assigning on a filtered slice triggers pandas' SettingWithCopyWarning).
data = df[df[''] == 'action'].copy()
demand =  df[df[''] == 'demand']
# The positional pairing below requires both frames to list the same (step, load) pairs in the same order.
assert  (data[['steps','load']] .values == demand[['steps','load']] .values).all()

# Magnitude of the action multiplied by the recorded demand value for the same step and load.
data['delta'] = data['actions'].abs()*demand['actions'].values
data['bus'] = data['load'].apply(lambda x: load_bus[x])
fig, ax = plt.subplots()
sns.boxplot(x='bus',y='delta', data=data, ax=ax)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x125d9a3aa20>

### group power change by hour

In [191]:
fig, ax = plt.subplots()
# NOTE(review): hour_mean is not used by the plot below, which draws from `data` directly.
hour_mean = data.groupby('hour').mean()
# Action values against the hour of the day, drawn in grey.
sns.lineplot(x = 'hour', y='actions', data=data, ax=ax, color='grey')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x125c25db438>

In [192]:
ax.set_xlabel('Hour of the day')
ax.set_ylabel('Change in demand')

Text(14.000000000000002, 0.5, 'Change in demand')

In [193]:
#pickle_savefig(fig,'config1_action_hour')

In [166]:
data.groupby('hour').mean()

Unnamed: 0_level_0,actions,steps,load,delta,bus
hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,-0.062932,102.0,8.5,0.253411,7.944444
1,-0.172156,91.0,8.5,0.028468,7.944444
2,-0.162012,92.0,8.5,0.02404,7.944444
3,-0.133925,93.0,8.5,0.035685,7.944444
4,-0.134526,94.0,8.5,0.051193,7.944444
5,-0.145615,95.0,8.5,0.059407,7.944444
6,-0.120087,96.0,8.5,0.108545,7.944444
7,-0.075695,97.0,8.5,0.173712,7.944444
8,-0.054742,98.0,8.5,0.196145,7.944444
9,-0.035993,99.0,8.5,0.199598,7.944444


In [67]:
df.groupby('steps').mean().plot(y='actions')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x125af8a01d0>