In [1]:
# We create the environment
import os
import pypownet.environment
# Folder handed to RunEnv as `parameters_folder`, after being made absolute.
data_dir = 'sample_data'
environment = pypownet.environment.RunEnv(
    parameters_folder=os.path.abspath(data_dir),
    game_level="hard",
    chronic_looping_mode='natural',
    start_id=0,
    game_over_mode="soft",
)

Using custom reward signal CustomRewardSignal of file /home/tp-home009/agardil/Documents/projects/Mini-projet-Electricity/starting-kit/sample_data/reward_signal.py



                     GAME PARAMETERS
    loadflow_backend: pypower
    n_timesteps_hard_overflow_is_broken: 10
    max_number_prods_game_over: 3
    n_timesteps_horizon_maintenance: 48
    max_seconds_per_timestep: 1.0
    max_number_loads_game_over: 6
    hard_overflow_coefficient: 1.0
    n_timesteps_soft_overflow_is_broken: 10
    n_timesteps_consecutive_soft_overflow_breaks: 10
    loadflow_mode: AC



In [4]:
# We import our agents
from pypownet.agent import GreedySearch, RandomNodeSplitting, RandomLineSwitch

# Build the three agents, all bound to the same environment.
GS_agent = GreedySearch(environment)
RNS_agent = RandomNodeSplitting(environment)
RLS_agent = RandomLineSwitch(environment)

# Switch off the verbose flag of each agent.
GS_agent.verbose = False
RNS_agent.verbose = False
RLS_agent.verbose = False


Storing actions at saved_actions.csv
Storing actions at saved_actions_RandomNodeSplitting.csv
Storing actions at saved_actions_RandomLineSwitch.csv


In [6]:
# Creation of data for supervised learning.

import numpy as np

# Start the collection from a freshly reset environment.
current_state = environment.reset()

# Recorded states (X) and the greedy agent's answers (Y), gathered as plain lists.
X, Y = [], []
learning_steps = 10000

# Play 'learning_steps' time steps in total.
for i in range(learning_steps):

    # A None state is treated as "the grid has broken down" (game over): start again.
    if current_state is None:
        current_state = environment.reset()

    # One uniform draw in [0, 1) picks who acts at this step:
    #   [0.30, 1)    -> greedy search, the agent we want to imitate (recorded)
    #   [0.15, 0.30) -> random line switch (not recorded)
    #   [0, 0.15)    -> random node splitting (not recorded)
    # The two random agents only disturb the grid, so that the greedy agent is also
    # observed in critical situations.
    epsilon = np.random.random()

    if epsilon >= 0.30:
        current_action = GS_agent.act(current_state)
        # Keep the (state, action) pair as one supervised-learning sample.
        X.append(current_state.as_array())
        Y.append(current_action.as_array())
    elif epsilon >= 0.15:
        current_action = RLS_agent.act(current_state)
    else:
        current_action = RNS_agent.act(current_state)

    # Apply the chosen action; only the first returned value (the next state) is used.
    new_state, action, reward, _ = environment.step(current_action)
    current_state = new_state

# Convert the lists to numpy arrays, which are more convenient downstream.
X = np.array(X)
Y = np.array(Y)

print(X.shape)
print(Y.shape)

(7000, 428)
(7000, 76)


In [7]:
# Build the correspondence between action vectors and integer labels.
# Restricting the labels to the actions that were actually recorded shrinks the
# action space and leaves out actions that would make no sense.

#print(Y)

# U holds one copy of each distinct row of Y; a row's index in U serves as its label.
U = np.unique(Y, axis=0)

# Functions to convert actions to their labels, and labels back to actions

def Y_array_to_Y_label(Y, U):
    """Map each entry of Y to its label, i.e. the index of the equal row in table U.

    Y is walked along its first axis and every entry is compared element-wise
    with every row of U. The result is an integer array with one label per entry.
    An entry that equals no row of U keeps the initial label 0; if several rows
    match, the last matching index is kept.
    """
    n_entries = Y.shape[0]
    n_labels = U.shape[0]

    # Every label starts at 0 and is overwritten when a matching row is found.
    labels = np.zeros(n_entries, dtype=int)

    for position, entry in enumerate(Y):
        for candidate in range(n_labels):
            if (entry == U[candidate]).all():
                labels[position] = candidate

    return labels

def Y_label_to_Y_array(Y_id, U):
    """Turn labels back into their action vectors using the lookup table U.

    Parameters
    ----------
    Y_id : array-like of int
        Labels, i.e. row indices into U. Lists are accepted as well as arrays.
    U : array-like
        Lookup table with one action per row.

    Returns
    -------
    numpy.ndarray
        A new array whose k-th row is U[Y_id[k]]. For empty input the result is
        empty but keeps U's trailing dimensions and dtype.

    Raises
    ------
    IndexError
        If a label is out of range for U or is not an integer.
    """
    table = np.asarray(U)
    labels = np.asarray(Y_id)

    # An empty list becomes a float array, which cannot be used as an index,
    # so the empty case is answered directly with a correctly shaped result.
    if labels.size == 0:
        return np.empty((0,) + table.shape[1:], dtype=table.dtype)

    # Integer-array indexing gathers all requested rows at once and returns a copy.
    return table[labels]
    
    
# Sanity checks on the action <-> label round trip.
Y_label = Y_array_to_Y_label(Y, U)

# Each distinct action must be mapped to its own index in U. The whole table is
# passed in: handing over a single row such as U[0] would make the function walk
# over that row's scalar entries instead of treating it as one action.
assert (Y_array_to_Y_label(U, U) == np.arange(U.shape[0])).all()

# Decoding the labels must give back exactly the recorded actions.
assert (Y_label_to_Y_array(Y_label, U) == Y).all()


In [8]:
# we import the classifier, and a module to save its parameters.
from sklearn.neural_network import MLPClassifier
import pickle

# Instantiate the classifier: a multi-layer perceptron with two hidden layers of 100 units.
clf = MLPClassifier(solver='adam', alpha=1e-5, batch_size=10, hidden_layer_sizes=(100, 100), random_state=1)

# Train the classifier, then print its accuracy on the data it was trained on.
clf.fit(X, Y_label)
print(clf.score(X, Y_label))

# Recompute that accuracy by hand, as a percentage. It is measured on the training
# set itself, so it says nothing about how the model behaves on unseen states.
N = X.shape[0]
Y_predit = clf.predict(X)
score = int(np.count_nonzero(Y_label == Y_predit))
print("sur l'ensemble d'apprentissage, score de " + str(100*score/N) + " %")

# Save the fitted model and the label table so that the imitation agent can load them.
# The file is opened in a `with` block so the handle is closed (and the data flushed)
# even if pickling fails.
# NOTE: unpickling executes arbitrary code; only load this file from a trusted source.
filename = 'parameters_MLP.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)
np.save('tableauU.npy', U)


0.732
sur l'ensemble d'apprentissage, score de 73.2 %


In [9]:
# Display the classifier's repr, which lists its hyper-parameters
clf

MLPClassifier(activation='relu', alpha=1e-05, batch_size=10, beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [10]:
# Shapes of the training inputs X and of the integer label vector Y_label
print(X.shape)
print(Y_label.shape)

(7000, 428)
(7000,)


In [12]:
# An example of state array: the first recorded row of X
X[0]

array([ 1.67000008e+01,  8.00999985e+01,  3.90000000e+01,  6.19999981e+00,
        8.60000038e+00,  2.42999992e+01,  6.90000010e+00,  2.70000005e+00,
        4.90000010e+00,  1.11000004e+01,  1.23999996e+01,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  1.67999992e+01,  7.90999985e+01,
        3.82999992e+01,  6.09999990e+00,  8.69999981e+00,  2.37999992e+01,
        6.80000019e+00,  2.70000005e+00,  4.80000019e+00,  1.10000000e+01,
        1.23999996e+01,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        8.80000000e+01, -0.00000000e+00,  7.80000000e+01,  4.74000015e+01,
       -0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

In [13]:
# An example of action array: the first recorded row of Y
Y[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])