# First Results 

**Summary of the Article**
- Methodology Description.
- Data Description.
- Training.
- Testing and Evaluation.

## Methodology Description

The **objective** of this notebook is to obtain the first results for the prediction of constraints in the power flow calculation. The constraints predicted are:
- Voltage constraints:
    - Maximum Voltage magnitude constraints.
    - Minimum Voltage magnitude constraints.
- Current constraints:
    - Maximum Current magnitude constraints.

The **training data** is obtained from the following sources:
- Target data:
    - Constraint amplitudes of the power flow results, created in `create_target_features.ipynb`.
- Exogenous data:
    - Data not related to the test grid (e.g. meteorological data, cos(hour)), created in `feature_engineering.ipynb`.

The **models** trained will be:
- Linear Regression.
- Gradient Boost Regression.

The **metrics** used to evaluate the models will be the metric proposed in the `ml_hybrid_metrics.ipynb` notebook.


## Data Upload and Description

Target data:

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Load the three ground-truth constraint targets and drop the 'timestamps'
# column so that only the per-element constraint columns remain.
# Forward slashes are used because they resolve on Windows and POSIX alike; the
# previous backslash paths relied on invalid escape sequences ('\d', '\g') and
# only worked on Windows.
ground_truth_dir = '../data/ground_truth'
y_min_u = pd.read_csv(f'{ground_truth_dir}/res_bus_vm_pu_min_constr.csv').drop(columns=['timestamps'])
y_max_u = pd.read_csv(f'{ground_truth_dir}/res_bus_vm_pu_max_constr.csv').drop(columns=['timestamps'])
y_max_i = pd.read_csv(f'{ground_truth_dir}/res_line_percent_max_constr.csv').drop(columns=['timestamps'])

In [None]:
# One panel per target frame: (data, title, y-axis label).
# The second panel shows y_max_u, which is a bus-voltage target, so it carries
# voltage labels (it was previously labelled as a line current in kA).
target_panels = [
    (y_min_u, 'Minimum Bus Voltage Constraints', 'Bus Voltage [p.u.]'),
    (y_max_u, 'Maximum Bus Voltage Constraints', 'Bus Voltage [p.u.]'),
    (y_max_i, 'Line Loading Constraints', 'Line Loading [%]'),
]
fig, axs = plt.subplots(len(target_panels), 1, figsize=(30, 25))
for ax, (target, title, ylabel) in zip(axs, target_panels):
    ax.plot(target)
    ax.set_xlabel('Time')
    ax.set_ylabel(ylabel)
    ax.set_title(title, fontsize=20, fontweight='bold')
    ax.grid(True)
# Render the figure without echoing the list of Line2D objects to the output.
plt.show()

Exogenous data:

In [None]:
# Load the exogenous features. Forward slashes keep the relative path valid on
# both Windows and POSIX; the previous backslash path relied on invalid escape
# sequences ('\d', '\p', '\e') and only resolved on Windows.
exogenous_data = pd.read_csv('../data/processed/production/exogenous_data_extended.csv')
# Drop the 'date' column before the frame is used as the feature matrix.
exogenous_data = exogenous_data.drop(columns=['date'])
X = exogenous_data
X.head(2)

Train/test split with sklearn.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
le = LabelEncoder()

def split_and_suffle(X, y, test_size=0.2, random_state=None):
    """Split chronologically into train/test, encode 'season', shuffle the train part.

    Args:
        X: Feature DataFrame; must contain a 'season' column.
        y: Targets aligned row-by-row with ``X``.
        test_size: Fraction of rows held out; the test rows are the last rows
            of ``X``/``y`` because the split itself is not shuffled.
        random_state: Optional seed forwarded to ``sklearn.utils.shuffle`` so
            the row order of the training set is reproducible. ``None`` keeps
            the previous unseeded behaviour.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. Only the training pair is
        shuffled; the test pair keeps its original order.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=False)
    # Fit the encoder once on the full column and reuse it for both parts.
    # Re-fitting on the test part could map the same season to a different
    # integer whenever the test window does not contain every season.
    le.fit(X['season'])
    # assign() returns new frames, so neither X nor the split results are
    # modified through a possibly shared slice.
    X_train = X_train.assign(season=le.transform(X_train['season']))
    X_test = X_test.assign(season=le.transform(X_test['season']))
    X_train, y_train = shuffle(X_train, y_train, random_state=random_state)
    return X_train, X_test, y_train, y_test

# Build an independent train/test split of the shared feature matrix for each
# of the three targets.
(X_min_u_train, X_min_u_test,
 y_min_u_train, y_min_u_test) = split_and_suffle(X, y_min_u)
(X_max_u_train, X_max_u_test,
 y_max_u_train, y_max_u_test) = split_and_suffle(X, y_max_u)
(X_max_i_train, X_max_i_test,
 y_max_i_train, y_max_i_test) = split_and_suffle(X, y_max_i)
# Print the object identity of each training frame, one per line, to show
# whether the three splits are separate objects.
print(
    hex(id(X_min_u_train)),
    hex(id(X_max_u_train)),
    hex(id(X_max_i_train)),
    sep='\n',
)

## Training
TODO: add introduction.


In [None]:
import sys
import os
sys.path.append('..')
from thesis_package import aimodels as my_ai, utils

In [None]:
# Train both regressors for the minimum-voltage target, or reload them from the
# on-disk cache when an earlier run already serialized them.
pickle_dir = 'pickles'
# os.path.join yields a path inside the cache directory on every OS; the
# hard-coded '\\' separator only did so on Windows.
pickle_path = os.path.join(pickle_dir, 'regressor_min_u')
# os.listdir raises FileNotFoundError when the cache directory is missing.
os.makedirs(pickle_dir, exist_ok=True)
# NOTE(review): the check assumes utils.serialize_object appends '.pickle' to
# the path it is given - confirm against thesis_package.utils.
if 'regressor_min_u.pickle' not in os.listdir(pickle_dir):
    # Linear Regression
    regressor_min_u = my_ai.Context(strategy=my_ai.LinearRegressionStrategy())
    regressor_min_u.fit(data={'X_train': X_min_u_train, 'y_train': y_min_u_train})
    # Gradient Boost Regression
    hyper_params = {'n_estimators': 1000, 'learning_rate': 0.1, 'loss': 'squared_error'}
    regressor_min_u.strategy = my_ai.GradientBoostRegressorStrategy(hyper_params)
    regressor_min_u.fit(data={'X_train': X_min_u_train.values, 'y_train': y_min_u_train.values})
    utils.serialize_object(pickle_path, regressor_min_u)
else:
    regressor_min_u = utils.deserialize_object(pickle_path)
# NOTE(review): strategies[0] / strategies[1] are read as the linear and the
# gradient-boost model, in the order they were fitted above - confirm that
# Context keeps one entry per fitted strategy.
prediction_lr_min_u = regressor_min_u.strategies[0].predict(data={'X_test': X_min_u_test})
prediction_lr_min_u = pd.DataFrame(prediction_lr_min_u , columns=y_min_u_test.columns)
# NOTE(review): the gradient-boost strategy is fitted on NumPy arrays but
# predicts from a DataFrame - confirm the strategy accepts both.
prediction_gb_min_u =  regressor_min_u.strategies[1].predict(data={'X_test': X_min_u_test})
prediction_gb_min_u = pd.DataFrame(prediction_gb_min_u, columns=y_min_u_test.columns)

In [None]:
# Train both regressors for the maximum-voltage target, or reload them from the
# on-disk cache when an earlier run already serialized them.
pickle_dir = 'pickles'
# os.path.join yields a path inside the cache directory on every OS; the
# hard-coded '\\' separator only did so on Windows.
pickle_path = os.path.join(pickle_dir, 'regressor_max_u')
# os.listdir raises FileNotFoundError when the cache directory is missing.
os.makedirs(pickle_dir, exist_ok=True)
# NOTE(review): the check assumes utils.serialize_object appends '.pickle' to
# the path it is given - confirm against thesis_package.utils.
if 'regressor_max_u.pickle' not in os.listdir(pickle_dir):
    # Linear Regression
    regressor_max_u = my_ai.Context(strategy=my_ai.LinearRegressionStrategy())
    regressor_max_u.fit(data={'X_train': X_max_u_train, 'y_train': y_max_u_train})
    # Gradient Boost Regression
    hyper_params = {'n_estimators': 1000, 'learning_rate': 0.1, 'loss': 'squared_error'}
    regressor_max_u.strategy = my_ai.GradientBoostRegressorStrategy(hyper_params)
    regressor_max_u.fit(data={'X_train': X_max_u_train.values, 'y_train': y_max_u_train.values})
    utils.serialize_object(pickle_path, regressor_max_u)
else:
    regressor_max_u = utils.deserialize_object(pickle_path)
# NOTE(review): strategies[0] / strategies[1] are read as the linear and the
# gradient-boost model, in the order they were fitted above - confirm that
# Context keeps one entry per fitted strategy.
prediction_lr_max_u = regressor_max_u.strategies[0].predict(data={'X_test': X_max_u_test})
prediction_lr_max_u = pd.DataFrame(prediction_lr_max_u , columns=y_max_u_test.columns)
# NOTE(review): the gradient-boost strategy is fitted on NumPy arrays but
# predicts from a DataFrame - confirm the strategy accepts both.
prediction_gb_max_u =  regressor_max_u.strategies[1].predict(data={'X_test': X_max_u_test})
prediction_gb_max_u = pd.DataFrame(prediction_gb_max_u, columns=y_max_u_test.columns)

In [None]:
# Train both regressors for the maximum line-loading target, or reload them
# from the on-disk cache when an earlier run already serialized them.
pickle_dir = 'pickles'
# os.path.join yields a path inside the cache directory on every OS; the
# hard-coded '\\' separator only did so on Windows.
pickle_path = os.path.join(pickle_dir, 'regressor_max_i')
# os.listdir raises FileNotFoundError when the cache directory is missing.
os.makedirs(pickle_dir, exist_ok=True)
# NOTE(review): the check assumes utils.serialize_object appends '.pickle' to
# the path it is given - confirm against thesis_package.utils.
if 'regressor_max_i.pickle' not in os.listdir(pickle_dir):
    # Linear Regression
    regressor_max_i = my_ai.Context(strategy=my_ai.LinearRegressionStrategy())
    regressor_max_i.fit(data={'X_train': X_max_i_train, 'y_train': y_max_i_train})
    # Gradient Boost Regression
    hyper_params = {'n_estimators': 1000, 'learning_rate': 0.1, 'loss': 'squared_error'}
    regressor_max_i.strategy = my_ai.GradientBoostRegressorStrategy(hyper_params)
    regressor_max_i.fit(data={'X_train': X_max_i_train.values, 'y_train': y_max_i_train.values})
    utils.serialize_object(pickle_path, regressor_max_i)
else:
    regressor_max_i = utils.deserialize_object(pickle_path)
# NOTE(review): strategies[0] / strategies[1] are read as the linear and the
# gradient-boost model, in the order they were fitted above - confirm that
# Context keeps one entry per fitted strategy.
prediction_lr_max_i = regressor_max_i.strategies[0].predict(data={'X_test': X_max_i_test})
prediction_lr_max_i = pd.DataFrame(prediction_lr_max_i , columns=y_max_i_test.columns)
# NOTE(review): the gradient-boost strategy is fitted on NumPy arrays but
# predicts from a DataFrame - confirm the strategy accepts both.
prediction_gb_max_i =  regressor_max_i.strategies[1].predict(data={'X_test': X_max_i_test})
prediction_gb_max_i = pd.DataFrame(prediction_gb_max_i, columns=y_max_i_test.columns)

## Evaluation
TODO add introduction

In [None]:
import beepy 
import numpy as np
beepy.beep('coin')
from thesis_package import metrics 
# Scorer instance from the project's metrics module, reused for every experiment.
metric = metrics.Metrics()
# Column layout of the results table: run identifiers, the four outcome
# counters, the classification scores and the per-outcome RMSE values.
cols = (
    ['experience', 'model']
    + ['TP', 'FP', 'FN', 'TN']
    + ['accuracy', 'precision', 'recall', 'f1']
    + ['TP_rmse', 'FP_rmse', 'FN_rmse', 'TN_rmse']
)
# Start from an empty table; one row is appended per (experiment, model) pair.
results = pd.DataFrame(columns=cols)
def write_result(results, prediction, y_test, threshold, metric, experience, model):
    """Score one prediction against its ground truth and append a row to ``results``.

    Args:
        results: DataFrame collecting one row per (experience, model) pair. It
            is extended in place and also returned.
        prediction: Predicted values for the test period.
        y_test: Ground-truth values matching ``prediction``.
        threshold: Threshold forwarded to ``metric.get_prediction_scores``.
        metric: Scorer exposing ``get_prediction_scores``, ``get_report`` and
            the counter / score / RMSE attributes read below.
        experience: Label of the experiment (e.g. ``'min_u'``).
        model: Label of the model (e.g. ``'Linear Regression'``).

    Returns:
        ``results`` with the new row stored at label ``len(results)``.
    """
    # Score against the ground truth passed by the caller. The function used to
    # read the global y_min_u_test here, so every experiment was evaluated
    # against the minimum-voltage targets regardless of ``y_test``.
    metric.get_prediction_scores(prediction, y_test, threshold=threshold)
    metric.get_report()
    # Build the row in one step; constructing an empty pd.Series() without a
    # dtype is deprecated in pandas.
    row = pd.Series({
        'experience': experience,
        'model': model,
        'TP': metric.true_positives_ctr,
        'FP': metric.false_positives_ctr,
        'FN': metric.false_negatives_ctr,
        'TN': metric.true_negatives_ctr,
        'accuracy': metric.accuracy,
        'precision': metric.precision,
        'recall': metric.recall,
        'f1': metric.f1_score,
        'TP_rmse': metric.true_positives_rmse,
        'FP_rmse': metric.false_positives_rmse,
        'FN_rmse': metric.false_negatives_rmse,
        'TN_rmse': metric.true_negatives_rmse,
    })
    # Append the row to results.
    results.loc[len(results)] = row
    return results
# Score the Linear Regression and Gradient Boost predictions of every
# experiment. Each entry is (label, y_train, y_test, linear prediction,
# gradient-boost prediction).
experiments = [
    ('min_u', y_min_u_train, y_min_u_test, prediction_lr_min_u, prediction_gb_min_u),
    ('max_u', y_max_u_train, y_max_u_test, prediction_lr_max_u, prediction_gb_max_u),
    ('max_i', y_max_i_train, y_max_i_test, prediction_lr_max_i, prediction_gb_max_i),
]
try:
    for exp_name, exp_y_train, exp_y_test, exp_pred_lr, exp_pred_gb in experiments:
        # Threshold: 10% of the mean of the per-column maxima of the training
        # targets, taken over the columns whose maximum is non-zero.
        threshold = exp_y_train.loc[:, exp_y_train.max(axis=0) != 0].max(axis=0).mean() * 0.1
        results = write_result(results, exp_pred_lr, exp_y_test, threshold, metric, exp_name, 'Linear Regression')
        results = write_result(results, exp_pred_gb, exp_y_test, threshold, metric, exp_name, 'Gradient Boost')
except Exception:
    # Keep the audible failure signal, but re-raise so the error is visible in
    # the cell output instead of being swallowed.
    beepy.beep('robot error')
    raise
else:
    # Index and signal success only when every experiment was scored.
    results = results.set_index(['experience', 'model'])
    beepy.beep('success')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')
# Threshold for the maximum-voltage experiment: 10% of the mean of the
# per-column maxima of the training targets, over columns with a non-zero maximum.
threshold = y_max_u_train.loc[:, y_max_u_train.max(axis=0) != 0].max(axis=0).mean() * 0.1
# Constant series with one threshold value per test row, drawn on both panels.
threshold_signal = pd.Series(np.ones([len(y_max_u_test)]) * threshold)
# Window of test rows shown in both panels.
plot_window = slice(6000, 6250)
# Left panel: gradient-boost prediction; right panel: ground truth re-indexed
# from zero so both panels share the same positional x-axis.
fig, axs = plt.subplots(1, 2, figsize=(30, 10))
panel_specs = (
    ('Prediction Gradient Boost Regression', prediction_gb_max_u),
    ('Actual', y_max_u_test.reset_index(drop=True)),
)
for panel_ax, (panel_title, panel_data) in zip(axs, panel_specs):
    panel_ax.plot(panel_data[plot_window])
    panel_ax.plot(threshold_signal[plot_window])
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Time')
plt.show()

## 