# Machine Learning Hybrid Metrics
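This notebook trains linear-regression models to predict, for every bus, how far the ground-truth value falls below 0.95 (0 when it does not). Two feature sets are compared: the exogenous data and the network's active/reactive power profiles. The predictions are then scored with a hybrid metric that combines a positive/negative classification at an activation threshold with a per-class RMSE.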

## Target Values 

In [1]:
import pandas as pd
# Load the ground-truth per-bus results.
# Forward slashes replace the original backslash literal, whose "\d", "\g" and "\p"
# are invalid escape sequences and which only resolved on Windows.
output = pd.read_csv('../data/ground_truth/pf_res_bus_vm_pu.csv')
# Keep the timestamps aside as datetimes (one vectorised parse instead of a per-row lambda).
timestamps = pd.to_datetime(output['timestamps'])
# Drop the timestamp column without mutating in place, so only the value columns remain.
output = output.drop(columns=['timestamps'])
# NOTE(review): the saved output lists an 'Unnamed: 0' label, which looks like a stored
# CSV index that may be treated as a target column here - confirm.
# Target: amount by which each value falls below 0.95, floored at 0.
# fillna(0) keeps the original max(0, y) outcome, where a missing value became 0.
output = (0.95 - output).clip(lower=0).fillna(0)


In [2]:
# Show the target frame; the saved output below is the truncated head/tail view.
output

Unnamed: 0,ext_grid,bus_1,bus_2,bus_3,bus_4,bus_5,bus_6,bus_7,bus_8,bus_9,...,bus_30,bus_31,bus_17,bus_21,bus_24,bus_18,bus_23,bus_27,bus_32,bus_33
0,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.004559,0,0,0,0
1,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.003854,0,0,0,0
2,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.003177,0,0,0,0
3,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.002410,0,0,0,0
4,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.002373,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45211,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.000000,0,0,0,0
45212,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.000000,0,0,0,0
45213,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.000000,0,0,0,0
45214,0,0,0,0,0,0,0,0.0,0.0,0.0,...,0,0,0,0,0,0.000000,0,0,0,0


## Features


### Exogenous Data

In [3]:
# Load the exogenous feature table.
# Forward slashes replace the original backslash literal, whose "\d", "\p" and "\e"
# are invalid escape sequences and which only resolved on Windows.
exogenous_data = pd.read_csv('../data/processed/production/exogenous_data_extended.csv')
# Remove the 'date' column; done without inplace so the cell stays a plain assignment.
exogenous_data = exogenous_data.drop(columns=['date'])

### Network Active and Reactive Power

In [4]:
import sys
sys.path.append('..')
from thesis_package import elements as el, powerflow as pf
# NOTE(review): the saved output of this cell is "ModuleNotFoundError: No module named 'utils'".
# Presumably thesis_package imports `utils` as a top-level module - confirm and fix the
# package import (or the sys.path entry) so the notebook survives Restart & Run All.
network = el.Network()
# All paths use forward slashes: the original backslash literals contained invalid
# escape sequences ("\d", "\p") and only resolved on Windows.
network.create_network_from_xlsx(xlsx_file_path='../data/raw/Data_Example_32.xlsx')
# Create the pandapower network.
network.create_pandapower_model() # Property name: net_model.
# Plot the network.
network.plot_network()
# Method that receives the .csv files folder and adds the gen profile to the grid elements.
network.add_generation_profiles(generation_profiles_folder_path='../data/processed/production')
# Method that receives a .csv files folder and adds the load profile to the grid elements.
network.add_load_profiles(load_profiles_folder_path='../data/processed/consumption')
# Keep a handle on the pandapower model.
net = network.net_model
# Build the active/reactive generation and load profiles.
power_flow = pf.Power_Flow()
power_flow.create_power_flow_profiles_df(network)
# add_prefix returns a NEW frame, so the frames owned by `power_flow` keep their
# original column names (the original `.columns = ...` renamed the load frames in place).
# Active generation profile, sign-flipped, columns prefixed with 'active_'.
p_gen_profile_kw = (-power_flow.p_gen_profile_kw).add_prefix('active_')
# Reactive generation profile, sign-flipped, columns prefixed with 'reactive_'.
q_gen_profile_kvar = (-power_flow.q_gen_profile_kvar).add_prefix('reactive_')
# Active load profile, columns prefixed with 'active_'.
p_load_profile_kw = power_flow.p_load_profile_kw.add_prefix('active_')
# Reactive load profile, columns prefixed with 'reactive_'.
q_load_profile_kvar = power_flow.q_load_profile_kvar.add_prefix('reactive_')
# Combine the active and reactive power profiles into a single dataframe with a fresh 0..n-1 index.
profile_data = pd.concat(
    [p_gen_profile_kw, q_gen_profile_kvar, p_load_profile_kw, q_load_profile_kvar],
    axis=1,
).reset_index(drop=True)

ModuleNotFoundError: No module named 'utils'

## Train and Test a Model

### Exogenous Data

In [None]:
from thesis_package import aimodels as my_ai
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# Chronological 80/20 split: shuffle=False keeps the rows in their original order.
X_train, X_test, y_train, y_test = train_test_split(exogenous_data, output, test_size=0.2, shuffle=False)
# Learn the season -> integer mapping on the training rows only and reuse it for the test rows.
# Re-fitting on the test split (as the original did) can assign different codes to the same
# season whenever the two splits do not contain the same set of seasons.
# A season that never occurs in the training rows now raises ValueError instead of being mis-encoded.
le = LabelEncoder()
le.fit(X_train['season'])
# assign() returns new frames, which avoids writing into the slices returned by the split.
X_train = X_train.assign(season=le.transform(X_train['season']))
X_test = X_test.assign(season=le.transform(X_test['season']))
# Fit the linear-regression strategy on the training set and predict the test set.
regressor = my_ai.Context(strategy=my_ai.LinearRegressionStrategy())
regressor.fit(data={'X_train': X_train, 'y_train': y_train})
predictions_lr = regressor.predict(data={'X_test': X_test})

In [None]:
# Label the raw predictions with the same column names as the test targets.
predictions_lr_exogenous_data = pd.DataFrame(data=predictions_lr, columns=y_test.columns)

In [None]:
# Keep only the prediction columns that contain at least one non-zero value.
predictions_lr_df_nonzero = predictions_lr_exogenous_data.loc[
    :, predictions_lr_exogenous_data.ne(0).any(axis='index')
]
# Apply the same column filter to the test targets.
y_test_nonzero = y_test.loc[:, y_test.ne(0).any(axis='index')]

In [None]:
import matplotlib.pyplot as plt
def plot_series(series1, series2, title):
    """Draw two series on one 25x8 figure and show it.

    series1 is labelled 'Ground truth' and series2 'Predictions'; the figure
    gets a legend, a grid and the given title.
    """
    _, axis = plt.subplots(figsize=(25, 8))
    axis.plot(series1, label='Ground truth')
    axis.plot(series2, label='Predictions')
    axis.legend()
    axis.grid()
    axis.set_title(title)
    plt.show()
# Compare the first 2000 test samples of bus_12 with the exogenous-data predictions.
# The test series is re-indexed from 0 so both curves share the same x positions.
# Title typo fixed ('exogneous' -> 'exogenous').
plot_series(y_test['bus_12'].reset_index(drop=True)[:2000], predictions_lr_exogenous_data['bus_12'][:2000], 'Bus 12 trained with exogenous data.')

### Network Active and Reactive Power

In [None]:
# Same chronological 80/20 split, now with the power profiles as features.
X_train, X_test, y_train, y_test = train_test_split(
    profile_data, output, test_size=0.2, shuffle=False
)
# Refit the existing regressor on the new training set, then predict the test set.
regressor.fit(data=dict(X_train=X_train, y_train=y_train))
predictions_lr = regressor.predict(data=dict(X_test=X_test))
# Label the raw predictions with the same column names as the test targets.
predictions_lr_profile_data = pd.DataFrame(data=predictions_lr, columns=y_test.columns)

In [None]:
# Keep only the prediction columns that contain at least one non-zero value.
predictions_lr_df_nonzero = predictions_lr_profile_data.loc[
    :, predictions_lr_profile_data.ne(0).any(axis='index')
]
# Apply the same column filter to the test targets.
y_test_nonzero = y_test.loc[:, y_test.ne(0).any(axis='index')]

In [None]:
# Compare the first 2000 test samples of bus_12 with the profile-data predictions.
# The title now names the bus that is actually plotted (it said 'Bus 8' while plotting bus_12).
plot_series(y_test['bus_12'].reset_index(drop=True)[:2000], predictions_lr_profile_data['bus_12'][:2000], 'Bus 12 trained with profile data.')

# Metric
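Every (time step, bus) value is classified by comparing both the ground truth and the prediction against an activation threshold: a value above the threshold is a positive, otherwise a negative. This yields true positives, false positives, false negatives and true negatives. The RMSE between ground truth and prediction is then reported separately for each of the four groups, and recall and accuracy are computed from the group counts.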

In [None]:
from numpy import sqrt
# Default activation threshold: 10 % of the mean of the per-column maxima, taken over
# the columns of `output` whose maximum is non-zero.
threshold = output.loc[:, output.max(axis=0) != 0].max(axis=0).mean() * 0.1
def get_prediction_scores(y_pred, y_test, threshold=threshold):
    """Score predictions per confusion-matrix group at an activation threshold.

    A value is a "positive" when it is strictly greater than `threshold`
    (NaN therefore counts as a negative). Ground truth and predictions are
    compared position by position, so index labels are ignored.

    Parameters
    ----------
    y_pred : pd.DataFrame or pd.Series
        Predicted values.
    y_test : pd.DataFrame or pd.Series
        Ground-truth values, same shape as `y_pred`.
    threshold : float
        Activation threshold; defaults to the value computed above when the
        function is defined.

    Returns
    -------
    tuple
        (true_positives_rmse, false_positives_rmse, false_negatives_rmse,
        true_negatives_rmse, true_positives_ctr, false_positives_ctr,
        false_negatives_ctr, true_negatives_ctr). The RMSE of an empty group is 0.

    Raises
    ------
    ValueError
        If `y_pred` and `y_test` do not have the same shape.
    """
    # pd.DataFrame(...) turns a Series into a single-column frame, so both accepted
    # input kinds end up as 2-D float arrays (the original loop crashed on a Series).
    actual = pd.DataFrame(y_test).to_numpy(dtype=float)
    predicted = pd.DataFrame(y_pred).to_numpy(dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_pred and y_test must have the same shape, got {predicted.shape} and {actual.shape}"
        )
    # Subtracting the threshold from both sides cancels out in the error term.
    squared_error = (actual - predicted) ** 2
    actual_positive = actual > threshold
    predicted_positive = predicted > threshold

    def _group_score(mask):
        # RMSE and size of the cells selected by `mask`; an empty group scores 0.
        ctr = int(mask.sum())
        sse = squared_error[mask].sum()
        return sqrt(sse / ctr if ctr > 0 else 0), ctr

    true_positives_rmse, true_positives_ctr = _group_score(actual_positive & predicted_positive)
    false_positives_rmse, false_positives_ctr = _group_score(~actual_positive & predicted_positive)
    false_negatives_rmse, false_negatives_ctr = _group_score(actual_positive & ~predicted_positive)
    true_negatives_rmse, true_negatives_ctr = _group_score(~actual_positive & ~predicted_positive)
    return true_positives_rmse, false_positives_rmse, false_negatives_rmse, true_negatives_rmse, true_positives_ctr, false_positives_ctr, false_negatives_ctr, true_negatives_ctr

In [None]:
print('Prediction from profile data:')
# NOTE(review): threshold=0.05 overrides the data-driven default of get_prediction_scores.
# The shortfalls shown in the saved target output are of order 0.004, so this value
# may leave no positives at all - confirm it is intended.
true_positives_rmse, false_positives_rmse, false_negatives_rmse, true_negatives_rmse, \
true_positives_ctr, false_positives_ctr, false_negatives_ctr, true_negatives_ctr = get_prediction_scores(predictions_lr_profile_data, y_test, threshold=0.05)
# Print the above results.
print('True positives RMSE:', true_positives_rmse)
print('False positives RMSE:', false_positives_rmse)
print('False negatives RMSE:', false_negatives_rmse)
print('True negatives RMSE:', true_negatives_rmse)
# Recall = TP / (TP + FN). It is undefined when there are no actual positives, so
# report nan instead of raising ZeroDivisionError.
actual_positives_ctr = true_positives_ctr + false_negatives_ctr
recall = true_positives_ctr / actual_positives_ctr if actual_positives_ctr > 0 else float('nan')
print('Recall:', recall)
# Accuracy = (TP + TN) / all cells, guarded the same way for an empty test set.
total_ctr = true_positives_ctr + true_negatives_ctr + false_positives_ctr + false_negatives_ctr
accuracy = (true_positives_ctr + true_negatives_ctr) / total_ctr if total_ctr > 0 else float('nan')
print('Accuracy:', accuracy)

In [None]:
print('Prediction from exogenous data:')
# `y_test` here is the one left by the most recent split. Both splits cut `output` with
# test_size=0.2 and shuffle=False, so the test targets are the same rows.
# NOTE(review): threshold=0.05 overrides the data-driven default of get_prediction_scores.
# The shortfalls shown in the saved target output are of order 0.004, so this value
# may leave no positives at all - confirm it is intended.
true_positives_rmse, false_positives_rmse, false_negatives_rmse, true_negatives_rmse,\
true_positives_ctr, false_positives_ctr, false_negatives_ctr, true_negatives_ctr = get_prediction_scores(predictions_lr_exogenous_data, y_test, threshold=0.05)
# Print the above results.
print('True positives RMSE:', true_positives_rmse)
print('False positives RMSE:', false_positives_rmse)
print('False negatives RMSE:', false_negatives_rmse)
print('True negatives RMSE:', true_negatives_rmse)
# Recall = TP / (TP + FN). It is undefined when there are no actual positives, so
# report nan instead of raising ZeroDivisionError.
actual_positives_ctr = true_positives_ctr + false_negatives_ctr
recall = true_positives_ctr / actual_positives_ctr if actual_positives_ctr > 0 else float('nan')
print('Recall:', recall)
# Accuracy = (TP + TN) / all cells, guarded the same way for an empty test set.
total_ctr = true_positives_ctr + true_negatives_ctr + false_positives_ctr + false_negatives_ctr
accuracy = (true_positives_ctr + true_negatives_ctr) / total_ctr if total_ctr > 0 else float('nan')
print('Accuracy:', accuracy)