# Predicting electric grid stability

In this work, I’ll build a binary classification model to predict if a grid is stable or unstable using the UCI Electrical Grid Stability Simulated dataset.
It has 12 primary predictive features and two dependent variables.

#### Predictive features:
'tau1' to 'tau4': the reaction time of each network participant, a real value within the range 0.5 to 10 ('tau1' corresponds to the supplier node, 'tau2' to 'tau4' to the consumer nodes);

'p1' to 'p4': nominal power produced (positive) or consumed (negative) by each network participant, a real value within the range -2.0 to -0.5 for consumers ('p2' to 'p4'). As the total power consumed equals the total power generated, p1 (supplier node) = - (p2 + p3 + p4);

'g1' to 'g4': price elasticity coefficient for each network participant, a real value within the range 0.05 to 1.00 ('g1' corresponds to the supplier node, 'g2' to 'g4' to the consumer nodes; 'g' stands for 'gamma');
    
#### Dependent variables:

'stab': the maximum real part of the characteristic differential equation root (if positive, the system is linearly unstable; if negative, linearly stable);

'stabf': a categorical (binary) label ('stable' or 'unstable').

Because of the direct relationship between 'stab' and 'stabf' ('stabf' = 'stable' if 'stab' <= 0, 'unstable' otherwise), 'stab' would be dropped and 'stabf' will remain as the sole dependent variable (binary classification).

In [1]:
#Importing the dataset
import pandas as pd
import numpy as np

df= pd.read_csv('dataset.csv')
df

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,0.049860,unstable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,unstable
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803,stable
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810,stable
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,unstable


In [2]:
df.stabf.value_counts()

unstable    6380
stable      3620
Name: stabf, dtype: int64

In [3]:
df = df.drop(columns = ['stab'])
df

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,unstable
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,unstable
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,unstable
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,stable
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,stable
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,unstable


In [4]:
# Selecting our dependent and independent variables where y is the independent and x is the dependent variable
x = df.drop(columns = ['stabf'])
y = df['stabf']

In [5]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size =0.2, random_state = 1)

In [6]:
#Transforming the dataset using a standard scaler
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train_scaled = sc.fit_transform(x_train)
x_test_scaled = sc.transform(x_test)


In [7]:
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score

##  Measuring the accuracy of a Random Forest classifier

In [8]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(random_state = 1)
rfc.fit(x_train_scaled, y_train)
rfc_pred=rfc.predict(x_test_scaled)

In [9]:
accuracy = accuracy_score(y_true=y_test, y_pred=rfc_pred)
#accuracy = round(accuracy, 4)
accuracy

0.929

## Measuring the accuracy of an XGboost classifier


In [10]:
import xgboost as xgb
#xgboost using gradient boosting
from xgboost import XGBClassifier
ex = XGBClassifier(random_state=1)
ex.fit(x_train_scaled, y_train)
ex_pred = ex.predict(x_test_scaled)

In [11]:
accuracy = accuracy_score(y_true=y_test, y_pred=ex_pred )
accuracy = round(accuracy, 4)
accuracy

0.9195

## Measuring the accuracy of a Lightboost classifier

In [12]:

from lightgbm import LGBMClassifier
light = LGBMClassifier(random_state=1)
light.fit(x_train_scaled, y_train)
light_pred = light.predict(x_test_scaled)

In [13]:
accuracy = accuracy_score(y_true=y_test, y_pred=light_pred )
accuracy = round(accuracy, 4)
accuracy

0.9375

## Using the ExtraTrees classifier to determne which features are important

In [14]:
from sklearn.ensemble import ExtraTreesClassifier
tree = ExtraTreesClassifier(random_state = 1)
tree.fit(x_train_scaled , y_train)
tree_pred = tree.predict(x_test_scaled)


In [15]:
# Determing the order of significance of the features from lowest to highest 
feat_importance = pd.Series (tree.feature_importances_, index = x.columns)
feat_importance.sort_values()

p1      0.039507
p2      0.040371
p4      0.040579
p3      0.040706
g1      0.089783
g2      0.093676
g4      0.094019
g3      0.096883
tau3    0.113169
tau4    0.115466
tau1    0.117397
tau2    0.118445
dtype: float64

## HyperParameter tunning

In [16]:
n_estimators = [50, 100, 300, 500, 1000]
min_samples_split = [2, 3, 5, 7, 9]
min_samples_leaf = [1, 2, 4, 6, 8]
max_features = ['auto', 'sqrt', 'log2', None] 
hyperparameter_grid = {'n_estimators': n_estimators,
                       'min_samples_leaf': min_samples_leaf,
                       'min_samples_split': min_samples_split,
                       'max_features': max_features}


In [17]:
from sklearn.model_selection import RandomizedSearchCV
tree = ExtraTreesClassifier(random_state = 1)
clf = RandomizedSearchCV(tree, hyperparameter_grid, random_state = 1)
search = clf.fit(x_train_scaled , y_train)


In [18]:
#checking for the best parameter for the model
search.best_params_.values()

dict_values([1000, 2, 8, None])

In [19]:
#experimenting with this parameter to test the model's performance
tree_param = ExtraTreesClassifier(n_estimators=1000, min_samples_split=2, 
                                 min_samples_leaf=8, random_state=1) #max_features='None'
tree_param.fit(x_train_scaled, y_train)
tree_param_pred = tree_param.predict(x_test_scaled)

## Comparing the accuracy using ExtraTrees Classifier against the accuracy after Hyperparameter tunning

In [20]:
#ACCURACY USING EXTRATREES
accuracy = accuracy_score(y_true=y_test, y_pred=tree_pred )
accuracy = round(accuracy, 4)
accuracy

0.928

In [21]:
#ACCURACY AFTER HYPERPARAMETER TUNNING
accuracy = accuracy_score(y_true=y_test, y_pred=tree_param_pred )
accuracy = round(accuracy, 4)
accuracy

0.9105