# Risk Analysis

## Install Dependencies

In [1]:
%%capture
# The %%capture cell magic above suppresses this cell's output (the pip install logs).
# Versions are pinned so the notebook runs against known pandas/numpy releases;
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install pandas==1.3.5
%pip install numpy==1.21.6

## Import Modules

In [2]:
import os
import pandas as pd
import numpy as np

## Mount Drive

In [3]:
import os

team_name = 'capstone-power-grid-protagonists'
colab_path = f'/content/drive/Shareddrives/{team_name}/project'
studiolab_path = f'/home/studio-lab-user/sagemaker-studiolab-notebooks/{team_name}'

# Resolve the project root by trying each supported environment in turn:
#   1. Google Colab (mount the shared drive),
#   2. AWS SageMaker Studio Lab,
#   3. the current working directory (local run).
try:
    # Try to mount Google Drive and set project path
    from google.colab import drive
    drive.flush_and_unmount()
    drive.mount('/content/drive')
    print('')

    root_path = colab_path
    os.chdir(root_path)

except Exception:
    # `except Exception` (not a bare `except`) still covers a missing
    # google.colab module, a failed mount, or a missing project folder,
    # but lets KeyboardInterrupt / SystemExit propagate.
    try:
        # Try to set AWS SageMaker Studio Lab project path
        root_path = studiolab_path
        os.chdir(root_path)

    except OSError:
        # os.chdir failed (path missing or inaccessible):
        # use the current working directory as root path
        root_path = os.getcwd()

        # If the current folder is 'notebooks', move up one level
        if os.path.basename(root_path) == 'notebooks':
            root_path = os.path.dirname(root_path)
            os.chdir(root_path)

print('Current working directory is:')
print(os.getcwd())

Mounted at /content/drive

Current working directory is:
/content/drive/Shareddrives/capstone-power-grid-protagonists/project


## Import Data

In [4]:
# Processed data: power plants and substations (HIFLD), balancing authorities (EIA).
# All three files are read with the same options, in the order they are unpacked.
plants, subs, bas = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (
        'data/processed/hifld/power_plants.csv',
        'data/processed/hifld/substations.csv',
        'data/processed/eia/balancing_authorities.csv',
    )
)

## Analysis

In [5]:
def risk_score(df,
               cols=('deg_cent', 'bet_cent', 'clust_coef', 'disturbance_prob', 'outage_prob', 'potential_population_affected'),
               ascending=(True, True, False, True, True, True)):
    """Rank the given metric columns and combine the ranks into a risk score.

    For every name in ``cols`` that exists in ``df``, a ``<col>_rank`` column is
    added holding the percentile rank (0-1] of that column. ``risk_score`` is the
    row-wise mean of those rank columns, rounded to 3 decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is NOT modified; the ranking is done on a copy.
    cols : sequence of str
        Candidate metric columns. Names missing from ``df`` are skipped.
    ascending : sequence of bool
        Rank direction for each entry of ``cols`` (matched by position).
        ``True`` gives the largest value the highest percentile; ``False``
        gives the smallest value the highest percentile. May be longer than
        ``cols``; extra entries are ignored.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the ``*_rank`` and ``risk_score`` columns added,
        sorted by ``risk_score`` in descending order. If none of ``cols`` is
        present, ``risk_score`` is NaN for every row.

    Raises
    ------
    ValueError
        If ``ascending`` has fewer entries than ``cols``.
    """
    if len(ascending) < len(cols):
        raise ValueError(
            f'ascending has {len(ascending)} entries but cols has {len(cols)}; '
            'provide one rank direction per column'
        )

    # Work on a copy so the caller's frame does not silently gain columns.
    ranked = df.copy()
    rank_cols = []

    for col, asc in zip(cols, ascending):
        if col in ranked.columns:
            rank_col = f'{col}_rank'
            # na_option='bottom' assigns missing values the highest rank.
            # NOTE(review): this means rows with missing metrics score as
            # high-risk on that metric - confirm this is intended.
            ranked[rank_col] = ranked[col].rank(na_option='bottom', pct=True, ascending=asc)
            rank_cols.append(rank_col)

    ranked['risk_score'] = ranked[rank_cols].mean(axis=1).round(3)

    return ranked.sort_values('risk_score', ascending=False)

In [6]:
# Score each dataset with the default metric columns; every name is rebound
# to its scored frame, sorted by risk_score (highest first).
plants, subs, bas = (risk_score(frame) for frame in (plants, subs, bas))

In [7]:
# Highest-risk substations: identifying fields, the input metrics, and the score.
subs.loc[:, [
    'sub_code', 'name', 'city', 'state',
    'deg_cent', 'bet_cent', 'clust_coef',
    'disturbance_prob', 'outage_prob', 'potential_population_affected',
    'risk_score',
]].head(25)

Unnamed: 0,sub_code,name,city,state,deg_cent,bet_cent,clust_coef,disturbance_prob,outage_prob,potential_population_affected,risk_score
35743,150577,Conservation,Fort Lauderdale,FL,0.000133,0.000654,0.0,0.051957,0.044423,1930983.0,0.89
36533,151373,Florida City,Homestead,FL,8.9e-05,0.000417,0.0,0.051957,0.044423,2662777.0,0.884
35875,150710,Princeton,Homestead,FL,8.9e-05,0.000476,0.0,0.051957,0.044423,2662777.0,0.884
37083,151932,Oakland Park,Oakland Park,FL,0.000111,0.000207,0.0,0.051957,0.044423,1930983.0,0.879
36117,150957,Miami Shores,Miami,FL,8.9e-05,0.000163,0.0,0.051957,0.044423,2662777.0,0.875
36080,150919,Moffett,Hollywood,FL,8.9e-05,0.000169,0.0,0.051957,0.044423,1930983.0,0.871
36135,150975,Little River,Miami,FL,0.000111,9e-05,0.0,0.051957,0.044423,2662777.0,0.868
35770,150606,Country Club,Hialeah,FL,8.9e-05,0.000115,0.0,0.051957,0.044423,2662777.0,0.868
71745,308689,Unknown308689,West Columbia,TX,0.000133,0.000426,0.0,0.135421,0.096575,379689.0,0.867
36525,151365,Lucy,Homestead,FL,0.000111,7.6e-05,0.0,0.051957,0.044423,2662777.0,0.864


In [8]:
# Highest-risk balancing authorities: identifying fields, the input metrics, and the score.
bas.loc[:, [
    'ba_code', 'ba_name', 'region_country_name',
    'deg_cent', 'bet_cent', 'clust_coef',
    'disturbance_prob', 'outage_prob', 'potential_population_affected',
    'risk_score',
]].head(10)

Unnamed: 0,ba_code,ba_name,region_country_name,deg_cent,bet_cent,clust_coef,disturbance_prob,outage_prob,potential_population_affected,risk_score
33,MISO,"Midcontinent Independent System Operator, Inc.",Midwest,0.167168,0.003064,2e-05,0.240998,0.185812,678993600.0,0.901
40,PJM,"PJM Interconnection, LLC",Mid-Atlantic,0.136126,0.001431,1.9e-05,0.125049,0.083072,962231600.0,0.89
8,CISO,California Independent System Operator,California,0.108489,0.000812,6e-05,0.102544,0.067319,2125385000.0,0.874
15,ERCO,"Electric Reliability Council of Texas, Inc.",Texas,0.037377,0.000315,0.0,0.129941,0.093053,1562223000.0,0.865
52,SWPP,Southwest Power Pool,Central,0.048385,0.002774,0.000172,0.054207,0.035421,149791400.0,0.847
36,NYIS,New York Independent System Operator,New York,0.056066,0.000722,3.2e-05,0.038845,0.025049,345554900.0,0.845
29,ISNE,ISO New England,New England,0.08204,0.000477,1.5e-05,0.044423,0.028278,259558700.0,0.845
49,SOCO,"Southern Company Services, Inc. - Trans",Southeast,0.025578,0.001069,0.000846,0.022211,0.018982,206747800.0,0.813
13,DUK,Duke Energy Carolinas,Carolinas,0.040862,0.000402,0.000421,0.015949,0.011252,254715300.0,0.78
6,BPAT,Bonneville Power Administration,Northwest,0.012987,0.00069,0.006293,0.03454,0.016243,215292700.0,0.773


In [9]:
# Mean of each metric per state, showing the 10 states with the highest mean risk score.
# The grouping key 'state' is intentionally not part of the column selection:
# it already becomes the index, and asking .mean() to aggregate a text column only
# works through pandas' deprecated silent dropping of non-numeric columns
# (it raises TypeError on pandas >= 2.0).
(
    subs
    .groupby('state')[[
        'deg_cent', 'bet_cent', 'clust_coef',
        'disturbance_prob', 'outage_prob', 'potential_population_affected',
        'risk_score',
    ]]
    .mean()
    .sort_values(by='risk_score', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,deg_cent,bet_cent,clust_coef,disturbance_prob,outage_prob,potential_population_affected,risk_score
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MO,4.9e-05,0.000703,0.04166,0.015859,0.012461,144156.663763,0.566581
MA,5.2e-05,0.000191,0.036368,0.006428,0.005093,761991.844479,0.562212
NC,4.9e-05,0.000166,0.043363,0.011558,0.009683,232810.271938,0.559408
WA,5.3e-05,0.000113,0.024084,0.024055,0.014959,526808.815094,0.556256
NY,4.6e-05,0.00017,0.03889,0.021248,0.014586,426693.786691,0.546036
IL,4.7e-05,0.000534,0.03513,0.015045,0.013378,613625.088942,0.544861
CT,5.2e-05,8.2e-05,0.082908,0.00551,0.004599,654266.528053,0.537997
TX,5.2e-05,0.000225,0.036376,0.054766,0.042963,743140.968804,0.537764
TN,5.4e-05,0.00042,0.061742,0.010421,0.0069,193596.471259,0.535572
LA,5.1e-05,0.000334,0.034302,0.016551,0.013822,153359.353455,0.522787


In [10]:
# Mean of each metric per region, showing the 10 regions with the highest mean risk score.
# The grouping key 'region_country_name' is intentionally not part of the column
# selection: it already becomes the index, and asking .mean() to aggregate a text
# column only works through pandas' deprecated silent dropping of non-numeric
# columns (it raises TypeError on pandas >= 2.0).
(
    bas
    .groupby('region_country_name')[[
        'deg_cent', 'bet_cent', 'clust_coef',
        'disturbance_prob', 'outage_prob', 'potential_population_affected',
        'risk_score',
    ]]
    .mean()
    .sort_values(by='risk_score', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,deg_cent,bet_cent,clust_coef,disturbance_prob,outage_prob,potential_population_affected,risk_score
region_country_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Mid-Atlantic,0.136126,0.001431,1.9e-05,0.125049,0.083072,962231600.0,0.89
Texas,0.037377,0.000315,0.0,0.129941,0.093053,1562223000.0,0.865
New England,0.08204,0.000477,1.5e-05,0.044423,0.028278,259558700.0,0.845
New York,0.056066,0.000722,3.2e-05,0.038845,0.025049,345554900.0,0.845
Tennessee,0.011403,0.000274,0.003499,0.015949,0.010665,92123010.0,0.726
Central,0.025538,0.001393,0.010839,0.027104,0.01771,149791400.0,0.632
California,0.025467,0.000186,0.012565,0.021605,0.013933,532704400.0,0.55
Midwest,0.043435,0.000797,0.263847,0.06113,0.04704,226502400.0,0.501
Northwest,0.004853,0.000193,0.070187,0.00807,0.003238,35068130.0,0.497381
Carolinas,0.014663,0.00012,0.060569,0.004941,0.002723,64609010.0,0.495667
