In [32]:
import pandas as pd
import numpy as np
from dateutil.parser import parse
from datetime import datetime
from datetime import date
import csv
import matplotlib.pyplot as plt
import math
import seaborn as sns
plt.style.use('fivethirtyeight')
import warnings
from IPython.display import display_html
warnings.filterwarnings('ignore')
%matplotlib inline

In [33]:
# Widen pandas' display limits so wide/long frames are shown without truncation.
for option_name, option_value in {
    'display.max_rows': 500,
    'display.max_columns': 500,
    'display.width': 1000,
}.items():
    pd.set_option(option_name, option_value)

## Loading in TCMS Outage Tables and Merging Tables Together

In [34]:
# Read the archived and rolling outage extracts (pipe-delimited, ISO-8859-1 encoded)
# and stack them into a single outages table.
outages_arc_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLOUTAGES_ARC_DPL.csv', sep = '|', encoding = "ISO-8859-1")
outages_rolling_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLOUTAGES_ROLLING_DPL.csv', sep = '|', encoding = "ISO-8859-1")

# DataFrame.append was removed in pandas 2.0; pd.concat produces the same stacked frame.
outages_df = pd.concat([outages_arc_df, outages_rolling_df], ignore_index = True)

# The outage key is called OUTAGE_ID in the restore-steps table it is merged with later.
outages_df = outages_df.rename(columns = {'ID': 'OUTAGE_ID'})

# Release the two source frames; only the combined table is used from here on.
del outages_arc_df, outages_rolling_df

In [35]:
# Parse the first-call timestamp; unparseable values become NaT (errors='coerce').
first_call_time = pd.to_datetime(
    outages_df['TIME_OF_FIRST_CALL'],
    infer_datetime_format = True,
    errors = 'coerce',
)

# Keep outages whose first call is on/after 2002-01-01. NaT fails the comparison,
# so rows with an unparseable timestamp are dropped here as well.
outages_df = (
    outages_df
    .assign(TIME_OF_FIRST_CALL = first_call_time)
    .loc[first_call_time >= '2002-01-01 00:00:00']
)

In [36]:
# (rows, columns) after restricting to first calls on or after 2002-01-01.
outages_df.shape

(807452, 84)

In [37]:
# Columns removed from the outages table before any lookups are merged in.
unused_outage_columns = [
    'FILTER_ID', 'WORKORDER', 'DISPLOC', 'DISPATCHER', 'DELETED',
    'SORTNUMBER', 'NSSORTLEVEL', 'ANALYZER', 'CLAIMS',
]
outages_df = outages_df.drop(columns = unused_outage_columns)

In [38]:
# (rows, columns) after dropping the nine unused columns; the row count should be unchanged.
outages_df.shape

(807452, 75)

In [39]:
# Stage 0 of the pipeline. Plain assignment: outages_0 is a second name for the
# same DataFrame object as outages_df, not a copy.
outages_0 = outages_df

In [40]:
# Load the failed-device lookup, keep only the device codes UP / US / UI
# (the three underground cable types), and reduce it to the key and description.
faileddevice_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLLOOKUPFAILEDDEVICE_DPL.csv', sep = '|')
underground_cable_codes = ['UP', 'US', 'UI']
faileddevice_df = faileddevice_df.loc[
    faileddevice_df['FAILED_DEVICE_CODE'].isin(underground_cable_codes),
    ['ID', 'FAILED_DEVICE'],
]

# Inner join: outages whose FAILDEV_ID is not one of the kept devices are discarded.
# The join keys are dropped once the description has been attached.
outages_0 = outages_0.merge(
    faileddevice_df,
    left_on = 'FAILDEV_ID',
    right_on = 'ID',
    how = 'inner',
).drop(columns = ['ID', 'FAILDEV_ID'])

In [41]:
# (rows, columns) after the inner join with the underground-cable failed devices.
outages_0.shape

(20764, 75)

In [42]:
# Stage 1 of the pipeline (alias of outages_0, not a copy).
outages_1 = outages_0

In [43]:
# Load the interrupting-device lookup and discard entries whose HIDE value is -1.
interruptingdevice_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLLOOKUPINTERRUPTINGDEVICE_DPL.csv', sep = '|')
interruptingdevice_df = interruptingdevice_df.loc[interruptingdevice_df['HIDE'].ne(-1)]

In [44]:
# Attach the interrupting-device lookup (inner join on INTDEV_ID == ID), then
# remove the lookup's key/code/hide columns. INTDEV_ID itself is kept.
outages_1 = outages_1.merge(
    interruptingdevice_df,
    left_on = 'INTDEV_ID',
    right_on = 'ID',
    how = 'inner',
).drop(columns = ['ID', 'INT_CODE', 'HIDE'])

In [45]:
# (rows, columns) after the inner join with the interrupting-device lookup.
outages_1.shape

(20734, 76)

In [46]:
# Stage 2 of the pipeline (alias of outages_1, not a copy).
outages_2 = outages_1

In [47]:
# Load the cause-code lookup; keep rows where neither HIDE nor IGNOREFORSTATS is -1,
# and only the key plus the two descriptive text columns.
causecode_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLLOOKUPCAUSECODE.csv', sep = '|')
usable_cause = (causecode_df['HIDE'] != -1) & (causecode_df['IGNOREFORSTATS'] != -1)
causecode_df = causecode_df.loc[usable_cause, ['CAUSEID', 'CAUSEDESCRIPTION', 'SPOKEN_CAUSE_TEXT']]

In [48]:
# Attach the cause description (inner join on CAUSE_ID == CAUSEID) and drop both keys.
outages_2 = outages_2.merge(
    causecode_df,
    left_on = 'CAUSE_ID',
    right_on = 'CAUSEID',
    how = 'inner',
).drop(columns = ['CAUSE_ID', 'CAUSEID'])

In [49]:
# (rows, columns) after the inner join with the cause-code lookup.
outages_2.shape

(20568, 77)

In [50]:
# Stage 3 of the pipeline (alias of outages_2, not a copy).
outages_3 = outages_2

In [51]:
# Load the call-type lookup, keep only rows with HIDE == 0, then drop the
# GEB and HIDE columns.
calltype_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLLOOKUPCALLTYPES_DPL.csv', sep = '|')
calltype_df = (
    calltype_df
    .loc[calltype_df['HIDE'] == 0]
    .drop(columns = ['GEB', 'HIDE'])
)

In [52]:
# Rename the outage table's CALL_TYPE to CALL_TYPE_ID before the call-type lookup is merged in.
outages_3 = outages_3.rename({'CALL_TYPE': 'CALL_TYPE_ID'}, axis = 'columns')

In [53]:
# Attach the call-type lookup (inner join on CALL_TYPE_ID == ID) and drop both keys.
outages_3 = outages_3.merge(
    calltype_df,
    left_on = 'CALL_TYPE_ID',
    right_on = 'ID',
    how = 'inner',
).drop(columns = ['ID', 'CALL_TYPE_ID'])

In [54]:
# (rows, columns) after the inner join with the call-type lookup.
outages_3.shape

(20568, 77)

In [55]:
# df_ads is another name for the outages_3 frame (alias, not a copy).
df_ads = outages_3

In [56]:
# Discard outages whose latitude or longitude is exactly 0.
# Missing (NaN) coordinates are not equal to 0, so those rows are kept.
has_nonzero_coordinates = (df_ads['LATITUDE'] != 0) & (df_ads['LONGITUDE'] != 0)
df_ads = df_ads.loc[has_nonzero_coordinates]

In [57]:
# (rows, columns) after removing outages with a zero latitude or longitude.
df_ads.shape

(20460, 77)

In [58]:
# Stage 4 of the pipeline (alias of df_ads, not a copy).
outages_4 = df_ads

In [59]:
# Read the archived and rolling restore-step extracts and stack them into one table.
restoresteps_arc_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLRESTORESTEPS_ARC_DPL.csv', sep = '|')
restoresteps_rolling_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLRESTORESTEPS_ROLLING_DPL.csv', sep = '|')

# DataFrame.append was removed in pandas 2.0; pd.concat produces the same stacked frame.
restore_df = pd.concat([restoresteps_arc_df, restoresteps_rolling_df], ignore_index = True)

# Release the two source frames; only the combined table is used from here on.
del restoresteps_arc_df, restoresteps_rolling_df

In [60]:
# Duplicate CREWSIZE under two names so that each copy can receive its own
# aggregation (median and sum) when the steps are grouped per outage.
restore_df = restore_df.assign(
    CREWSIZE_median = restore_df['CREWSIZE'],
    CREWSIZE_summed = restore_df['CREWSIZE'],
)

In [61]:
# Keep only the columns needed downstream and parse the release timestamp;
# unparseable values become NaT (errors='coerce').
restore_columns = ['RESTORE_ID', 'OUTAGE_ID', 'CREWSIZE_median', 'CREWSIZE_summed', 'DATETIMERELEASED']
restore_df = restore_df[restore_columns].assign(
    DATETIMERELEASED = lambda steps: pd.to_datetime(
        steps['DATETIMERELEASED'], infer_datetime_format = True, errors = 'coerce'
    )
)

In [62]:
# Collapse the restore steps to one row per outage: median and total crew size,
# and the latest release time among the outage's steps.
restore_aggregations = {
    'CREWSIZE_median': 'median',
    'CREWSIZE_summed': 'sum',
    'DATETIMERELEASED': 'max',
}
restore_df = restore_df.groupby(['OUTAGE_ID'], as_index = False).agg(restore_aggregations)

In [63]:
# Drop outages that have no release time, then treat missing crew sizes as 0.
restore_df = restore_df.loc[restore_df['DATETIMERELEASED'].notnull()]
restore_df = restore_df.assign(
    CREWSIZE_median = restore_df['CREWSIZE_median'].fillna(0),
    CREWSIZE_summed = restore_df['CREWSIZE_summed'].fillna(0),
)

In [64]:
# (rows, columns) of the per-outage restore summary: one row per OUTAGE_ID with a release time.
restore_df.shape

(994915, 4)

In [65]:
# Left join: every outage is kept; outages with no restore summary get NaN/NaT
# in the crew-size and release-time columns.
outages_4 = outages_4.merge(restore_df, how = 'left', on = 'OUTAGE_ID')

In [66]:
# (rows, columns) after the left join with the restore summary; a left join on a
# key that is unique in restore_df leaves the row count unchanged.
outages_4.shape

(20460, 80)

In [67]:
# Stage 5 of the pipeline (alias of outages_4, not a copy — columns added to
# outages_5 in place also appear on outages_4).
outages_5 = outages_4

In [68]:
# TTR: minutes between the first call and the latest release time, rounded to a
# whole minute. Rows without a release time get NaN.
restoration_elapsed = outages_5['DATETIMERELEASED'] - outages_5['TIME_OF_FIRST_CALL']
outages_5['TTR'] = (restoration_elapsed.dt.total_seconds() / 60).round(0)

In [69]:
# Inspect the computed TTR values (in minutes).
outages_5.TTR

0          381.0
1          246.0
2          148.0
3          299.0
4          214.0
          ...   
20455    15931.0
20456      129.0
20457      327.0
20458      177.0
20459      197.0
Name: TTR, Length: 20460, dtype: float64

In [70]:
# Stage 6 of the pipeline (alias of outages_5, not a copy).
outages_6 = outages_5

In [71]:
# Convert the numeric indicator columns of the outage table to real booleans.
#
# For the columns listed below, any negative value is treated as True and
# everything else (zero, positive, missing) as False.
#
# Each column appears exactly once. Previously OVERTIME was converted twice; the
# second pass compared the already-boolean values with `< 0`, which is always
# False, so every OVERTIME flag was silently reset to False.
negative_flag_columns = [
    'PHASE_A', 'PHASE_B', 'PHASE_C', 'PHASE_T',
    'FIREEXPLOSION', 'DAMAGEDTRANS', 'PRIORITY', 'MADESAFE', 'OVERTIME',
    'MATERIALHOLD', 'ECD_OVERRIDE', 'URD_OUTAGE',
    'TREE_CLEARED_NO_WORK_NEEDED', 'TREE_CLEARED', 'TREE_VERIFIED',
    'MATERIALS_DELIVERED', 'CALLED_CUSTOMER_OK', 'CREW_NEEDS_HELP',
    'HYDRO_VAC_COMPLETED', 'HYDRO_VAC_DISPATCHED', 'HYDRO_VAC_NEEDED',
    'SCOUTED', 'HOLD',
    'SAFETY', 'WIREDOWN_PTH', 'WIREDOWN_PTP', 'TREEONWIRE_PTH', 'TREEONWIRE_PTP',
    'BROKENPOLE', 'PUBLIC_SAFETY',
]

for flag_column in negative_flag_columns:
    # Skip columns that are already boolean: re-applying `< 0` to True/False
    # would turn every value into False.
    if pd.api.types.is_bool_dtype(outages_6[flag_column]):
        continue
    outages_6[flag_column] = outages_6[flag_column] < 0

# The preferred-crew columns are reduced to "was a value greater than 0 recorded?"
# flags; the original columns are then dropped.
outages_6['PREFERRED_TREE_CREW_FLG'] = outages_6['PREFERRED_TREE_CREW'] > 0
outages_6['PREFERRED_SCOUT_CREW_FLG'] = outages_6['PREFERRED_SCOUT_CREW'] > 0
outages_6 = outages_6.drop(columns = ['PREFERRED_TREE_CREW', 'PREFERRED_SCOUT_CREW'])

In [72]:
# Replace missing values with 0 in the customer/call count columns, CALL_TYPE
# and SORTLEVEL.
zero_filled_columns = [
    'PHASE_A_CUSTS', 'PHASE_B_CUSTS', 'PHASE_C_CUSTS', 'PHASE_T_CUSTS',
    'PHASE_A_CALLS', 'PHASE_B_CALLS', 'PHASE_C_CALLS', 'PHASE_T_CALLS',
    'TOTAL_CUSTS',
    'LFS_CUSTS', 'MED_CUSTS', 'EMR_CUSTS', 'UTL_CUSTS',
    'MAJ_CUSTS', 'GOV_CUSTS', 'LRT_CUSTS',
    'CALL_TYPE', 'NONRES_CUSTS', 'NONRES_CALLS',
    'SORTLEVEL',
]
outages_6[zero_filled_columns] = outages_6[zero_filled_columns].fillna(0)

In [73]:
# Stage 7 of the pipeline (alias of outages_6, not a copy).
outages_7 = outages_6

In [74]:
# Load the storm-exclusion periods and parse their start/end timestamps
# (unparseable values become NaT).
stormexclusions_m_df = pd.read_csv('gs://aes-datahub-0001-raw/OMS/2002-2020/DPL/TBLSTORMEXCLUSIONS_M_DPL.csv', sep = '|')
for storm_date_column in ('FROMDATE', 'TODATE'):
    stormexclusions_m_df[storm_date_column] = pd.to_datetime(
        stormexclusions_m_df[storm_date_column], infer_datetime_format = True, errors = 'coerce'
    )

# Keep periods starting on/after 2002-01-01 and renumber the rows.
starts_in_range = stormexclusions_m_df['FROMDATE'] >= '2002-01-01'
stormexclusions_m_df = stormexclusions_m_df.loc[starts_in_range].reset_index(drop = True)

# Keep only METHOD == 'P' rows and the four columns used downstream;
# REMARKS is exposed as STORM_TYPE.
stormexclusions_m_df = (
    stormexclusions_m_df
    .loc[stormexclusions_m_df['METHOD'] == 'P', ['FROMDATE', 'TODATE', 'STORMCUSTS', 'REMARKS']]
    .rename(columns = {'REMARKS': 'STORM_TYPE'})
)

In [75]:
# Creates the mergeable table called "storm_df": one row per storm period per
# calendar day it covers, with the day stored as a 'YYYY-MM-DD' string in 'Date'.
columnnames = stormexclusions_m_df.columns.tolist()

# All rows are collected first and the frame is built once. The previous version
# grew the frame with DataFrame.append inside a nested loop, which is quadratic
# and no longer exists in pandas 2.0.
storm_records = [
    {**storm, 'Date': storm_day.strftime('%Y-%m-%d')}
    for storm in stormexclusions_m_df.to_dict('records')
    # Both ends are normalized to midnight: date_range steps in 24-hour increments
    # from `start`, so with raw timestamps the storm's last calendar day would be
    # skipped whenever TODATE's time of day is earlier than FROMDATE's.
    for storm_day in pd.date_range(start = storm['FROMDATE'].normalize(),
                                   end = storm['TODATE'].normalize(),
                                   freq = 'D')
]

# Passing `columns` keeps the expected layout even when there are no storm periods.
storm_df = pd.DataFrame(storm_records, columns = columnnames + ['Date'])
storm_df['EVENT'] = 'STORM'

In [76]:
# Build the join key: the calendar date of the first call, as a string, on the
# outage side; the storm table's Date column is cast to string to match.
outages_7['Date'] = outages_7['TIME_OF_FIRST_CALL'].dt.date.astype(str)
storm_df['Date'] = storm_df['Date'].astype(str)

In [77]:
# Left join the per-day storm table onto the outages, then discard the
# temporary Date key. Outages on non-storm days get NaN/NaT storm columns.
outages_7 = (
    outages_7
    .merge(storm_df, how = "left", on = ['Date'])
    .drop(columns = ['Date'])
)

In [78]:
# Outages that matched a storm day keep 'STORM'; every other value (including
# the NaN left by the left join) is labelled 'BLUE SKY'.
is_storm_outage = outages_7['EVENT'] == 'STORM'
outages_7['EVENT'] = outages_7['EVENT'].where(is_storm_outage, 'BLUE SKY')

In [79]:
# Stage 8 of the pipeline (alias of outages_7, not a copy).
outages_8 = outages_7

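This filter keeps outages with a TTR of at most 20,160 minutes (14 days); it removes just one outage with an abnormally high TTR. Note that any row with a missing TTR also fails this comparison and is dropped.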

In [80]:
# Keep outages restored within 20160 minutes (14 days). NaN TTR values fail the
# comparison and are excluded too.
outages_8 = outages_8.loc[outages_8['TTR'].le(20160)]

In [81]:
# Final analysis dataset (alias of outages_8, not a copy).
df_ads = outages_8

In [82]:
# Write the final dataset to the curated bucket, without the DataFrame index.
df_ads.to_csv(
    path_or_buf = 'gs://aes-datahub-0002-curated/Outage_Restoration/Underground_Cable_Datasets/DPL_EDA_Dataset.csv',
    index = False,
)

In [83]:
# Preview the first five rows of the exported dataset.
df_ads.head()

Unnamed: 0,OUTAGE_ID,DEVICE_ID,CIRCUIT,DEVICE,TIME_OF_FIRST_CALL,PHASE_A_CUSTS,PHASE_B_CUSTS,PHASE_C_CUSTS,PHASE_T_CUSTS,PHASE_A_CALLS,PHASE_B_CALLS,PHASE_C_CALLS,PHASE_T_CALLS,BRANCHIND,PHASE_A,PHASE_B,PHASE_C,PHASE_T,TOTAL_CUSTS,SAFETY,SORTLEVEL,ALLRESTORED,LFS_CUSTS,MED_CUSTS,EMR_CUSTS,UTL_CUSTS,MAJ_CUSTS,GOV_CUSTS,LRT_CUSTS,STATUS,OUTAGE_NAME,HOLD,WIREDOWN_PTH,WIREDOWN_PTP,TREEONWIRE_PTH,TREEONWIRE_PTP,BROKENPOLE,FIREEXPLOSION,DAMAGEDTRANS,LATITUDE,LONGITUDE,ARC_INDEX,CLOSEST_CREW,PREFERRED_CREW,PRIORITY,CONFIDENCE,REMARKS,CUSTSTILLOUT,DATETIMEADDED,MADESAFE,INTDEV_ID,OVERTIME,ECD,MATERIALHOLD,LASTUPDATE,ECD_OVERRIDE,URD_OUTAGE,SCOUTED,TREE_VERIFIED,TREE_CLEARED,TREE_CLEARED_NO_WORK_NEEDED,MATERIALS_DELIVERED,CALLED_CUSTOMER_OK,CREW_NEEDS_HELP,NONRES_CUSTS,NONRES_CALLS,PUBLIC_SAFETY,HYDRO_VAC_NEEDED,HYDRO_VAC_DISPATCHED,HYDRO_VAC_COMPLETED,FAILED_DEVICE,INTERRUPTING_DEVICE,CAUSEDESCRIPTION,SPOKEN_CAUSE_TEXT,CALL_TYPE,CREWSIZE_median,CREWSIZE_summed,DATETIMERELEASED,TTR,PREFERRED_TREE_CREW_FLG,PREFERRED_SCOUT_CREW_FLG,FROMDATE,TODATE,STORMCUSTS,STORM_TYPE,EVENT
0,228940,26567.0,HF1205,C8111,2002-02-20 21:15:12,5.0,9.0,18.0,0.0,1.0,0.0,0.0,0.0,1,True,False,False,False,1.0,False,6.0,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,I-C8111,False,False,False,False,False,False,False,False,39.535121,-83.848747,2,,708.0,False,H,BAD PIECE OF URD PRIMARY TO HOUSE-WILL NEED 1 ...,0,20-02-2002 21:15:20,False,1.0,False,,False,,False,False,False,False,False,False,False,False,False,0.0,0.0,False,False,False,False,Underground Primary Cable,Branch Line Fuse or Riser,Equipment Failure,was caused by damage to D P and L equipment,ALL OUT,2.0,4.0,2002-02-21 03:35:58,381.0,False,False,NaT,NaT,,,BLUE SKY
1,227962,13518.0,QA1203,D0505,2002-02-12 13:01:23,121.0,78.0,133.0,0.0,2.0,41.0,1.0,0.0,2,False,True,False,False,78.0,False,3.0,-1,2.0,0.0,0.0,0.0,0.0,0.0,0.0,6,B-D0505,False,False,False,False,False,False,False,False,39.616646,-84.247795,2,141.0,339.0,False,H,riser d0505 picked back up back fed from norm...,0,12-02-2002 13:01:28,False,1.0,False,,False,,False,False,False,False,False,False,False,False,False,0.0,0.0,False,False,False,False,Underground Primary Cable,Branch Line Fuse or Riser,Equipment Failure,was caused by damage to D P and L equipment,ALL OUT,1.0,4.0,2002-02-12 17:07:09,246.0,False,False,NaT,NaT,,,BLUE SKY
2,224567,7003.0,RE1211,A9367,2002-01-04 01:26:00,0.0,0.0,31.0,0.0,0.0,0.0,3.0,0.0,2,False,False,True,False,31.0,False,8.0,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,I-A9367,False,False,False,False,False,False,False,False,39.832772,-84.282752,2,844.0,,False,H,repair 1/0 bare concentric neutral direct buri...,0,04-01-2002 01:26:03,False,1.0,False,,False,,False,False,False,False,False,False,False,False,False,0.0,0.0,False,False,False,False,Underground Primary Cable,Branch Line Fuse or Riser,Equipment Failure,was caused by damage to D P and L equipment,ALL OUT,1.0,2.0,2002-01-04 03:53:35,148.0,False,False,NaT,NaT,,,BLUE SKY
3,240763,25042.0,HF1205,A5567,2002-04-11 18:34:46,0.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,1,False,True,False,False,1.0,False,9.0,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,I-A5567,True,False,False,False,False,False,False,False,39.550802,-83.790541,2,,709.0,False,H,BAD PIECE OF PRIMARY-NEED A UCT & 1 MORE LINEM...,0,11-04-2002 18:35:02,False,1.0,False,,False,,False,False,False,False,False,False,False,False,False,0.0,0.0,False,False,False,False,Underground Primary Cable,Branch Line Fuse or Riser,Equipment Failure,was caused by damage to D P and L equipment,ALL OUT,1.0,6.0,2002-04-11 23:33:59,299.0,False,False,NaT,NaT,,,BLUE SKY
4,226148,23916.0,GF1201,07771,2002-01-24 19:26:01,43.0,192.0,124.0,21.0,0.0,0.0,6.0,0.0,2,False,False,True,False,124.0,False,3.0,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,B-07771,False,False,False,False,False,False,False,False,39.560952,-84.134158,2,794.0,794.0,False,H,repaired primary,0,24-01-2002 19:30:24,False,1.0,False,,False,,False,False,False,False,False,False,False,False,False,0.0,0.0,False,False,False,False,Underground Primary Cable,Branch Line Fuse or Riser,Equipment Failure,was caused by damage to D P and L equipment,ALL OUT,1.0,2.0,2002-01-24 23:00:23,214.0,False,False,NaT,NaT,,,BLUE SKY
