In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

In [46]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, ConfusionMatrixDisplay

In [47]:
#Raw content URL for adult-all.txt
url = "https://raw.githubusercontent.com/hastighsh/Ellehacks_2024/main/Power_outage_pattern"

#Specify the delimiter (assuming it's a tab-separated file)
delimiter = ','

#Read the data into a DataFrame
power_outages_data = pd.read_csv(url, delimiter=delimiter)

#Let's create a backup copy of the dataset
outage_backup = outage.copy()

In [48]:
power_outages_data


Unnamed: 0,Event Description,Year,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags
0,Severe Weather - Thunderstorms,2014,6/30/2014,8:00 PM,7/2/2014,6:30 PM,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm"
1,Severe Weather - Thunderstorms,2014,6/30/2014,11:20 PM,7/1/2014,5:00 PM,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm"
2,Severe Weather - Thunderstorms,2014,6/30/2014,5:55 PM,7/1/2014,2:53 AM,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm"
3,Fuel Supply Emergency - Coal,2014,6/27/2014,1:21 PM,Unknown,Unknown,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal"
4,Physical Attack - Vandalism,2014,6/24/2014,2:54 PM,6/24/2014,2:55 PM,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical"
...,...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,3/18/2000,4:00 p.m.,3/18/2000,5:10:00 PM,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption
1648,Vandalism,2000,3/14/2000,9:06 p.m.,,,Alliant Energy,Maine,MAIN,,,vandalism
1649,Ice Storm,2000,1/29/2000,10:00 p.m.,2/3/2000,12:00:00 PM,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm"
1650,Ice Storm,2000,1/24/2000,7:00 p.m.,,,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm"


In [49]:
power_outages_data.info

<bound method DataFrame.info of                    Event Description  Year Date Event Began Time Event Began  \
0     Severe Weather - Thunderstorms  2014        6/30/2014          8:00 PM   
1     Severe Weather - Thunderstorms  2014        6/30/2014         11:20 PM   
2     Severe Weather - Thunderstorms  2014        6/30/2014          5:55 PM   
3       Fuel Supply Emergency - Coal  2014        6/27/2014          1:21 PM   
4        Physical Attack - Vandalism  2014        6/24/2014          2:54 PM   
...                              ...   ...              ...              ...   
1647          Transmission Line Loss  2000        3/18/2000        4:00 p.m.   
1648                       Vandalism  2000        3/14/2000        9:06 p.m.   
1649                       Ice Storm  2000        1/29/2000       10:00 p.m.   
1650                       Ice Storm  2000        1/24/2000        7:00 p.m.   
1651                       Ice Storm  2000        1/23/2000        8:00 a.m.   

     Da

In [50]:
# check data type of cloumn
power_outages_data["Time Event Began"].dtype

dtype('O')

In [51]:
# check unique values to find out missmatched types
power_outages_data_copy = power_outages_data.copy()
power_outages_data_copy['Time Event Began'].unique()

array(['8:00 PM', '11:20 PM', '5:55 PM', '1:21 PM', '2:54 PM', '8:47 AM',
       '9:52 AM', '5:00 PM', '12:00 AM', '9:10 AM', '4:00 PM', '9:30 AM',
       '9:50 PM', '11:07 AM', '11:00 PM', '1:00 PM', '3:00 AM', '1:06 PM',
       '3:32 PM', '1:38 AM', '11:00 AM', '12:31 PM', '3:00 PM', '7:01 AM',
       '10:43 AM', '12:10 PM', '3:34 PM', '1:14 PM', '6:38 PM', '6:00 PM',
       '8:39 AM', '1:00 AM', '3:50 AM', '11:30 PM', '9:37 AM', '9:15 AM',
       '12:07 PM', '7:00 AM', '3:02 PM', '7:45 PM', '6:15 PM', '11:09 AM',
       '3:30 AM', '2:45 PM', '3:41 PM', '1:37 PM', '5:25 PM', '12:34 PM',
       '7:35 PM', '5:06 PM', '9:06 AM', '6:40 AM', '1:48 AM', '7:00 PM',
       '2:53 AM', '4:40 PM', '6:43 PM', '2:15 PM', '11:03 AM', '7:48 AM',
       '4:50 PM', '4:30 PM', '1:58 PM', '1:05 PM', '3:35 PM', '8:05 AM',
       '5:00 AM', '7:35 AM', '2:20 PM', '9:00 PM', '10:00 AM', '3:30 PM',
       '9:17 PM', '4:00 AM', '1:04 PM', '9:45 AM', '1:55 PM', '9:15 PM',
       '12:30 AM', '12:14 PM', '5:39 

In [52]:
power_outages_data_copy.count()

Event Description               1652
Year                            1652
Date Event Began                1652
Time Event Began                1643
Date of Restoration             1638
Time of Restoration             1632
Respondent                      1652
Geographic Areas                1651
NERC Region                     1650
Demand Loss (MW)                1246
Number of Customers Affected    1434
Tags                            1651
dtype: int64

In [53]:
# apply some rules to get rid of the values Midnight, Evening and 12:00 noon // it could be discussed if Evening should convert to null or to an aproximate time for evening as I do here
power_outages_data_copy.loc[(power_outages_data_copy['Time Event Began'] == 'Midnight', 'Time Event Began')] = "12:00 a.m."
power_outages_data_copy.loc[(power_outages_data_copy['Time Event Began'] == 'Evening', 'Time Event Began')] = "06:00 p.m."
power_outages_data_copy.loc[(power_outages_data_copy['Time Event Began'] == '12:00 noon', 'Time Event Began')] = "12:00 p.m."
power_outages_data_copy['Time Event Began'].unique()

array(['8:00 PM', '11:20 PM', '5:55 PM', '1:21 PM', '2:54 PM', '8:47 AM',
       '9:52 AM', '5:00 PM', '12:00 AM', '9:10 AM', '4:00 PM', '9:30 AM',
       '9:50 PM', '11:07 AM', '11:00 PM', '1:00 PM', '3:00 AM', '1:06 PM',
       '3:32 PM', '1:38 AM', '11:00 AM', '12:31 PM', '3:00 PM', '7:01 AM',
       '10:43 AM', '12:10 PM', '3:34 PM', '1:14 PM', '6:38 PM', '6:00 PM',
       '8:39 AM', '1:00 AM', '3:50 AM', '11:30 PM', '9:37 AM', '9:15 AM',
       '12:07 PM', '7:00 AM', '3:02 PM', '7:45 PM', '6:15 PM', '11:09 AM',
       '3:30 AM', '2:45 PM', '3:41 PM', '1:37 PM', '5:25 PM', '12:34 PM',
       '7:35 PM', '5:06 PM', '9:06 AM', '6:40 AM', '1:48 AM', '7:00 PM',
       '2:53 AM', '4:40 PM', '6:43 PM', '2:15 PM', '11:03 AM', '7:48 AM',
       '4:50 PM', '4:30 PM', '1:58 PM', '1:05 PM', '3:35 PM', '8:05 AM',
       '5:00 AM', '7:35 AM', '2:20 PM', '9:00 PM', '10:00 AM', '3:30 PM',
       '9:17 PM', '4:00 AM', '1:04 PM', '9:45 AM', '1:55 PM', '9:15 PM',
       '12:30 AM', '12:14 PM', '5:39 

In [54]:
# Changing 'Time Event Began' values to Datetime and put them in a new column
power_outages_data_copy["Time Event Began Parsed"] = pd.to_datetime(power_outages_data_copy["Time Event Began"], infer_datetime_format=True, errors='coerce').dt.time
power_outages_data_copy.sample(10)

  power_outages_data_copy["Time Event Began Parsed"] = pd.to_datetime(power_outages_data_copy["Time Event Began"], infer_datetime_format=True, errors='coerce').dt.time
  power_outages_data_copy["Time Event Began Parsed"] = pd.to_datetime(power_outages_data_copy["Time Event Began"], infer_datetime_format=True, errors='coerce').dt.time


Unnamed: 0,Event Description,Year,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Time Event Began Parsed
451,Suspected Physical Attack,2012,4/12/2012,8:08 AM,4/12/2012,4:30 PM,Tacoma Power (TPWR),"Tacoma, Washington",WECC,,,"vandalism, physical",08:08:00
251,Vandalism,2013,5/15/2013,2:11 PM,5/15/2013,2:12 PM,Delmarva Power & Light Company,"Wilmington, Delaware",RFC,Unknown,Unknown,vandalism,14:11:00
203,Fuel Supply Emergency (Natural Gas),2013,7/17/2013,3:30 PM,7/19/2013,6:45 AM,Long Island Power Authority,"Holtsville, New York",NPCC,417,Unknown,"fuel supply emergency, natural gas",15:30:00
720,Severe Weather,2011,4/27/2011,10:00 AM,4/29/2011,4:29 PM,Tennessee Valley Authority,"Alabama, Georgia, Mississippi, Tennessee",SERC,UNK,612000,severe weather,10:00:00
132,Physical Attack,2013,12/27/2013,9:43 AM,12/27/2013,12:43 PM,Arizona Public Service Co,Gilabend Arizona,WECC,,,"vandalism, physical",09:43:00
1520,Public Appeal to Reduce Load,2004,1/8/2004,3:00 p.m.,1/10/2004,7:00 p.m.,National Grid (New York),"Lake Placid/Saranac, New York",NPCC,100,18600,public appeal,15:00:00
1020,Ice Storm,2009,1/27/2009,5:00 a.m.,1/29/2009,4:30 p.m.,Louisville Gas and Electric/Kentucky Utilities,State of Kentucky,RFC,,383000,"severe weather, winter storm",05:00:00
808,Severe Weather,2010,12/26/2010,8:15 a.m.,12/26/2010,4:15 PM,Progress Energy Carolinas,Carolina,SERC,,42000,severe weather,08:15:00
387,Severe Weather - Thunderstorms,2012,7/26/2012,6:14 PM,7/27/2012,6:14 PM,FirstEnergy Corp.: Pennsylvania Electric Company,Western Pennsylvania,RFC,,65112,"severe weather, thunderstorm",18:14:00
1129,Thunderstorms,2008,6/4/2008,3:04 p.m.,6/5/2008,9:30 p.m.,Dominion-Virginia Power,Northern Virginia,SERC,850,253800,"severe weather, thunderstorm",15:04:00


In [55]:
# Checking the data loss by parsing
power_outages_data_copy.count()

Event Description               1652
Year                            1652
Date Event Began                1652
Time Event Began                1643
Date of Restoration             1638
Time of Restoration             1632
Respondent                      1652
Geographic Areas                1651
NERC Region                     1650
Demand Loss (MW)                1246
Number of Customers Affected    1434
Tags                            1651
Time Event Began Parsed         1640
dtype: int64

In [56]:
# drop the old column
power_outages_data_copy = power_outages_data_copy.drop("Time Event Began", axis=1)
power_outages_data_copy.sample(10)

Unnamed: 0,Event Description,Year,Date Event Began,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Time Event Began Parsed
502,Suspected Physical Attack,2011,12/20/2011,12/20/2011,9:31 AM,City of Tacoma (TPWR),"Tacoma, Washington",WECC,,,"vandalism, physical",09:30:00
590,Suspected Physical Attack,2011,8/25/2011,8/25/2011,12:01 AM,Central Maine Power Company,Maine,NPCC,0,0,"vandalism, physical",00:01:00
1184,Brush Fire/Load Shedding,2007,10/26/2007,10/26/2007,10:46 a.m.,Southern California Edison Company,Southern Califonia,WECC,280,20345,"wild fire, load shedding",06:44:00
655,Public Appeal to Reduce Electricity Usage,2011,6/29/2011,6/29/2011,6:04 PM,Southwestern Public Service,"Panhandle and Muleshoe, Texas",SPP,0,0,public appeal,11:30:00
1285,Tropical Storm Ernesto,2006,9/1/2006,9/4/2006,2:00 p.m.,Delmarva Power,Southern Delmarva Peninsula,RFC,380,105000,"severe weather, hurricane/tropical storm",10:00:00
118,Public Appeal due to Severe Weather - Cold,2014,1/7/2014,1/7/2014,8:30 AM,Tennessee Valley Authority,Northeast Tennessee,SERC,Unknown,Unknown,"severe weather, cold",06:00:00
549,Vandalism,2011,9/24/2011,9/25/2011,4:00 PM,City of Tacoma (TPWR),"Collins Substation, Tacoma, Washington",WECC,,,vandalism,16:17:00
666,Severe Weather,2011,6/21/2011,6/22/2011,7:00 AM,American Electric Power (AEP),AEP Region,RFC,UNK,56000,severe weather,18:30:00
505,Suspected Physical Attack,2011,12/13/2011,12/14/2011,3:19 AM,Dayton Power & Light,Clinton County Ohio,RFC,,0,"vandalism, physical",03:19:00
1045,Brush Fire/Shed Firm Load,2008,11/15/2008,11/15/2008,10:10 a.m.,Los Angeles Department of Water and Power,City of Los Angeles,WECC,211,115500,"wild fire, load shedding",09:39:00


In [57]:
# a similiar handling for the 'Time of Restoration'
power_outages_data_copy['Time of Restoration'].unique()

array(['6:30 PM', '5:00 PM', '2:53 AM', 'Unknown', '2:55 PM', '8:48 AM',
       '7:00 PM', '3:00 PM', '1:00 AM', '9:11 AM', '4:30 PM', '9:31 AM',
       '2:30 PM', '11:30 AM', '5:30 AM', '11:45 PM', '1:07 PM', '3:59 PM',
       '1:43 AM', '4:53 PM', '1:18 PM', '7:02 AM', '9:00 PM', '12:13 PM',
       '1:15 PM', '6:39 PM', '1:00 PM', '8:40 AM', '1:01 AM', '2:00 PM',
       '12:30 PM', '9:00 AM', '12:09 PM', '7:30 AM', '5:13 PM', '8:37 PM',
       '7:30 PM', '1:01 PM', '11:20 AM', '8:15 AM', '11:53 AM', '8:08 PM',
       '2:33 PM', '12:00 PM', '11:08 AM', '12:01 AM', '12:56 AM',
       '4:45 PM', '12:00 AM', '5:07 PM', '9:06 AM', '5:25 PM', '3:28 PM',
       '1:49 AM', '11:59 PM', '6:44 PM', '3:20 PM', '2:16 PM', '4:30 AM',
       '8:30 PM', '8:00 AM', '10:00 PM', '8:40 PM', '7:39 PM', '7:15 PM',
       '6:00 PM', '8:06 AM', '7:00 AM', '5:01 AM', '4:03 AM', '4:44 PM',
       '11:00 AM', '8:19 PM', '3:08 AM', '2:45 AM', '12:39 PM', '9:45 AM',
       '12:20 PM', '1:57 PM', '2:45 PM', '6:30

In [58]:
power_outages_data_copy["Time of Restoration Parsed"] = pd.to_datetime(power_outages_data_copy["Time of Restoration"], infer_datetime_format=True, errors='coerce').dt.time
power_outages_data_copy.sample(10)

  power_outages_data_copy["Time of Restoration Parsed"] = pd.to_datetime(power_outages_data_copy["Time of Restoration"], infer_datetime_format=True, errors='coerce').dt.time
  power_outages_data_copy["Time of Restoration Parsed"] = pd.to_datetime(power_outages_data_copy["Time of Restoration"], infer_datetime_format=True, errors='coerce').dt.time


Unnamed: 0,Event Description,Year,Date Event Began,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Time Event Began Parsed,Time of Restoration Parsed
758,Vandalism,2011,3/18/2011,3/18/2011,3:34 PM,Dayton Power & Light,"Greene County, Ohio",RFC,,,vandalism,09:54:00,15:34:00
262,Severe Weather - Storms and Wind,2013,4/18/2013,4/21/2013,3:30 AM,Detroit Edison Co,"Southeast Michigan, Michigan",RFC,Unknown,99188,"severe weather, wind",15:00:00,03:30:00
560,Transmission/Distribution Interruption; Load S...,2011,9/8/2011,9/10/2011,3:30 PM,WECC Reliability Coordinator,Arizona; California,WECC,7000,2000000,"transmission interruption, distribution interr...",15:28:00,15:30:00
288,Severe Weather - Winter Storm Nemo,2013,2/8/2013,2/11/2013,8:30 PM,ISO New England/National Grid,Central and eastern Massachusetts; Rhode Island,NPCC,,50000,"severe weather, winter storm",20:00:00,20:30:00
1431,Major Freezing Rain and Ice Storm,2004,12/23/2004,12/31/2004,11:00 p.m.,American Electric Power,Columbus District,ECAR,800,359171,"severe weather, winter storm",03:37:00,23:00:00
310,Suspected Physical Attack,2012,12/3/2012,12/3/2012,12:30 PM,Tacoma Power (TPWR),"Tacoma, Washington",WECC,0,0,"vandalism, physical",12:02:00,12:30:00
1370,Voltage Reduction/Load Shed,2005,9/13/2005,9/13/2005,2:29 p.m.,Puerto Rico Electric Power Authority,Island of Puerto Rico,PR,249,66480,"voltage reduction, load shedding",14:14:00,14:29:00
355,Suspected Physical Attack,2012,10/24/2012,10/24/2012,12:01 AM,Potomac Electric Power Company (PEPCO),"Brighton, Maryland",RFC,0,0,"vandalism, physical",00:00:00,00:01:00
1241,Heavy Snow Storm,2007,4/5/2007,4/6/2007,1:10 p.m.,Central Maine Power Company,Southern and Coastal Maine,NPCC,-,117142,"severe weather, winter storm",21:20:00,13:10:00
74,Fuel Supply Emergency - Coal,2014,2/7/2014,3/21/2014,8:00 AM,"Somerset Operating Company, LLC",Niagara County New York,NPCC,675,Unknown,"fuel supply emergency, coal",07:00:00,08:00:00


In [60]:
power_outages_data_copy = power_outages_data_copy.drop("Time of Restoration", axis=1)
power_outages_data_copy.sample(10)

Unnamed: 0,Event Description,Year,Date Event Began,Date of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Time Event Began Parsed,Time of Restoration Parsed
523,Severe Weather,2011,10/29/2011,10/31/2011,"Orange and Rockland Utilities, Inc",New Jersey; New York,"NPCC, RFC",,74000,severe weather,20:00:00,20:00:00
708,Suspected Physical Attack,2011,5/9/2011,5/9/2011,"Exelon Generation Company, LLC","Holtwood, Pennsylvania",RFC,630,UNK,"vandalism, physical",04:08:00,06:40:00
1298,Widespread Heat Wave/CAISO Implementation of ...,2006,7/24/2006,7/24/2006,California ISO,California,WECC,695,,"severe weather, heat",14:33:00,17:33:00
187,Physcial Attack; Vandalism,2013,8/7/2013,8/8/2013,"Caithness Shepherds Flat, LLC","Arlington, Oregon",WECC,Unknown,Unknown,"vandalism, physical",14:30:00,13:00:00
252,Vandalism/Theft,2013,5/14/2013,5/14/2013,PacifiCorp,"Portland, Oregon",WECC,,,"vandalism, physical",00:01:00,13:59:00
404,Severe Weather - Wind & Storms,2012,7/5/2012,7/6/2012,Tennessee Valley Authority (TVA),Northeast Tennessee,SERC,,50001,"severe weather, wind, storm",19:00:00,16:00:00
492,Physical Attack,2012,1/12/2012,1/12/2012,Delmarva Power & Light Company,"Newark, Delaware",RFC,0,0,"vandalism, physical",08:26:00,08:26:00
1527,Transmission Equipment,2003,12/5/2003,12/5/2003,City of Homestead,State of Florida - Dade County,FRCC,27,16500,transmission interruption,04:49:00,06:25:00
1492,Public Appeals,2004,5/28/2004,5/31/2004,Seminole Electric Cooperative,"Florida counties of Gadsden, Wakulla, Leon, an...",FRCC,0,0,public appeal,12:00:00,00:00:00
1078,Tropical Storm Fay,2008,8/24/2008,8/24/2008,Southern Company,Georgia and Alabama,SERC,110,87390,"severe weather, hurricane/tropical storm",04:30:00,14:00:00


In [61]:
# rename the columns
power_outages_data_copy = power_outages_data_copy.rename(columns={'Time of Restoration Parsed': 'Time of Restoration', 'Time Event Began Parsed': 'Time Event Began'})
power_outages_data_copy.sample(10)

Unnamed: 0,Event Description,Year,Date Event Began,Date of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Time Event Began,Time of Restoration
1098,Heat Wave/Potential Fire Threat/Made Public Ap...,2008,7/7/2008,7/10/2008,California ISO,ISO Balancing Area,WECC,0,0,"severe weather, heat, public appeal",12:15:00,17:00:00
1528,Fault on 138 KV line,2003,12/4/2003,12/8/2003,Wisconsin Electric Power Company,Upper Peninsula of Michigan and Northeastern W...,MAIN,500,36000,equipment failure,22:15:00,08:30:00
202,Physical Attack; Vandalism,2013,7/18/2013,7/18/2013,PacifiCorp,Utah,WECC,,,"vandalism, physical",03:15:00,03:59:00
517,Suspected Physical Attack,2011,11/22/2011,11/22/2011,Northern Indiana Public Service Co,Indiana,RFC,0,0,"vandalism, physical",23:16:00,23:16:00
1315,Severe Weather,2006,5/25/2006,5/27/2006,"Duke Energy - Ohio, Kentucky, Indiana","Southwest Ohio, Northern Kentucky, Central Ind...",RFC,800,210000,severe weather,19:50:00,09:00:00
264,"Loss of Part of a High Voltage Substation, Phy...",2013,4/16/2013,4/18/2013,Pacific Gas & Electric Co,California,WECC,,0,vandalism,01:47:00,15:25:00
223,Suspected Cyber Attack,2013,6/21/2013,10/30/2013,ITC Transmission,"Michigan, Iowa",MRO,Unknown,Unknown,"vandalism, cyber",08:31:00,14:09:00
53,Physical Attack - Sabatoge,2014,3/17/2014,3/18/2014,Arizona Public Service Co,"Glendale, Arizona",WECC,,,vandalism,17:25:00,00:56:00
1283,Tropical Storm Ernesto,2006,9/1/2006,9/4/2006,Atlantic City Electric,Southern New Jersey Counties,RFC,400,100000,"severe weather, hurricane/tropical storm",20:00:00,17:00:00
1513,Public Appeal to Reduce Load,2004,2/14/2004,2/16/2004,National Grid (Niagara Mohawk),"Lake Colby, Lake Placid, Tupper Lake",NPCC,Approx. 30,18600,public appeal,20:00:00,NaT


In [62]:
power_outages_data_copy.to_csv("power_outages_data_copy.csv")