In [1]:
# import useful libraries
import pandas as pd
import numpy as np
import reverse_geocoder as rg
import os

# Enable pandas copy-on-write mode (the default behaviour from pandas 3.0 on).
# With it, column assignments made on filtered frames in later cells act on an
# independent copy instead of triggering SettingWithCopyWarning.
pd.options.mode.copy_on_write = True

### Read weather function

In [2]:
def read_weather(year, event_type):
    """Load the one weather CSV of ``event_type`` whose file name contains ``year``.

    Files are looked up in ``../weather_data/<event_type>``. The first two
    lines of the file are skipped and the timestamp column is parsed as dates:
    ``#ZDAY`` for ``'lightning'`` files, ``#ZTIME`` for every other event type.

    Parameters
    ----------
    year : int or str
        Year to look for; matched as a substring of the file name.
    event_type : str
        Name of the sub-folder of ``../weather_data`` to search.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` (after printing a message) when no file
        or more than one file matches the year.
    """
    folder = '../weather_data/' + event_type
    year_text = str(year)
    matches = [entry for entry in os.listdir(folder) if year_text in entry]

    # Exactly one candidate is required; anything else is reported and skipped.
    if not matches:
        print("No file in that year for that event type")
        return None
    if len(matches) > 1:
        print("Multiple files with that year in their name")
        return None

    date_column = '#ZDAY' if event_type == 'lightning' else '#ZTIME'
    return pd.read_csv(folder + '/' + matches[0], skiprows=2,
                       parse_dates=[date_column])

In [3]:
# Load the 2023 power-outage summary workbook (its first row is skipped) ...
power_2023 = pd.read_excel(
    '../power_data/2023_Annual_Summary.xls',
    skiprows=1,
)
# ... and the cleaned 2023 weather table, parsing its DATE column as datetimes.
weather_2023 = pd.read_csv(
    '../weather_data/cleaned/tvs-2023.csv',
    parse_dates=['DATE'],
)
# Preview five randomly chosen weather rows.
weather_2023.sample(5)



Unnamed: 0.1,Unnamed: 0,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,AVGDV_max,LLDV_max,MXDV_max,MXDV_HEIGHT_max,DEPTH_max,MAX_SHEAR_max,MAX_SHEAR_HEIGHT_max
33405,33405,2023-09-21,KMTX,D4,40.76288,40.76288,40.76288,-111.97093,-111.97093,-111.97093,36,65,65,1,5.0,28,1.0
35058,35058,2023-10-14,KPUX,A0,37.8771,37.8771,37.8771,-104.96625,-104.96625,-104.96625,44,42,90,7,5.1,28,6.8
12042,12042,2023-05-24,KDFW,Y1,33.45809,33.45809,33.45809,-97.70359,-97.70359,-97.70359,33,83,83,6,22.7,29,6.2
34820,34820,2023-10-05,KFWS,M2,32.47399,32.47399,32.47399,-96.38379,-96.38379,-96.38379,28,65,65,4,7.3,22,4.3
15131,15131,2023-06-14,KMXX,J2,33.27123,33.27123,33.27123,-85.52263,-85.52263,-85.52263,33,51,51,6,11.4,17,6.1


In [4]:
# Preview two randomly chosen rows of the power-outage table.
power_2023.sample(2)

Unnamed: 0,Event Month,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Area Affected,NERC Region,Alert Criteria,Event Type,Demand Loss (MW),Number of Customers Affected
0,January,01/01/2023,00:06:00,01/01/2023,03:57:00,California: Riverside County;,WECC,Damage or destruction of its Facility that res...,Vandalism,0,0
220,August,,00:15:00,2023-07-28 00:00:00,05:06:00,Nebraska: Scotts Bluff County;,SERC,"Unexpected Transmission loss within its area, ...",- Failure at high voltage substation or switch...,0,0


### Finding the county of each weather event using reverse_geocoder

In [5]:
# Pair every row's mean latitude with its mean longitude and keep the pair on the frame.
locations = [
    (lat, lon)
    for lat, lon in zip(weather_2023['LAT_mean'], weather_2023['LON_mean'])
]
weather_2023['location'] = locations

# Reverse-geocode all points in one call; each result carries the state name
# under 'admin1' and the county name under 'admin2'.
address = rg.search(locations)
weather_2023['county'] = [match['admin2'] for match in address]
weather_2023['state'] = [match['admin1'] for match in address]

Loading formatted geocoded file...


In [6]:
# Keep only the outages whose 'Event Type' text mentions severe weather.
# 'Severe Weather' is a literal, so no regex is needed. na=False treats a missing
# 'Event Type' as "no match"; without it a NaN in that column puts a NaN in the
# mask and the boolean indexing raises a ValueError.
sev_weather_power_2023 = power_2023[
    power_2023['Event Type'].str.contains('Severe Weather', regex=False, na=False)
]

# Cast to text so the columns can be compared / substring-searched as strings later
# (note that astype(str) turns missing values into the string 'nan').
sev_weather_power_2023['Date of Restoration'] = sev_weather_power_2023['Date of Restoration'].astype(str)
sev_weather_power_2023['Area Affected'] = sev_weather_power_2023['Area Affected'].astype(str)
weather_2023['county'] = weather_2023['county'].astype(str)
weather_2023['state'] = weather_2023['state'].astype(str)

In [7]:
# Keep only weather rows that have a county name and whose mean position falls
# inside the box used here for the continental US: latitude 24 to 50 and
# longitude -125 to -66, bounds included.
lightning_mask = (
    weather_2023['county'].ne('')
    & weather_2023['LAT_mean'].between(24, 50)
    & weather_2023['LON_mean'].between(-125, -66)
)
weather_2023 = weather_2023.loc[lightning_mask]
weather_2023.sample(5)


Unnamed: 0.1,Unnamed: 0,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,AVGDV_max,LLDV_max,MXDV_max,MXDV_HEIGHT_max,DEPTH_max,MAX_SHEAR_max,MAX_SHEAR_HEIGHT_max,location,county,state
27138,27138,2023-08-07,KATL,L1,32.94187,32.93858,32.94516,-84.644855,-84.7083,-84.58141,50,72,85,25,18.8,30,25.3,"(32.94187, -84.644855)",Meriwether County,Georgia
30513,30513,2023-08-25,KDTW,Y0,42.886165,42.87783,42.8945,-83.515,-83.515,-83.515,41,44,100,12,29.2,35,31.3,"(42.886165000000005, -83.515)",Genesee County,Michigan
3145,3145,2023-03-02,KDAL,Y7,33.310743,33.27889,33.36038,-97.60287,-97.66858,-97.54479,53,95,113,27,24.1,50,27.4,"(33.3107425, -97.60287)",Wise County,Texas
26398,26398,2023-08-02,KMIA,Y6,26.26219,26.26219,26.26219,-80.85455,-80.85455,-80.85455,57,86,86,2,13.5,38,1.6,"(26.26219, -80.85455)",Palm Beach County,Florida
26484,26484,2023-08-03,KATL,J0,34.15327,34.14881,34.15773,-84.465865,-84.48181,-84.44992,55,82,90,9,21.1,45,8.7,"(34.15327, -84.46586500000001)",Cherokee County,Georgia


In [16]:
# Where the restoration date is the text 'Unknown', fall back to the date the event began.
unknown_restoration = sev_weather_power_2023['Date of Restoration'] == 'Unknown'
sev_weather_power_2023.loc[unknown_restoration, 'Date of Restoration'] = (
    sev_weather_power_2023.loc[unknown_restoration, 'Date Event Began']
)

# From here on `power_2023` refers to the severe-weather subset.
power_2023 = sev_weather_power_2023

# Parse both outage date columns; the entries come in more than one textual format.
power_2023['Date Event Began'] = pd.to_datetime(
    power_2023['Date Event Began'], format='mixed'
)
power_2023['Date of Restoration'] = pd.to_datetime(
    power_2023['Date of Restoration'], format='mixed'
)
weather_2023['DATE'] = pd.to_datetime(weather_2023['DATE'], format='%Y-%m-%d')

# Keep only the outages that began on a date present in the weather table.
began_on_weather_date = power_2023['Date Event Began'].isin(weather_2023['DATE'])
power_2023_filtered = power_2023.loc[began_on_weather_date]
power_2023_filtered.sample(5)

Unnamed: 0,Event Month,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Area Affected,NERC Region,Alert Criteria,Event Type,Demand Loss (MW),Number of Customers Affected
89,March,2023-03-14,09:25:00,2023-03-15,15:00:00,New York:,NPCC,"Loss of electric service to more than 50,000 c...",Severe Weather,Unknown,Unknown
146,May,2023-05-22,15:39:00,2023-05-22,18:00:00,Florida: Putnam County;,SERC,"Unexpected Transmission loss within its area, ...",Severe Weather,0,0
58,February,2023-02-23,04:30:00,2023-02-23,13:00:00,"Wisconsin: Kenosha County, Racine County, Milw...",MRO,"Loss of electric service to more than 50,000 c...",Severe Weather,143,57000
5,January,2023-01-02,18:22:00,2023-01-02,22:27:00,Arkansas: Union County; Louisiana: Ouachita Pa...,SERC,"Unexpected Transmission loss within its area, ...",Severe Weather,20,6849
132,April,2023-04-29,00:00:00,2023-04-29,20:00:00,"Texas: Hidalgo County, Cameron County, Willacy...",TRE,"Loss of electric service to more than 50,000 c...",Severe Weather,Unknown,168419


In [17]:
# Reference table of county names, read from ../extras/uscounties.csv.
county = pd.read_csv("../extras/uscounties.csv", index_col=0)
county['county'] = county['county'].astype(str)

# Flag outages whose 'Area Affected' text contains at least one known county name.
# The list of names is built once and the check runs on the single column,
# instead of re-iterating the whole county Series inside a row-wise apply.
# NOTE(review): this is a plain substring test, so a county name that is also a
# state name would set the flag for a state-only area - confirm against the data.
_county_names = county['county'].unique().tolist()
power_2023_filtered["county_info_area_affected"] = power_2023_filtered['Area Affected'].map(
    lambda area: any(name in area for name in _county_names)
)

# Attach to every weather row the outages that began on the same date.
# NOTE(review): a weather row is repeated once per outage that began on its date,
# so one event can appear several times with different flags - confirm that
# row-level proportions computed downstream are what is intended.
left_merged = pd.merge(
    weather_2023,
    power_2023_filtered,
    how='left',
    left_on='DATE',
    right_on='Date Event Began',
    indicator=True,
)


def _caused_outage(row):
    """Return True when a merged weather/outage row counts as a power outage.

    The row must carry an outage (``_merge == 'both'``) whose 'Area Affected'
    text contains the weather row's state, and either also contains its county
    or contains no county information at all (state-level report).
    """
    if row['_merge'] != 'both':
        return False
    area = str(row['Area Affected'])
    state_listed = str(row['state']) in area
    county_listed = str(row['county']) in area
    return state_listed and (county_listed or not row['county_info_area_affected'])


# Evaluate the rule once and reuse it for both the filtered view and the label column.
_outage_flags = left_merged.apply(_caused_outage, axis=1)
filtered_merge = left_merged[_outage_flags]
left_merged['power_outage'] = _outage_flags

# Identifier, position and outage-detail columns removed from the final table
# ('Event Month' is not listed, so it is kept).
columns_to_drop = ['Unnamed: 0', 'WSR_ID', 'CELL_ID', 'LAT_mean', 'LAT_min', 'LAT_max', 'LON_mean', 'LON_min', 'LON_max', 'Date Event Began', 'Time Event Began', 'Date of Restoration', 'Time of Restoration', 'Area Affected',
 'NERC Region','Alert Criteria', 'Event Type', 'Demand Loss (MW)', 'Number of Customers Affected', 'county_info_area_affected', '_merge']

# drop() already returns a new frame, so no explicit copy is needed.
weather_outage = left_merged.drop(columns=columns_to_drop)



In [18]:
# Share of rows in weather_outage flagged True versus False in 'power_outage'.
weather_outage['power_outage'].value_counts(normalize=True)

power_outage
False    0.975368
True     0.024632
Name: proportion, dtype: float64

In [21]:
# Show only the rows whose 'power_outage' flag is True.
weather_outage.loc[weather_outage['power_outage']]

Unnamed: 0,DATE,AVGDV_max,LLDV_max,MXDV_max,MXDV_HEIGHT_max,DEPTH_max,MAX_SHEAR_max,MAX_SHEAR_HEIGHT_max,location,county,state,Event Month,power_outage
818,2023-01-12,46,31,86,13,10.2,31,12.8,"(33.96648, -85.05459)",Polk County,Georgia,January,True
819,2023-01-12,58,79,88,14,10.5,30,13.5,"(34.30398, -84.77704)",Bartow County,Georgia,January,True
820,2023-01-12,33,61,61,3,22.4,21,2.8,"(34.23385, -84.85644)",Bartow County,Georgia,January,True
821,2023-01-12,62,85,93,25,19.4,32,25.4,"(34.36025, -84.57413)",Bartow County,Georgia,January,True
822,2023-01-12,78,115,115,24,22.5,43,23.6,"(34.03463857142857, -85.01741857142858)",Polk County,Georgia,January,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13144,2023-05-01,46,61,61,2,5.4,152,1.6,"(46.00969, -67.66877)",Aroostook County,Maine,May,True
13145,2023-05-01,39,51,63,7,5.6,94,7.3,"(45.90503, -67.59313)",Aroostook County,Maine,May,True
13146,2023-05-01,43,84,84,2,10.5,120,1.6,"(46.15414, -68.01098)",Aroostook County,Maine,May,True
13147,2023-05-01,49,57,67,5,5.0,300,5.2,"(46.00555, -67.75269)",Aroostook County,Maine,May,True
