In [18]:
import pandas as pd
import numpy as np
from scipy.integrate import trapezoid

# Load outage data for years 2017 to 2023
ea17csv = pd.read_csv("/Users/derrick/GaTech Dropbox/Derrick Chen/GROWER/data/eagle-i/county/eaglei_outages_2017.csv")
ea18csv = pd.read_csv("/Users/derrick/GaTech Dropbox/Derrick Chen/GROWER/data/eagle-i/county/eaglei_outages_2018.csv")
ea19csv = pd.read_csv("/Users/derrick/GaTech Dropbox/Derrick Chen/GROWER/data/eagle-i/county/eaglei_outages_2019.csv")
ea20csv = pd.read_csv("/Users/derrick/GaTech Dropbox/Derrick Chen/GROWER/data/eagle-i/county/eaglei_outages_2020.csv")
ea21csv = pd.read_csv("/Users/derrick/GaTech Dropbox/Derrick Chen/GROWER/data/eagle-i/county/eaglei_outages_2021.csv")
ea22csv = pd.read_csv("/Users/derrick/GaTech Dropbox/Derrick Chen/GROWER/data/eagle-i/county/eaglei_outages_2022.csv")
ea23csv = pd.read_csv("/Users/derrick/GaTech Dropbox/Derrick Chen/GROWER/data/eagle-i/county/eaglei_outages_2023.csv")

# Combine all years' data into a single DataFrame
outages = pd.concat([ea17csv, ea18csv, ea19csv, ea20csv, ea21csv, ea22csv, ea23csv], ignore_index=True)

# Remove NA values
outages = outages.dropna()

# Extract data for specific regions
ms_out = outages[(outages['fips_code'] >= 28000) & (outages['fips_code'] <= 28999)]


In [24]:
ms_out

Unnamed: 0,fips_code,county,state,sum,run_start_time,time_diff,new_outage,outage_ID
0,28011,Bolivar,Mississippi,1.0,2017-01-01 00:00:00,,1,1
1,28153,Wayne,Mississippi,413.0,2017-01-01 00:00:00,0.0,0,1
2,28149,Warren,Mississippi,12.0,2017-01-01 00:00:00,0.0,0,1
3,28125,Sharkey,Mississippi,28.0,2017-01-01 00:00:00,0.0,0,1
4,28103,Noxubee,Mississippi,30.0,2017-01-01 00:00:00,0.0,0,1
...,...,...,...,...,...,...,...,...
3809046,28049,Hinds,Mississippi,1.0,2023-12-31 23:45:00,0.0,0,294
3809047,28031,Covington,Mississippi,2.0,2023-12-31 23:45:00,0.0,0,294
3809048,28029,Copiah,Mississippi,10.0,2023-12-31 23:45:00,0.0,0,294
3809049,28067,Jones,Mississippi,2.0,2023-12-31 23:45:00,0.0,0,294


In [25]:
ms_out.loc[:, 'run_start_time'] = pd.to_datetime(ms_out['run_start_time'], errors='coerce')
ms_out = ms_out.dropna(subset=['run_start_time']).sort_values('run_start_time').reset_index(drop=True)
ms_out['time_diff'] = ms_out['run_start_time'].diff().dt.total_seconds() / 60
ms_out['new_outage'] = np.where((ms_out['time_diff'].isna()) | (ms_out['time_diff'] > 15), 1, 0)
ms_out['outage_ID'] = ms_out['new_outage'].cumsum()
ms_out


Unnamed: 0,fips_code,county,state,sum,run_start_time,time_diff,new_outage,outage_ID
0,28011,Bolivar,Mississippi,1.0,2017-01-01 00:00:00,,1,1
1,28067,Jones,Mississippi,1.0,2017-01-01 00:00:00,0.0,0,1
2,28035,Forrest,Mississippi,7.0,2017-01-01 00:00:00,0.0,0,1
3,28037,Franklin,Mississippi,2.0,2017-01-01 00:00:00,0.0,0,1
4,28039,George,Mississippi,1.0,2017-01-01 00:00:00,0.0,0,1
...,...,...,...,...,...,...,...,...
3809046,28071,Lafayette,Mississippi,7.0,2023-12-31 23:45:00,0.0,0,294
3809047,28107,Panola,Mississippi,1.0,2023-12-31 23:45:00,0.0,0,294
3809048,28089,Madison,Mississippi,1.0,2023-12-31 23:45:00,0.0,0,294
3809049,28113,Pike,Mississippi,13.0,2023-12-31 23:45:00,0.0,0,294


In [29]:
ms_out_grouped = ms_out.groupby(['fips_code', 'outage_ID'])

In [31]:
ms_out

Unnamed: 0,fips_code,county,state,sum,run_start_time,time_diff,new_outage,outage_ID
0,28011,Bolivar,Mississippi,1.0,2017-01-01 00:00:00,,1,1
1,28067,Jones,Mississippi,1.0,2017-01-01 00:00:00,0.0,0,1
2,28035,Forrest,Mississippi,7.0,2017-01-01 00:00:00,0.0,0,1
3,28037,Franklin,Mississippi,2.0,2017-01-01 00:00:00,0.0,0,1
4,28039,George,Mississippi,1.0,2017-01-01 00:00:00,0.0,0,1
...,...,...,...,...,...,...,...,...
3809046,28071,Lafayette,Mississippi,7.0,2023-12-31 23:45:00,0.0,0,294
3809047,28107,Panola,Mississippi,1.0,2023-12-31 23:45:00,0.0,0,294
3809048,28089,Madison,Mississippi,1.0,2023-12-31 23:45:00,0.0,0,294
3809049,28113,Pike,Mississippi,13.0,2023-12-31 23:45:00,0.0,0,294


In [22]:
# Grouped calculations for each outage block in `ms_out`
ms_out_grouped = ms_out.groupby(['fips_code', 'outage_ID'])
outage_begin, outage_end, cumulative_time, cumulative_custm, auc_list = [], [], [], [], []

for name, group in ms_out_grouped:
    start_time = group['run_start_time'].iloc[0]
    end_time = group['run_start_time'].iloc[-1]
    time_elapsed = (group['run_start_time'] - start_time).dt.total_seconds() / 60
    cumulative_sum = group['sum'].cumsum()
    auc_value = trapezoid(y=[0] + cumulative_sum.tolist(), x=[0] + time_elapsed.tolist())
    
    outage_begin.extend([start_time] * len(group))
    outage_end.extend([end_time] * len(group))
    cumulative_time.extend(time_elapsed + 15)
    cumulative_custm.extend(cumulative_sum)
    auc_list.extend([auc_value] * len(group))


# Add calculated columns to `ms_out`
ms_out['outage_begin'] = outage_begin
ms_out['outage_end'] = outage_end
ms_out['cumulative_time'] = cumulative_time
ms_out['cumulative_custm'] = cumulative_custm
ms_out['auc'] = auc_list
ms_out


TypeError: unsupported operand type(s) for -: 'numpy.ndarray' and 'Timestamp'

In [26]:
ms_out_grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x177b54860>

In [23]:
ms_out_grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x177b54860>

In [13]:
# Consolidate `ms_out_avg` DataFrame
ms_out_avg = (
    ms_out.groupby(['fips_code', 'outage_ID'], as_index=False)
    .agg(
        fipsCode=('fips_code', 'first'),
        outage_ID=('outage_ID', 'first'),
        avg=('sum', 'mean'),
        outage_begin=('outage_begin', 'first'),
        outage_end=('outage_end', 'first'),
        duration=('cumulative_time', 'last'),
        tot_custm=('cumulative_custm', 'last'),
        auc=('auc', 'first')
    )
)

# Calculate auc_nrm
ms_out_avg['auc_nrm'] = ms_out_avg.apply(
    lambda row: row['auc'] / row['tot_custm'] if row['tot_custm'] != 0 else np.nan, axis=1
)

# Select only the required columns
ms_out_avg = ms_out_avg[['fipsCode', 'outage_ID', 'avg', 'outage_begin', 'outage_end', 'duration', 'tot_custm', 'auc', 'auc_nrm']]

ms_out_avg


KeyError: 'outage_ID'

In [11]:
import pandas as pd
dec_1723 = pd.read_csv("/Users/derrick/Desktop/dec_1723.csv")

# Filter for "IDALIA" disasters and impacted counties
dec_disaster = dec_1723[dec_1723['disasterNumber']==4295]
imp_cties = dec_disaster['fipsCode'].unique()

# Extract incident begin and end dates
begin = pd.to_datetime(dec_disaster['incidentBeginDate'].iloc[0]).date()
end = pd.to_datetime(dec_disaster['incidentEndDate'].iloc[0]).date() + pd.Timedelta(days=1)

# Convert begin and end dates to timestamps
begin = pd.to_datetime(f"{begin} 00:00:00+00:00")
end = pd.to_datetime(f"{end} 23:59:59+00:00")



dec_disaster

Unnamed: 0.1,Unnamed: 0,femaDeclarationString,disasterNumber,state,declarationType,declarationDate,fyDeclared,incidentType,declarationTitle,ihProgramDeclared,...,lastIAFilingDate,incidentId,region,designatedIncidentTypes,lastRefresh,hash,id,fipsCode,Declared,Requested
1039,1040,DR-4295-MS,4295,MS,DR,2017-01-25,2017,Tornado,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",1,...,2017-03-27,2017012001,4,,2024-08-27 18:22:14.8,561e9c87bfad87858c155cb41f299786595f93c4,3003b260-8d09-498c-8597-0d0e58e8271f,28075,1,1
1040,1041,DR-4295-MS,4295,MS,DR,2017-01-25,2017,Tornado,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",1,...,2017-03-27,2017012001,4,,2024-08-27 18:22:14.8,a0a687f907e588ca4ddd0e6eb3ce20c9b91a3697,58978fd3-3dff-4f64-9ddd-45525a21f014,28035,1,1
1041,1042,DR-4295-MS,4295,MS,DR,2017-01-25,2017,Tornado,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",1,...,2017-03-27,2017012001,4,,2024-08-27 18:22:14.8,294f6ceac6836814cff52cf9034b581763d728af,1feda743-3272-4de4-9796-88762dd8239c,28073,1,1
1042,1043,DR-4295-MS,4295,MS,DR,2017-01-25,2017,Tornado,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",1,...,2017-03-27,2017012001,4,,2024-08-27 18:22:14.8,1062259c96e45239b642909af17f3a16185ab847,ada0d2dd-2f93-487f-a009-8c95ade3c7ff,28111,1,1


In [12]:
imp_cties


array([28075, 28035, 28073, 28111])

In [19]:
df = dec_1723[dec_1723['disasterNumber'] == 4295]
df

Unnamed: 0.1,Unnamed: 0,femaDeclarationString,disasterNumber,state,declarationType,declarationDate,fyDeclared,incidentType,declarationTitle,ihProgramDeclared,...,lastIAFilingDate,incidentId,region,designatedIncidentTypes,lastRefresh,hash,id,fipsCode,Declared,Requested
1039,1040,DR-4295-MS,4295,MS,DR,2017-01-25,2017,Tornado,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",1,...,2017-03-27,2017012001,4,,2024-08-27 18:22:14.8,561e9c87bfad87858c155cb41f299786595f93c4,3003b260-8d09-498c-8597-0d0e58e8271f,28075,1,1
1040,1041,DR-4295-MS,4295,MS,DR,2017-01-25,2017,Tornado,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",1,...,2017-03-27,2017012001,4,,2024-08-27 18:22:14.8,a0a687f907e588ca4ddd0e6eb3ce20c9b91a3697,58978fd3-3dff-4f64-9ddd-45525a21f014,28035,1,1
1041,1042,DR-4295-MS,4295,MS,DR,2017-01-25,2017,Tornado,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",1,...,2017-03-27,2017012001,4,,2024-08-27 18:22:14.8,294f6ceac6836814cff52cf9034b581763d728af,1feda743-3272-4de4-9796-88762dd8239c,28073,1,1
1042,1043,DR-4295-MS,4295,MS,DR,2017-01-25,2017,Tornado,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",1,...,2017-03-27,2017012001,4,,2024-08-27 18:22:14.8,1062259c96e45239b642909af17f3a16185ab847,ada0d2dd-2f93-487f-a009-8c95ade3c7ff,28111,1,1


In [24]:
# Match outages for IDALIA in MS
idalia_ms = ms_out_avg[
    (ms_out_avg['fipsCode'].isin(imp_cties)) &
    (ms_out_avg['outage_begin'] <= end) &
    (ms_out_avg['outage_end'] >= begin)
]

idalia_ms

Unnamed: 0,fipsCode,outage_ID,avg,outage_begin,outage_end,duration,tot_custm,auc,auc_nrm
