This notebook will perform our temporal train/test split of the dataset, after merging the two existing datasets into one and filtering out entries in the Disaster Declaration Summaries (DDS) that do not exist in the Mission Assignments (MA)

In [398]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [399]:
pd.set_option('display.max_columns', 80)

In [400]:
ma_filepath = 'mission_assignments.parquet'
dds_filepath = 'disaster_declaration_summaries.parquet'
train_filepath = 'combined_training_set.parquet'
test_filepath = 'combined_test_set.parquet'


In [401]:
# Load initial datasets

df_dds = pd.read_parquet(dds_filepath)
df_ma = pd.read_parquet(ma_filepath)
print(df_dds.shape, df_ma.shape)

(68485, 28) (40340, 39)


In [402]:
df_dds['designatedIncidentTypes'].head(10)

0       R
1       R
2       R
3    None
4    None
5    None
6    None
7    None
8    None
9       R
Name: designatedIncidentTypes, dtype: object

Adding lists and dictionaries for later use.

In [403]:
#dictionary to convert state/territory designators to full word strings
state_dict = {'AL':'Alabama','AK':'Alaska','AZ':'Arizona','AR':'Arkansas','CA':'California','CO':'Colorado','CT':'Connecticut',
             'DE':'Delaware', 'FL':'Florida','GA':'Georgia','HI':'Hawaii','ID':'Idaho','IL':'Illinois','IN':'Indiana','IA':'Iowa',
             'KS':'Kansas','KY':'Kentucky','LA':'Louisiana','ME':'Maine','MD':'Maryland','MA':'Massachusetts','MI':'Michigan',
             'MN':'Minnesota','MS':'Mississippi','MO':'Missouri','MT':'Montana','NE':'Nebraska','NV':'Nevada','NH':'New Hampshire',
             'NM':'New Mexico','NY':'New York','NJ':'New Jersey','NC':'North Carolina','ND':'North Dakota','OH':'Ohio',
             'OK':'Oklahoma','OR':'Oregon','PA':'Pennsylvania','RI':'Rhode Island','SC':'South Carolina','SD':'South Dakota',
             'TN':'Tennessee','TX':'Texas','UT':'Utah','VT':'Vermont','VA_state':'Virginia','WA':'Washington','WV':'West Virginia',
             'WI':'Wisconsin','WY':'Wyoming','DC':'Washington, DC','GU':'Guam','PR':'Puerto Rico','AS':'American Samoa',
             'MP':'Northern Mariana Islands','FM':'Federated States of Micronesia','MH':'Marshall Islands','PW':'Palau'}

state_list = ['AL','AZ','AR','CA','CO','CT','DE', 'FL','GA','ID','IL','IN','IA','KS','KY','LA','ME','MD','MA','MI',
             'MN','MS','MO','MT','NE','NV','NH','NM','NY','NJ','NC','ND','OH','OK','OR','PA','RI','SC','SD',
             'TN','TX','UT','VT','VA','WA','WV','WI','WY']

#set using only natural disasters that could be potentially caused by climate change
natural_disaster = ['Fire','Flood','Severe Storm','Straight-Line Winds','Winter Storm','Hurricane','Tornado','Tropical Storm',
                   'Mud/Landslide','Snowstorm','Coastal Storm','Severe Ice Storm','Typhoon','Freezing','Drought','Fishing Losses',
                   'Tropical Depression']

#manmade or other disasters that would not be caused by climate change
nonweather_disaster = ['Earthquake','Other','Biological','Dam/Levee Break','Volcanic Eruption','Toxic Substances','Chemical',
                      'Terrorist','Human Cause','Tsunami','Civil Unrest','Nuclear','Explosion','Tidal Wave']

#dictionary to convert disaster codes to strings representing each type of disaster
disaster_dict = {'0':'Not applicable','1':'Explosion','2':'Straight-Line Winds','3':'Tidal Wave','4':'Tropical Storm',
                '5':'Winter Storm','A':'Tsunami','B':'Biological','C':'Coastal Storm','D':'Drought','E':'Earthquake',
                'F':'Flood','G':'Freezing','H':'Hurricane','I':'Terrorist','J':'Typhoon','K':'Dam/Levee Break','L':'Chemical',
                'M':'Mud/Landslide','N':'Nuclear','O':'Severe Ice Storm','P':'Fishing Losses','Q':'Crop Losses','R':'Fire',
                'S':'Snowstorm','T':'Tornado','U':'Civil Unrest', 'V':'Volcanic Eruption','W':'Severe Storm','X':'Toxic Substances',
                'Y':'Human Cause','Z':'Other', '8':'Tropical Depression'}

agencyid_dict = {'CISA':'DHS-CISA','DHSMGMT':'DHS-MGMT','USDANRCS':'USDA-NRCS','GSA-':'GSA','VA-':'VA','EPA-':'EPA','DOT-':'DOT',
                'CNCS-':'CNCS','FCC-':'FCC','DOED':'DOE','DHUD':'HUD','DOD-':'DOD','VA -':'VA','USDAOCIO':'USDA-OCIO','FPS':'DHS-FPS',
                'TSA':'DHS-TSA','ICE':'DHS-ICE','USCIS':'DHS-CIS','DLA':'DOD-DLA','CBP':'DHS-CBP','NPS':'DOI-NPS','NPPD':'DHS-CISA',
                'CDC':'HHS-CDC','USAF':'DOD-USAF','OSHA':'DOL-OSHA','DHS-MGT':'DHS-MGMT','USGS':'DOI-USGS','USCG':'DHS-USCG',
                'USDJ':'DOJ','DHS-MGA':'DHS-IA','FLETC':'DHS-FLETC','DHS-FLET':'DHS-FLETC','USFS':'USDA-FS','HHS -PSC':'HHS-PSC'}

In [404]:
df_dds['designatedIncidentTypes'].sample(30)

55589            W
1127       5,W,F,T
20806         None
12751         None
11755         None
19212         None
36133         None
6175          None
44694         None
40614         None
47278            T
14042         None
10591         None
3011          None
23714         None
26385         None
12296         None
56136         None
22746         None
38162            T
23283         None
41859            T
46671         None
59562            W
35856         None
45494         None
1983          None
35677         None
50938            F
14941    2,M,W,F,T
Name: designatedIncidentTypes, dtype: object

Data cleaning for MA includes 

In [405]:
df_ma=df_ma[(df_ma['declarationType']!='SU')&(df_ma['maAmendNumber']==0)&(df_ma['supportFunction']<=15)]

df_ma['supportFunction'].fillna(value=0,inplace=True)

# df_ma['stt'].replace({'VA':'VA_state'},inplace=True)

df_ma['agencyId'].replace(agencyid_dict,inplace=True)

column_list_ma = ['incidentId','stt','incidentType','region','maType','maPriority','supportFunction','agencyId', 'maId',
              'declarationType', 'assistanceRequested', 'statementOfWork']
df_ma = df_ma.reindex(columns=column_list_ma)

df_ma.drop_duplicates(inplace=True)

df_ma.rename(columns={'incidentType': 'incidentTypeMA'},
             inplace=True)

# df_ma.drop(columns=[
#     'lastRefresh',
#     'hash',
#     'id',],
#     inplace=True)

df_ma.shape

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_ma['supportFunction'].fillna(value=0,inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_ma['agencyId'].replace(agencyid_dict,inplace=True)


(7044, 12)

Data cleaning for DDS includes keeping of specific columns and filtering of year and declaration type

In [406]:
# select columns necessary for data analysis, add empty columns for each natural disaster type

column_list_dds = ['femaDeclarationString','state','incidentType','incidentBeginDate','fipsStateCode','region',
               'designatedIncidentTypes','declarationTitle', 'incidentId','declarationType']

df_dds = df_dds.reindex(
    columns=column_list_dds,
    fill_value=0)

# Add time information to DDS

df_dds['incidentBeginDate']=pd.to_datetime(df_dds['incidentBeginDate'])
df_dds['year'] = df_dds['incidentBeginDate'].dt.year
df_dds['month'] = df_dds['incidentBeginDate'].dt.month
df_dds['day'] = df_dds['incidentBeginDate'].dt.day

# Filter out values before 2012

df_dds=df_dds[(df_dds['year']>=2012) & (df_dds['declarationType']!='FM')]
# df_dds.drop(columns=[
#     'lastRefresh',
#     'hash',
#     'id',],
#     inplace=True)
print(df_dds.shape)

#ensures that incident type is reflected in designated incident types
df_dds['designatedIncidentTypes'].fillna(df_dds['incidentType'], inplace = True)

df_dds.drop_duplicates(inplace=True)
df_dds.reset_index(inplace = True,
                   drop=True)

df_dds.shape


(26041, 13)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_dds['designatedIncidentTypes'].fillna(df_dds['incidentType'], inplace = True)


(1123, 13)

In [407]:
dds_column_list = df_dds.columns.to_list()
dds_column_list

['femaDeclarationString',
 'state',
 'incidentType',
 'incidentBeginDate',
 'fipsStateCode',
 'region',
 'designatedIncidentTypes',
 'declarationTitle',
 'incidentId',
 'declarationType',
 'year',
 'month',
 'day']

In [408]:
df_ma.rename(columns={'stt':'state'},inplace=True)
ma_column_list = df_ma.columns.to_list()
ma_column_list

['incidentId',
 'state',
 'incidentTypeMA',
 'region',
 'maType',
 'maPriority',
 'supportFunction',
 'agencyId',
 'maId',
 'declarationType',
 'assistanceRequested',
 'statementOfWork']

In [409]:
print(df_dds['incidentId'].nunique(), df_ma['incidentId'].nunique())

662 326


In [410]:
overlapping_columns = list(set(ma_column_list).intersection(set(dds_column_list)))
overlapping_columns

['region', 'incidentId', 'declarationType', 'state']

In [455]:
MA_disaster_combined=df_ma.merge(
    df_dds, 
    how='left',
    on=overlapping_columns,
    validate='m:m')

In [456]:
MA_disaster_combined.shape

(7699, 21)

In [457]:
MA_disaster_combined.drop_duplicates(inplace=True)
MA_disaster_combined.shape


(7699, 21)

In [458]:
MA_disaster_combined['designatedIncidentTypes'].fillna(MA_disaster_combined['incidentTypeMA'], inplace = True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  MA_disaster_combined['designatedIncidentTypes'].fillna(MA_disaster_combined['incidentTypeMA'], inplace = True)


In [459]:
# MA_disaster_combined['designatedIncidentTypes'] = MA_disaster_combined['designatedIncidentTypes'].str.split(',')
# MA_disaster_combined['designatedIncidentTypes'] = MA_disaster_combined['designatedIncidentTypes'].replace(disaster_dict)
# MA_disaster_combined['designatedIncidentTypes'] = MA_disaster_combined['designatedIncidentTypes'].apply(
#     lambda x: (
#         [disaster_dict.get(s, s) for s in x]
#         if isinstance(x, list)
#         else x)
# )

# MA_disaster_combined['designatedIncidentTypes'].iloc[0]

MA_disaster_combined['designatedIncidentTypes'] = MA_disaster_combined['designatedIncidentTypes'].str.split(',').apply(
    lambda lst: [s.strip() for s in lst] if isinstance(lst, list) else lst).apply(
    lambda lst: [disaster_dict.get(s, s) for s in lst] if isinstance(lst, list) else lst).apply(
    lambda lst: ','.join(lst) if isinstance(lst, list) else str(lst))

In [460]:
MA_disaster_combined[['incidentTypeMA','designatedIncidentTypes']].sample(30)

Unnamed: 0,incidentTypeMA,designatedIncidentTypes
102,Fire,Fire
2574,Biological,Biological
5705,Hurricane,Hurricane
6846,Tropical Storm,"Tropical Storm,Hurricane"
1371,Hurricane,Hurricane
2177,Biological,Biological
3623,Biological,Biological
7163,Tropical Depression,Tropical Depression
2435,Biological,Biological
4906,Hurricane,Hurricane


In [461]:
type(MA_disaster_combined['designatedIncidentTypes'].iloc[0])

str

In [462]:
MA_disaster_combined.isna().sum()

incidentId                   0
state                        0
incidentTypeMA               0
region                       0
maType                       0
maPriority                   0
supportFunction              0
agencyId                     0
maId                         0
declarationType              0
assistanceRequested          0
statementOfWork              0
femaDeclarationString      156
incidentType               156
incidentBeginDate          156
fipsStateCode              156
designatedIncidentTypes      0
declarationTitle           156
year                       156
month                      156
day                        156
dtype: int64

In [463]:
df_dds[df_dds['incidentId']==2024042701]

Unnamed: 0,femaDeclarationString,state,incidentType,incidentBeginDate,fipsStateCode,region,designatedIncidentTypes,declarationTitle,incidentId,declarationType,year,month,day
14,DR-4781-TX,TX,Flood,2024-04-26 00:00:00+00:00,48,6,W,"SEVERE STORMS, STRAIGHT-LINE WINDS, TORNADOES,...",2024042701,DR,2024,4,26
1048,DR-4776-OK,OK,Tornado,2024-04-25 00:00:00+00:00,40,6,"2,W,F,T","SEVERE STORMS, STRAIGHT-LINE WINDS, TORNADOES,...",2024042701,DR,2024,4,25


In [464]:
df_ma[df_ma['incidentId']==2024042701]

Unnamed: 0,incidentId,state,incidentTypeMA,region,maType,maPriority,supportFunction,agencyId,maId,declarationType,assistanceRequested,statementOfWork
401,2024042701,TX,Severe Storm,6,FOS,High,6.0,HUD,4781DRTXHUD01,DR,Request HUD specialized expertise to FEMA to s...,"As directed by and in coordination with FEMA, ..."
403,2024042701,TX,Severe Storm,6,FOS,High,12.0,DOE-OE,4781DRTXDOE-OE01,DR,"Activate DOE to RRCC. IOF, JFO, to include vir...","As directed by and in coordination with FEMA, ..."
404,2024042701,TX,Severe Storm,6,FOS,High,13.0,DHS-FPS,4781DRTXDHS-FPS01,DR,Request for Contract Security Officers to supp...,"As directed by and in coordination with FEMA, ..."
406,2024042701,TX,Severe Storm,6,FOS,High,2.0,DHS-CISA,4781DRTXDHS-CISA01,DR,**This MA includes and replaces 4781DR-TX-CISA...,"As directed by and in coordination with FEMA, ..."
408,2024042701,TX,Severe Storm,6,FOS,High,3.0,COE-SWD,4781DRTXCOE-SWD02,DR,This MA replaces the verbal activation work an...,"As directed by and in coordination with FEMA, ..."
410,2024042701,TX,Severe Storm,6,FOS,High,2.0,DHS-CISA,4781DRTXCISA01,DR,"Activate CISA (ESF#2) to RRCC, IOF, JFO to inc...","As directed by and in coordination with FEMA, ..."
411,2024042701,OK,Severe Storm,6,FOS,High,5.0,DOD-USAF,4776DROKDOD-USAF01,DR,This MA is being reissued under 4776DR-OK and ...,"As directed by and in coordination with FEMA, ..."
419,2024042701,OK,Severe Storm,6,FOS,High,3.0,COE-SWD,4776DROKCOE-SWD01,DR,USACE specialized expertise for oversight of d...,"As directed by and in coordination with FEMA, ..."
29433,2024042701,TX,Severe Storm,6,FOS,High,7.0,GSA,4781DRTXGSA01,DR,"Activate and deploy GSA to RRCC, IOF, JFO, or ...","As directed by and in coordination with FEMA, ..."
29676,2024042701,OK,Severe Storm,6,FOS,Normal,13.0,DHS-FPS,4776DROKDHS-FPS01,DR,Contract security officers to support the FEMA...,"As directed by and in coordination with FEMA, ..."


In [465]:
print(MA_disaster_combined[MA_disaster_combined['incidentTypeMA'].isna()]['incidentId'].value_counts())
print(MA_disaster_combined[MA_disaster_combined['incidentTypeMA'].isna()]['incidentId'].nunique())

ids_without_year = MA_disaster_combined[MA_disaster_combined['incidentTypeMA'].isna()]['incidentId'].tolist()
ids_without_year

Series([], Name: count, dtype: int64)
0


[]

In [466]:
df_dds[(df_dds['incidentId'].isin(ids_without_year))]


Unnamed: 0,femaDeclarationString,state,incidentType,incidentBeginDate,fipsStateCode,region,designatedIncidentTypes,declarationTitle,incidentId,declarationType,year,month,day


In [467]:
df_ma[df_ma['incidentId']==2012102506]

Unnamed: 0,incidentId,state,incidentTypeMA,region,maType,maPriority,supportFunction,agencyId,maId,declarationType,assistanceRequested,statementOfWork
27259,2012102506,RI,Hurricane,1,FOS,High,7.0,GSA,3355EMRIGSA01,EM,Provide leasing specialists to support real es...,Obtain a suitable office to act as a Joint Fie...
27260,2012102506,CT,Hurricane,1,FOS,High,2.0,NCS,3353EMCTNCS01,EM,Activate NCS in support of the RRCC and IMAT T...,Activate National Communications System (NCS) ...
27261,2012102506,CT,Hurricane,1,FOS,High,8.0,HHS,3353EMCTHHS02,EM,HHS deploy personnel from the Administration f...,For HHS to deploy approximately 2 personnel (s...
27262,2012102506,CT,Hurricane,1,FOS,Normal,7.0,GSA,3353EMCTGSA01,EM,Activate General Services Administration (GSA)...,Post-Declaration activation for appropriate GS...
27263,2012102506,CT,Hurricane,1,FOS,High,12.0,DOE,3353EMCTDOE01,EM,"Activate ESF-12, (Dept of energy) to the RRCC ...",Pre-Declaration activation for appropriate DOE...
27264,2012102506,CT,Hurricane,1,DFA,High,3.0,COE-NAD,3353EMCTCOE-NAD03,EM,Provide emergency power generation to the affe...,Deploy the Emergency Power Planning and Respon...
27265,2012102506,CT,Hurricane,1,FOS,High,3.0,COE-NAD,3353EMCTCOE-NAD02,EM,Activate and pre-position elements associated ...,Pre-event actions include deployment of Planni...
27266,2012102506,CT,Hurricane,1,FOS,High,3.0,COE-NAD,3353EMCTCOE-NAD01,EM,Activate Corps of Engineers to perform functio...,Activate U.S. Army Corps of Engineers (USACE) ...
27267,2012102506,MA,Hurricane,1,FOS,High,7.0,GSA,3350EMMAGSA01,EM,Provide leasing specialist to support real est...,Obtain a suitable office to act as a Joint Fie...


In [468]:
MA_disaster_combined[(MA_disaster_combined['incidentId']==2012102506)&(MA_disaster_combined['year'].isna())]

Unnamed: 0,incidentId,state,incidentTypeMA,region,maType,maPriority,supportFunction,agencyId,maId,declarationType,assistanceRequested,statementOfWork,femaDeclarationString,incidentType,incidentBeginDate,fipsStateCode,designatedIncidentTypes,declarationTitle,year,month,day
6525,2012102506,RI,Hurricane,1,FOS,High,7.0,GSA,3355EMRIGSA01,EM,Provide leasing specialists to support real es...,Obtain a suitable office to act as a Joint Fie...,,,NaT,,Hurricane,,,,
6526,2012102506,CT,Hurricane,1,FOS,High,2.0,NCS,3353EMCTNCS01,EM,Activate NCS in support of the RRCC and IMAT T...,Activate National Communications System (NCS) ...,,,NaT,,Hurricane,,,,
6527,2012102506,CT,Hurricane,1,FOS,High,8.0,HHS,3353EMCTHHS02,EM,HHS deploy personnel from the Administration f...,For HHS to deploy approximately 2 personnel (s...,,,NaT,,Hurricane,,,,
6528,2012102506,CT,Hurricane,1,FOS,Normal,7.0,GSA,3353EMCTGSA01,EM,Activate General Services Administration (GSA)...,Post-Declaration activation for appropriate GS...,,,NaT,,Hurricane,,,,
6529,2012102506,CT,Hurricane,1,FOS,High,12.0,DOE,3353EMCTDOE01,EM,"Activate ESF-12, (Dept of energy) to the RRCC ...",Pre-Declaration activation for appropriate DOE...,,,NaT,,Hurricane,,,,
6530,2012102506,CT,Hurricane,1,DFA,High,3.0,COE-NAD,3353EMCTCOE-NAD03,EM,Provide emergency power generation to the affe...,Deploy the Emergency Power Planning and Respon...,,,NaT,,Hurricane,,,,
6531,2012102506,CT,Hurricane,1,FOS,High,3.0,COE-NAD,3353EMCTCOE-NAD02,EM,Activate and pre-position elements associated ...,Pre-event actions include deployment of Planni...,,,NaT,,Hurricane,,,,
6532,2012102506,CT,Hurricane,1,FOS,High,3.0,COE-NAD,3353EMCTCOE-NAD01,EM,Activate Corps of Engineers to perform functio...,Activate U.S. Army Corps of Engineers (USACE) ...,,,NaT,,Hurricane,,,,
6533,2012102506,MA,Hurricane,1,FOS,High,7.0,GSA,3350EMMAGSA01,EM,Provide leasing specialist to support real est...,Obtain a suitable office to act as a Joint Fie...,,,NaT,,Hurricane,,,,


In [469]:
MA_disaster_combined[(MA_disaster_combined['incidentId']==2023052201)]

Unnamed: 0,incidentId,state,incidentTypeMA,region,maType,maPriority,supportFunction,agencyId,maId,declarationType,assistanceRequested,statementOfWork,femaDeclarationString,incidentType,incidentBeginDate,fipsStateCode,designatedIncidentTypes,declarationTitle,year,month,day
469,2023052201,GU,Tropical Storm,9,FOS,Normal,4.0,USDA-FS,4715DRGUUSDA-FS01,DR,"Activate ESF 4 USFS to the RRCC, IOF JFO or ot...","As directed by and in coordination with FEMA, ...",,,NaT,,Tropical Storm,,,,
470,2023052201,GU,Tropical Storm,9,FOS,Normal,11.0,USDA-APH,4715DRGUUSDA-APH02,DR,"Activate ESF 11 USDA liaison(s) to the RRCC, ...","As directed by and in coordination with FEMA, ...",,,NaT,,Tropical Storm,,,,
471,2023052201,GU,Tropical Storm,9,FOS,High,11.0,USDA-APH,4715DRGUUSDA-APH01,DR,USDA liaison(s) to the NRCC to perform duties ...,"As directed by and in coordination with FEMA, ...",,,NaT,,Tropical Storm,,,,
472,2023052201,GU,Tropical Storm,9,DFA,High,8.0,HHS-ASPR,4715DRGUHHS-ASPR04,DR,HHS to provide DMAT team in support of MAWAR\r...,In support of Guam as directed by and in coord...,,,NaT,,Tropical Storm,,,,
473,2023052201,GU,Tropical Storm,9,FOS,High,8.0,HHS-ASPR,4715DRGUHHS-ASPR03,DR,"Activate HHS to RRCC, IOF, JFO, RFO, or other ...","As directed by and in coordination with FEMA, ...",,,NaT,,Tropical Storm,,,,
474,2023052201,GU,Tropical Storm,9,FOS,High,8.0,HHS-ASPR,4715DRGUHHS-ASPR02,DR,HHS to provide DMAT team in support of MAWAR,In support of Guam as directed by and in coord...,,,NaT,,Tropical Storm,,,,
475,2023052201,GU,Tropical Storm,9,FOS,Lifesaving,8.0,HHS-ASPR,4715DRGUHHS-ASPR01,DR,Activate HHS to NRCC to perform duties of ESF ...,"As directed by and in coordination with FEMA, ...",,,NaT,,Tropical Storm,,,,
476,2023052201,GU,Tropical Storm,9,FOS,Normal,7.0,GSA,4715DRGUGSA02,DR,"Activate ESF 7 GSA to RRCC, IOF, JFO, or other...","As directed by and in coordination with FEMA, ...",,,NaT,,Tropical Storm,,,,
477,2023052201,GU,Tropical Storm,9,FOS,High,7.0,GSA,4715DRGUGSA01,DR,Activate GSA to NRCC to perform duties of ESF ...,"As directed by and in coordination with FEMA, ...",,,NaT,,Tropical Storm,,,,
478,2023052201,GU,Tropical Storm,9,FOS,Normal,2.0,FCC,4715DRGUFCC01,DR,Activate FCC to the NRCC to support Typhoon MA...,"As directed by and in coordination with FEMA, ...",,,NaT,,Tropical Storm,,,,


In [470]:
df_ma[df_ma['incidentId']==2023052201]

Unnamed: 0,incidentId,state,incidentTypeMA,region,maType,maPriority,supportFunction,agencyId,maId,declarationType,assistanceRequested,statementOfWork
1773,2023052201,GU,Tropical Storm,9,FOS,Normal,4.0,USDA-FS,4715DRGUUSDA-FS01,DR,"Activate ESF 4 USFS to the RRCC, IOF JFO or ot...","As directed by and in coordination with FEMA, ..."
1776,2023052201,GU,Tropical Storm,9,FOS,Normal,11.0,USDA-APH,4715DRGUUSDA-APH02,DR,"Activate ESF 11 USDA liaison(s) to the RRCC, ...","As directed by and in coordination with FEMA, ..."
1778,2023052201,GU,Tropical Storm,9,FOS,High,11.0,USDA-APH,4715DRGUUSDA-APH01,DR,USDA liaison(s) to the NRCC to perform duties ...,"As directed by and in coordination with FEMA, ..."
1781,2023052201,GU,Tropical Storm,9,DFA,High,8.0,HHS-ASPR,4715DRGUHHS-ASPR04,DR,HHS to provide DMAT team in support of MAWAR\r...,In support of Guam as directed by and in coord...
1782,2023052201,GU,Tropical Storm,9,FOS,High,8.0,HHS-ASPR,4715DRGUHHS-ASPR03,DR,"Activate HHS to RRCC, IOF, JFO, RFO, or other ...","As directed by and in coordination with FEMA, ..."
1784,2023052201,GU,Tropical Storm,9,FOS,High,8.0,HHS-ASPR,4715DRGUHHS-ASPR02,DR,HHS to provide DMAT team in support of MAWAR,In support of Guam as directed by and in coord...
1785,2023052201,GU,Tropical Storm,9,FOS,Lifesaving,8.0,HHS-ASPR,4715DRGUHHS-ASPR01,DR,Activate HHS to NRCC to perform duties of ESF ...,"As directed by and in coordination with FEMA, ..."
1789,2023052201,GU,Tropical Storm,9,FOS,Normal,7.0,GSA,4715DRGUGSA02,DR,"Activate ESF 7 GSA to RRCC, IOF, JFO, or other...","As directed by and in coordination with FEMA, ..."
1790,2023052201,GU,Tropical Storm,9,FOS,High,7.0,GSA,4715DRGUGSA01,DR,Activate GSA to NRCC to perform duties of ESF ...,"As directed by and in coordination with FEMA, ..."
1792,2023052201,GU,Tropical Storm,9,FOS,Normal,2.0,FCC,4715DRGUFCC01,DR,Activate FCC to the NRCC to support Typhoon MA...,"As directed by and in coordination with FEMA, ..."


In [471]:
df_dds[df_dds['incidentId']==2023052201]


Unnamed: 0,femaDeclarationString,state,incidentType,incidentBeginDate,fipsStateCode,region,designatedIncidentTypes,declarationTitle,incidentId,declarationType,year,month,day


In [472]:
MA_disaster_combined['incidentId'].nunique()

326

In [474]:
MA_disaster_combined.dropna(inplace=True)

In [476]:
MA_disaster_combined['incidentId'].nunique()


321

In [477]:
MA_disaster_combined[MA_disaster_combined['year']<2023].to_parquet(train_filepath)
MA_disaster_combined[MA_disaster_combined['year']>=2023].to_parquet(test_filepath)