In [1]:
import pandas as pd
import json

In [5]:
# Load the FEMA disaster-declarations JSON export from the working directory.
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's default locale encoding.
with open('DisasterDeclarationsSummaries.json', 'r', encoding='utf-8') as f:
    fema_json_data = json.load(f)

In [8]:
# Pull the declaration records out of the top-level JSON object, then
# preview the first record (a dict) to see which fields are available.
records_key = 'DisasterDeclarationsSummaries'
fema_records = fema_json_data[records_key]
fema_records[0]

{'femaDeclarationString': 'FM-5529-OR',
 'disasterNumber': 5529,
 'state': 'OR',
 'declarationType': 'FM',
 'declarationDate': '2024-08-09T00:00:00.000Z',
 'fyDeclared': 2024,
 'incidentType': 'Fire',
 'declarationTitle': 'LEE FALLS FIRE',
 'ihProgramDeclared': False,
 'iaProgramDeclared': False,
 'paProgramDeclared': True,
 'hmProgramDeclared': True,
 'incidentBeginDate': '2024-08-08T00:00:00.000Z',
 'incidentEndDate': None,
 'disasterCloseoutDate': None,
 'tribalRequest': False,
 'fipsStateCode': '41',
 'fipsCountyCode': '067',
 'placeCode': '99067',
 'designatedArea': 'Washington (County)',
 'declarationRequestNumber': '24122',
 'lastIAFilingDate': None,
 'incidentId': '2024081001',
 'region': 10,
 'designatedIncidentTypes': 'R',
 'lastRefresh': '2024-08-27T18:22:14.800Z',
 'hash': 'ae87cf3c6ed795015b714af7166c7c295b2b67c7',
 'id': '09e3f81a-5e16-4b72-b317-1c64e0cfa59c'}

In [11]:
# Records -> DataFrame: each record becomes one row, its keys become columns.
df_fema = pd.DataFrame(data=fema_records)
df_fema.head(n=5)

Unnamed: 0,femaDeclarationString,disasterNumber,state,declarationType,declarationDate,fyDeclared,incidentType,declarationTitle,ihProgramDeclared,iaProgramDeclared,...,placeCode,designatedArea,declarationRequestNumber,lastIAFilingDate,incidentId,region,designatedIncidentTypes,lastRefresh,hash,id
0,FM-5529-OR,5529,OR,FM,2024-08-09T00:00:00.000Z,2024,Fire,LEE FALLS FIRE,False,False,...,99067,Washington (County),24122,,2024081001,10,R,2024-08-27T18:22:14.800Z,ae87cf3c6ed795015b714af7166c7c295b2b67c7,09e3f81a-5e16-4b72-b317-1c64e0cfa59c
1,FM-5528-OR,5528,OR,FM,2024-08-06T00:00:00.000Z,2024,Fire,ELK LANE FIRE,False,False,...,99031,Jefferson (County),24116,,2024080701,10,R,2024-08-27T18:22:14.800Z,432cf0995c47e3895cea696ede5621b810460501,59983f89-30bf-4888-b21b-62e8d57d9aac
2,FM-5527-OR,5527,OR,FM,2024-08-02T00:00:00.000Z,2024,Fire,MILE MARKER 132 FIRE,False,False,...,99017,Deschutes (County),24111,,2024080301,10,R,2024-08-27T18:22:14.800Z,2f21d90cb6bc64b0d4121aa3f18d852bbb4b11fa,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0
3,DR-4312-CA,4312,CA,DR,2017-05-02T00:00:00.000Z,2017,Severe Storm,FLOODING,False,False,...,60347,Resighini Rancheria (Indian Reservation),17035,,2017041001,9,,2025-03-26T20:21:32.579Z,432a3a64bdbb291ae26cf5a27a33deeabb380481,98a7c5bb-2346-45aa-a1ca-0399440d4f0b
4,DR-4251-AL,4251,AL,DR,2016-01-21T00:00:00.000Z,2016,Severe Storm,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",False,False,...,99001,Autauga (County),16003,,2015122301,4,,2025-03-27T12:21:46.559Z,dcd4ce6b37ee49875b3f1e32e9a8a16cd6a803d3,5229bbae-eee6-42b8-b277-edbafa8d6cb2


In [13]:
# Normalise the FEMA column headers in three steps: trim surrounding
# whitespace, lower-case, then turn any spaces into underscores.
# Note: the source headers are camelCase, so after lower-casing they read as
# single run-together words (e.g. 'fipsstatecode'); later cells use those names.
trimmed_headers = df_fema.columns.str.strip()
lowered_headers = trimmed_headers.str.lower()
df_fema.columns = lowered_headers.str.replace(' ', '_')
df_fema.columns

Index(['femadeclarationstring', 'disasternumber', 'state', 'declarationtype',
       'declarationdate', 'fydeclared', 'incidenttype', 'declarationtitle',
       'ihprogramdeclared', 'iaprogramdeclared', 'paprogramdeclared',
       'hmprogramdeclared', 'incidentbegindate', 'incidentenddate',
       'disastercloseoutdate', 'tribalrequest', 'fipsstatecode',
       'fipscountycode', 'placecode', 'designatedarea',
       'declarationrequestnumber', 'lastiafilingdate', 'incidentid', 'region',
       'designatedincidenttypes', 'lastrefresh', 'hash', 'id'],
      dtype='object')

In [16]:
# Build the combined FIPS code: the state code left-padded with zeros to
# 2 characters, followed by the county code left-padded to 3 characters.
# Both parts are cast to str first so zfill/concatenation operate on text.
state_part = df_fema['fipsstatecode'].astype(str).str.zfill(2)
county_part = df_fema['fipscountycode'].astype(str).str.zfill(3)
df_fema['fips_code'] = state_part + county_part
df_fema['fips_code'].head()

Unnamed: 0,fips_code
0,41067
1,41031
2,41017
3,6000
4,1001


In [18]:
# Parse the incident begin timestamps into datetimes. With errors='coerce',
# values that cannot be parsed become NaT instead of raising.
raw_begin_dates = df_fema['incidentbegindate']
df_fema['incident_start_date'] = pd.to_datetime(raw_begin_dates, errors='coerce')
df_fema['incident_start_date'].head()

Unnamed: 0,incident_start_date
0,2024-08-08 00:00:00+00:00
1,2024-08-04 00:00:00+00:00
2,2024-08-02 00:00:00+00:00
3,2017-02-08 00:00:00+00:00
4,2015-12-23 00:00:00+00:00


In [26]:
# Target variable: 1 when the declaration type is 'DR' (treated here as a
# major disaster), 0 for every other declaration type.
is_dr = df_fema['declarationtype'].eq('DR')
df_fema['is_major_disaster'] = is_dr.astype(int)
df_fema['is_major_disaster'].head()

Unnamed: 0,is_major_disaster
0,0
1,0
2,0
3,1
4,1


In [27]:
# Check for class imbalance in the target: the share of rows in each class
# (normalize=True returns proportions rather than raw counts).
target_column = df_fema['is_major_disaster']
imbalance_check = target_column.value_counts(normalize=True)
print(f"Class Distribution:\n{imbalance_check}")

Class Distribution:
is_major_disaster
0    0.572
1    0.428
Name: proportion, dtype: float64


In [28]:
# Standardise incident type labels: trim surrounding whitespace, then
# convert to Title Case. The column is overwritten with the cleaned values.
incident_labels = df_fema['incidenttype'].str.strip()
df_fema['incidenttype'] = incident_labels.str.title()

In [29]:
# Final FEMA dataset: keep only the required columns and write them to CSV.
# index=False leaves the DataFrame index out of the file.
required_cols = ['fips_code', 'incident_start_date', 'incidenttype', 'is_major_disaster', 'state']
df_fema_final = df_fema.loc[:, required_cols]
df_fema_final.to_csv('cleaned_fema_disasters.csv', index=False)