# Data wrangling

In [6]:
import kagglehub
import pandas as pd
from kagglehub import KaggleDatasetAdapter

## 1. Download disaster data from Kaggle

In [3]:
# Fetch the disaster-declarations CSV from Kaggle straight into a pandas
# DataFrame. The dataset handle carries no version suffix, so this call does
# not pin a particular dataset version.
dataset_handle = 'headsortails/us-natural-disaster-declarations'
dataset_file = 'us_disaster_declarations.csv'

raw_data_df = kagglehub.load_dataset(KaggleDatasetAdapter.PANDAS, dataset_handle, dataset_file)

Downloading from https://www.kaggle.com/api/v1/datasets/download/headsortails/us-natural-disaster-declarations?dataset_version_number=116&file_name=us_disaster_declarations.csv...


100%|██████████| 3.94M/3.94M [00:00<00:00, 155MB/s]

Extracting zip of us_disaster_declarations.csv...





In [5]:
# Preview the first five records with columns shown as rows, so a wide frame
# stays readable.
raw_data_df.head().T

Unnamed: 0,0,1,2,3,4
fema_declaration_string,DR-1-GA,DR-2-TX,DR-3-LA,DR-4-MI,DR-5-MT
disaster_number,1,2,3,4,5
state,GA,TX,LA,MI,MT
declaration_type,DR,DR,DR,DR,DR
declaration_date,1953-05-02T00:00:00Z,1953-05-15T00:00:00Z,1953-05-29T00:00:00Z,1953-06-02T00:00:00Z,1953-06-06T00:00:00Z
fy_declared,1953,1953,1953,1953,1953
incident_type,Tornado,Tornado,Flood,Tornado,Flood
declaration_title,Tornado,Tornado & Heavy Rainfall,Flood,Tornado,Floods
ih_program_declared,0,0,0,0,0
ia_program_declared,1,1,1,1,1


In [8]:
# Keep an untouched local copy of the downloaded data (row index not written).
raw_data_df.to_csv(path_or_buf='../data/raw_disaster_data.csv', index=False)

## 2. Download FIPS code database from census.gov

In [9]:
# Read the pipe-delimited 2020 national county code table directly from
# census.gov and preview the first rows.
census_county_url = 'https://www2.census.gov/geo/docs/reference/codes2020/national_county2020.txt'
fips_df = pd.read_csv(census_county_url, sep='|')
fips_df.head()

Unnamed: 0,STATE,STATEFP,COUNTYFP,COUNTYNS,COUNTYNAME,CLASSFP,FUNCSTAT
0,AL,1,1,161526,Autauga County,H1,A
1,AL,1,3,161527,Baldwin County,H1,A
2,AL,1,5,161528,Barbour County,H1,A
3,AL,1,7,161529,Bibb County,H1,A
4,AL,1,9,161530,Blount County,H1,A


In [10]:
# Save the county code table locally. Unlike the other CSV exports in this
# notebook, this one writes the row index as an unnamed first column
# (index=True is the pandas default, spelled out here).
# NOTE(review): confirm whether readers of this file rely on that column.
fips_df.to_csv('../data/fips_codes.csv', index=True)

## 3. Add county name based on FIPS code

In [31]:
# Build a lookup from five-digit county FIPS code to county name, then use it
# to add a `county_name` column to a copy of the raw disaster data.

# The census table stores the state and county parts in separate columns and,
# as loaded here, without leading zeros (e.g. 1 rather than 01). Left zero-pad
# them to two and three digits respectively.
state_fp=fips_df['STATEFP'].astype(str).str.zfill(2).to_list()
county_fp=fips_df['COUNTYFP'].astype(str).str.zfill(3).to_list()

# Concatenate the state and county codes to get the full five-digit FIPS code
fips=[state+county for state, county in zip(state_fp, county_fp)]

fips_lookup=dict(zip(fips, fips_df['COUNTYNAME']))

# Render the disaster FIPS codes as five-character strings as well. Without
# this padding, codes for states whose FIPS number is below 10 (e.g. 1001)
# can never match the zero-padded lookup keys (e.g. '01001') and would be left
# untranslated.
fips_codes=raw_data_df['fips'].apply(str).str.zfill(5)

# Work on a copy so raw_data_df stays untouched. Translate each code to its
# county name; codes with no entry in the lookup keep the code string as a
# placeholder. map + fillna does a single dictionary lookup per row, which
# avoids the slow path of Series.replace with a dictionary of thousands of keys.
data_df=raw_data_df.copy()
data_df['county_name']=fips_codes.map(fips_lookup).fillna(fips_codes)
data_df.tail()

Unnamed: 0,fema_declaration_string,disaster_number,state,declaration_type,declaration_date,fy_declared,incident_type,declaration_title,ih_program_declared,ia_program_declared,...,disaster_closeout_date,fips,place_code,designated_area,declaration_request_number,last_ia_filing_date,last_refresh,hash,id,county_name
64087,DR-4696-ME,4696,ME,DR,2023-03-22T00:00:00Z,2023,Severe Storm,Severe Storm And Flooding,0,0,...,,23031,99031,"York (County)(in MSA 6400,6450)",23013,,2023-03-23T22:31:41Z,c6ed4ead7fd3bee8113ef15d9110e845c56a8b2f,3bd84f66-74d0-4e4a-9287-2414fda89014,York County
64088,DR-4697-MS,4697,MS,DR,2023-03-26T00:00:00Z,2023,Severe Storm,"Severe Storms, Straight-Line Winds, And Tornadoes",1,0,...,,28015,99015,Carroll (County),23025,2023-05-25T00:00:00Z,2023-03-26T06:41:23Z,9ccd8b25b2e64d4e44a043b1da61f3c42e2d02a9,764fa617-efd9-44c4-9d4a-c48e972b485c,Carroll County
64089,DR-4697-MS,4697,MS,DR,2023-03-26T00:00:00Z,2023,Severe Storm,"Severe Storms, Straight-Line Winds, And Tornadoes",1,0,...,,28053,99053,Humphreys (County),23025,2023-05-25T00:00:00Z,2023-03-26T06:41:23Z,62d6f324279680dbb6b1eb2ce91a9bf5ae964e24,c9300696-062b-430f-9f9d-3ad6b1562779,Humphreys County
64090,DR-4697-MS,4697,MS,DR,2023-03-26T00:00:00Z,2023,Severe Storm,"Severe Storms, Straight-Line Winds, And Tornadoes",1,0,...,,28095,99095,Monroe (County),23025,2023-05-25T00:00:00Z,2023-03-26T06:41:23Z,5cb6b2789fc77375f0a1d0e1a8898a1ce3dfc952,df697ed2-4c59-4d02-84e7-d4e84d6e2723,Monroe County
64091,DR-4697-MS,4697,MS,DR,2023-03-26T00:00:00Z,2023,Severe Storm,"Severe Storms, Straight-Line Winds, And Tornadoes",1,0,...,,28125,99125,Sharkey (County),23025,2023-05-25T00:00:00Z,2023-03-26T06:41:23Z,ef1b370fa2c218a02e815935fc7a35173f266e30,821952eb-f0a2-4109-9fa3-b7029e882874,Sharkey County


In [32]:
# Write the disaster data, now including county_name, to disk (row index not
# written).
data_df.to_csv(path_or_buf='../data/disaster_data.csv', index=False)