# Billion Dollar Disasters Data Cleaning

### In this notebook, I read in the scraped data from Billion Dollar Disasters and do some cleaning in the Pandas library.

In [3]:
import pandas as pd

In [4]:
df = pd.read_csv('billion_dollar_disasters_scrape.csv')
df.head()

Unnamed: 0,event_name,disaster_type,begin_date,end_date,details,cost,deaths
0,Southern/Eastern/Northwestern Drought and Heat...,Drought,"Jan 1, 2024","Dec 31, 2024","Drought conditions impacted many Southern, Eas...",$5.4 CI,136
1,Southern/Eastern/Northwestern Drought and Heat...,Drought,"Jan 1, 2024","Dec 31, 2024","Drought conditions impacted many Southern, Eas...",$5.3 CI,136
2,Hurricane Milton,Tropical Cyclone,"Oct 9, 2024","Oct 10, 2024",Category 3 Hurricane Milton with 120 mph susta...,$34.3 CI,32
3,Hurricane Milton,Tropical Cyclone,"Oct 9, 2024","Oct 10, 2024",Category 3 Hurricane Milton with 120 mph susta...,$34.3 CI,32
4,Hurricane Helene,Tropical Cyclone,"Sep 24, 2024","Sep 29, 2024",Category 4 Hurricane Helene with 140 mph susta...,$78.7 CI,219


In [5]:
df.rename(columns={'cost': 'est_cost_billionUSD_CPI-adjusted'}, inplace=True)

In [6]:
import re

df['begin_year'] = df['begin_date'].str.extract(r', (\d{4})')
df['end_year'] = df['end_date'].str.extract(r', (\d{4})')
df.head()

Unnamed: 0,event_name,disaster_type,begin_date,end_date,details,est_cost_billionUSD_CPI-adjusted,deaths,begin_year,end_year
0,Southern/Eastern/Northwestern Drought and Heat...,Drought,"Jan 1, 2024","Dec 31, 2024","Drought conditions impacted many Southern, Eas...",$5.4 CI,136,2024,2024
1,Southern/Eastern/Northwestern Drought and Heat...,Drought,"Jan 1, 2024","Dec 31, 2024","Drought conditions impacted many Southern, Eas...",$5.3 CI,136,2024,2024
2,Hurricane Milton,Tropical Cyclone,"Oct 9, 2024","Oct 10, 2024",Category 3 Hurricane Milton with 120 mph susta...,$34.3 CI,32,2024,2024
3,Hurricane Milton,Tropical Cyclone,"Oct 9, 2024","Oct 10, 2024",Category 3 Hurricane Milton with 120 mph susta...,$34.3 CI,32,2024,2024
4,Hurricane Helene,Tropical Cyclone,"Sep 24, 2024","Sep 29, 2024",Category 4 Hurricane Helene with 140 mph susta...,$78.7 CI,219,2024,2024


In [10]:
df['est_cost_billionUSD_CPI-adjusted'] = df['est_cost_billionUSD_CPI-adjusted'].str.replace('$', '') 
df['est_cost_billionUSD_CPI-adjusted'] = df['est_cost_billionUSD_CPI-adjusted'].str.replace(' CI', '')
df['est_cost_billionUSD_CPI-adjusted'] = df['est_cost_billionUSD_CPI-adjusted'].str.replace('*', '')
df.head()

Unnamed: 0,event_name,disaster_type,begin_date,end_date,details,est_cost_billionUSD_CPI-adjusted,deaths,begin_year,end_year
0,Southern/Eastern/Northwestern Drought and Heat...,Drought,"Jan 1, 2024","Dec 31, 2024","Drought conditions impacted many Southern, Eas...",5.4,136,2024,2024
1,Southern/Eastern/Northwestern Drought and Heat...,Drought,"Jan 1, 2024","Dec 31, 2024","Drought conditions impacted many Southern, Eas...",5.3,136,2024,2024
2,Hurricane Milton,Tropical Cyclone,"Oct 9, 2024","Oct 10, 2024",Category 3 Hurricane Milton with 120 mph susta...,34.3,32,2024,2024
3,Hurricane Milton,Tropical Cyclone,"Oct 9, 2024","Oct 10, 2024",Category 3 Hurricane Milton with 120 mph susta...,34.3,32,2024,2024
4,Hurricane Helene,Tropical Cyclone,"Sep 24, 2024","Sep 29, 2024",Category 4 Hurricane Helene with 140 mph susta...,78.7,219,2024,2024


In [11]:
df['est_cost_billionUSD_CPI-adjusted'] = df['est_cost_billionUSD_CPI-adjusted'].astype(float)

In [13]:
df['deaths'] = df['deaths'].str.replace(',', '')

In [25]:
df['deaths'] = df['deaths'].astype(int)

In [26]:
df['begin_date'] = pd.to_datetime(df['begin_date'])
df['end_date'] = pd.to_datetime(df['end_date'])

In [27]:
df['disaster_length_days'] = (df['end_date'] - df['begin_date']).dt.days
df.head()

Unnamed: 0,event_name,disaster_type,begin_date,begin_year,end_date,end_year,disaster_length_days,details,est_cost_billionUSD_CPI-adjusted,deaths
0,Southern/Eastern/Northwestern Drought and Heat...,Drought,2024-01-01,2024,2024-12-31,2024,365,"Drought conditions impacted many Southern, Eas...",5.4,136
1,Southern/Eastern/Northwestern Drought and Heat...,Drought,2024-01-01,2024,2024-12-31,2024,365,"Drought conditions impacted many Southern, Eas...",5.3,136
2,Hurricane Milton,Tropical Cyclone,2024-10-09,2024,2024-10-10,2024,1,Category 3 Hurricane Milton with 120 mph susta...,34.3,32
3,Hurricane Milton,Tropical Cyclone,2024-10-09,2024,2024-10-10,2024,1,Category 3 Hurricane Milton with 120 mph susta...,34.3,32
4,Hurricane Helene,Tropical Cyclone,2024-09-24,2024,2024-09-29,2024,5,Category 4 Hurricane Helene with 140 mph susta...,78.7,219


In [28]:
col = df.pop('begin_year')  
df.insert(3, 'begin_year', col)  

In [29]:
col = df.pop('end_year')  
df.insert(5, 'end_year', col)  
df.head()

Unnamed: 0,event_name,disaster_type,begin_date,begin_year,end_date,end_year,disaster_length_days,details,est_cost_billionUSD_CPI-adjusted,deaths
0,Southern/Eastern/Northwestern Drought and Heat...,Drought,2024-01-01,2024,2024-12-31,2024,365,"Drought conditions impacted many Southern, Eas...",5.4,136
1,Southern/Eastern/Northwestern Drought and Heat...,Drought,2024-01-01,2024,2024-12-31,2024,365,"Drought conditions impacted many Southern, Eas...",5.3,136
2,Hurricane Milton,Tropical Cyclone,2024-10-09,2024,2024-10-10,2024,1,Category 3 Hurricane Milton with 120 mph susta...,34.3,32
3,Hurricane Milton,Tropical Cyclone,2024-10-09,2024,2024-10-10,2024,1,Category 3 Hurricane Milton with 120 mph susta...,34.3,32
4,Hurricane Helene,Tropical Cyclone,2024-09-24,2024,2024-09-29,2024,5,Category 4 Hurricane Helene with 140 mph susta...,78.7,219


In [30]:
col = df.pop('disaster_length_days')  
df.insert(6, 'disaster_length_days', col)  
df.head()

Unnamed: 0,event_name,disaster_type,begin_date,begin_year,end_date,end_year,disaster_length_days,details,est_cost_billionUSD_CPI-adjusted,deaths
0,Southern/Eastern/Northwestern Drought and Heat...,Drought,2024-01-01,2024,2024-12-31,2024,365,"Drought conditions impacted many Southern, Eas...",5.4,136
1,Southern/Eastern/Northwestern Drought and Heat...,Drought,2024-01-01,2024,2024-12-31,2024,365,"Drought conditions impacted many Southern, Eas...",5.3,136
2,Hurricane Milton,Tropical Cyclone,2024-10-09,2024,2024-10-10,2024,1,Category 3 Hurricane Milton with 120 mph susta...,34.3,32
3,Hurricane Milton,Tropical Cyclone,2024-10-09,2024,2024-10-10,2024,1,Category 3 Hurricane Milton with 120 mph susta...,34.3,32
4,Hurricane Helene,Tropical Cyclone,2024-09-24,2024,2024-09-29,2024,5,Category 4 Hurricane Helene with 140 mph susta...,78.7,219


In [31]:
df.dtypes

event_name                                  object
disaster_type                               object
begin_date                          datetime64[ns]
begin_year                                  object
end_date                            datetime64[ns]
end_year                                    object
disaster_length_days                         int64
details                                     object
est_cost_billionUSD_CPI-adjusted           float64
deaths                                       int64
dtype: object

In [32]:
df.to_csv('billion_dollar_disasters_cleaned.csv')