Import Packages

In [12]:
import pandas as pd
import numpy as np
import warnings

from datetime import datetime, timedelta
from IPython.display import display

warnings.filterwarnings("ignore")

Read and Process Event Data

In [13]:
# Build `finalframe`: for each country of interest, the events with the largest
# "Total Affected" count, with a sequential Event_Key as the first column.

# --- Configuration ---------------------------------------------------------
# Columns retained from the raw export, in the order they are renamed below.
source_columns = [
    "Year", "Disaster Subgroup", "Disaster Type", "Disaster Subtype", "Country",
    "Start Month", "End Month", "Start Year", "End Year",
    "Total Deaths", "No Injured", "No Homeless", "Total Affected",
]
# Output names; positionally aligned with `source_columns`.
output_columns = [
    "Year_Ref", "Disaster_Subgroup", "Disaster_Type", "Disaster_Subtype", "Country_Name",
    "Start_Month", "End_Month", "Start_Year", "End_Year",
    "Total_Deaths", "Total_Injuries", "Total_Homeless", "Total_Affected",
]
countries = ["Canada", "USA", "Mexico", "Burundi", "Chad", "Niger", "Bolivia", "Philippines", "Indonesia"]
# Long-form country names in the raw data -> the short names used in `countries`.
country_aliases = {
    "Philippines (the)": "Philippines",
    "United States of America (the)": "USA",
    "Niger (the)": "Niger",
    "Bolivia (Plurinational State of)": "Bolivia",
}
impact_columns = ["Total Deaths", "No Injured", "No Homeless", "Total Affected"]
TOP_EVENTS_PER_COUNTRY = 10

# --- Load ------------------------------------------------------------------
# The file is read with its first line as a provisional header. The real column
# names are taken from the row at position 5, and rows 0-5 are then discarded.
events_raw = pd.read_csv("Events_Data_Raw.csv")
events_raw.columns = events_raw.iloc[5]

natural_disaster_data_processed = (
    events_raw
    .reindex(columns=source_columns)  # keep only the columns we want
    .drop(events_raw.index[0:6], axis=0)
    .sort_values(by=["Country", "Year"], ascending=[True, True])
)

# --- Clean -----------------------------------------------------------------
# Explicit assignment instead of `df[col].replace(..., inplace=True)`: the
# chained in-place form does not update the parent frame under pandas
# Copy-on-Write.
natural_disaster_data_processed["Country"] = (
    natural_disaster_data_processed["Country"].replace(country_aliases)
)

# Missing impact figures are treated as 0.
natural_disaster_data_processed[impact_columns] = (
    natural_disaster_data_processed[impact_columns].fillna(0)
)
natural_disaster_data_processed["Total Affected"] = (
    natural_disaster_data_processed["Total Affected"].astype(int)
)

# --- Select the largest events per country -----------------------------------
# Collect the per-country frames and concatenate once; `DataFrame.append` was
# removed in pandas 2.0 and re-copied the accumulated frame on every iteration.
country_top_frames = []
for country in countries:
    countryframe = natural_disaster_data_processed.loc[
        natural_disaster_data_processed["Country"] == country
    ]
    # Drop events that still have a missing value in any retained column
    # (the impact columns were already filled with 0 above).
    countryframe = countryframe.dropna()
    countryframe = countryframe.nlargest(TOP_EVENTS_PER_COUNTRY, "Total Affected", keep="first")
    if not countryframe.empty:
        country_top_frames.append(countryframe)

if country_top_frames:
    finalframe = pd.concat(country_top_frames, ignore_index=True)
else:
    # No matching events at all: keep an empty frame with the expected columns.
    finalframe = natural_disaster_data_processed.iloc[0:0, :].copy()

# --- Shape the output --------------------------------------------------------
finalframe.columns = output_columns
# Sequential surrogate key, placed as the first column.
finalframe.insert(0, "Event_Key", np.arange(len(finalframe)))
finalframe.head(50)


Unnamed: 0,Event_Key,Year_Ref,Disaster_Subgroup,Disaster_Type,Disaster_Subtype,Country_Name,Start_Month,End_Month,Start_Year,End_Year,Total_Deaths,Total_Injuries,Total_Homeless,Total_Affected
0,0,2013,Hydrological,Flood,Riverine flood,Canada,6,6,2013,2013,4,0,0,100000
1,1,2016,Climatological,Wildfire,Forest fire,Canada,5,7,2016,2016,0,0,0,88000
2,2,2020,Meteorological,Storm,Convective storm,Canada,6,6,2020,2020,0,0,0,60000
3,3,2015,Climatological,Wildfire,Forest fire,Canada,7,7,2015,2015,0,0,36,13036
4,4,2020,Hydrological,Flood,Riverine flood,Canada,4,4,2020,2020,1,0,0,12936
5,5,2008,Technological,Industrial accident,Explosion,Canada,8,8,2008,2008,2,18,0,12518
6,6,2011,Climatological,Wildfire,Forest fire,Canada,5,5,2011,2011,1,0,7000,7000
7,7,2014,Hydrological,Flood,Flash flood,Canada,8,8,2014,2014,0,4,0,6904
8,8,2005,Hydrological,Flood,Riverine flood,Canada,6,7,2005,2005,4,0,0,5000
9,9,2018,Climatological,Wildfire,Forest fire,Canada,7,8,2018,2018,0,0,0,3000


Export to CSV for Fact Table

In [14]:
# Persist the event table as CSV: the header row is written, the row index is not.
finalframe.to_csv(path_or_buf="Event_Pro_Table.csv", header=True, index=False)

Export to CSV for DB

In [15]:
# The DB export omits the country-name and reference-year columns.
final_table = finalframe.drop(["Country_Name", "Year_Ref"], axis=1)

# Preview the first five rows before writing.
display(final_table.head(5))

# Write with a header row and without the row index.
final_table.to_csv(path_or_buf="Event_Pro_DB.csv", header=True, index=False)

Unnamed: 0,Event_Key,Disaster_Subgroup,Disaster_Type,Disaster_Subtype,Start_Month,End_Month,Start_Year,End_Year,Total_Deaths,Total_Injuries,Total_Homeless,Total_Affected
0,0,Hydrological,Flood,Riverine flood,6,6,2013,2013,4,0,0,100000
1,1,Climatological,Wildfire,Forest fire,5,7,2016,2016,0,0,0,88000
2,2,Meteorological,Storm,Convective storm,6,6,2020,2020,0,0,0,60000
3,3,Climatological,Wildfire,Forest fire,7,7,2015,2015,0,0,36,13036
4,4,Hydrological,Flood,Riverine flood,4,4,2020,2020,1,0,0,12936
