# Master Thesis: Natural Disasters Descriptive Statistics

Author:
[Dominik Bursy](mailto:dominik.bursy@icloud.com)
 
Last Update: October 2022
 
---
 
The objective of this notebook is to inspect the data from the [International Disaster Database](https://www.emdat.be/).
 
---
 
## Table of Contents

- [Import Packages](#packages)
- [Set Color Scheme](#color_scheme)
- [Descriptive Analysis EM-DAT](#descriptive_analysis)

---

## Import Packages

In [8]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import geopandas as gpd
import time

import statsmodels.api as sm

import os
import warnings
# NOTE(review): with no category given this filter applies to every warning
# raised in the notebook, deprecation notices included - consider narrowing it.
warnings.simplefilter('ignore')

# Never truncate the column list when a wide DataFrame is displayed.
pd.options.display.max_columns = None

## Set Color Scheme

In [9]:
## Global plot styling for the whole notebook.
## Background on custom palettes:
## https://towardsdatascience.com/how-to-use-your-own-color-palettes-with-seaborn-a45bf5175146

# Start from the seaborn defaults.
# NOTE(review): this call is kept deliberately - it may leave state behind
# (e.g. colour shorthand codes) that the themed call below does not reset;
# verify before removing it.
sns.set_theme()

# Hide the top/right spines and draw tick marks on the bottom/left axes.
custom_params = {
    "axes.spines.right": False,
    "axes.spines.top": False,
    "xtick.bottom": True,
    "ytick.left": True,
}

# White background with the 'Paired' palette.
# Palettes tried as alternatives: 'viridis', 'Spectral'; style tried: "ticks".
# A fully custom palette could be installed with
#   sns.set_palette(sns.color_palette("viridis", n_colors=10))
sns.set_theme(style="white", rc=custom_params, palette='Paired')

## Descriptive Analysis EM-DAT

### Data Preparation

In [19]:
## Read the data
## The EM-DAT export is expected at ../data/emdat_global_full.csv, relative to
## the directory the notebook kernel is started in.

# The previous os.path.dirname("__file__") took the dirname of a string
# literal, which is always '' - the path was therefore already resolved
# against the working directory. Spell that out instead of implying a
# script-relative lookup.
data_path = os.path.join('..', 'data', 'emdat_global_full.csv')

# The same data is also distributed as 'emdat_global_full.xlsx'
# (sheet 'emdat data'); use pd.read_excel(..., sheet_name='emdat data')
# to load that variant instead.
df_global = pd.read_csv(data_path)

In [20]:
## Exclude Biological: Epidemic, Insect infestation, Animal accident
## Exclude Extra-terrestrial: Impact
## Pool Mass movement (dry) with Landslide

excluded_types = ['Epidemic', 'Insect infestation', 'Animal accident', 'Impact']

# Take an explicit copy of the filtered rows: the .loc assignments below (and
# in later cells) then write to an independent frame instead of a slice that
# pandas may flag as a chained assignment.
df_global = df_global[~df_global['Disaster Type'].isin(excluded_types)].copy()

# Dry mass movements are analysed together with landslides.
df_global.loc[df_global['Disaster Type'] == 'Mass movement (dry)', 'Disaster Type'] = 'Landslide'

In [12]:
## Divide the continent "Americas" into North and South America
## and relabel Oceania as Australia/Oceania (seven-continent convention):
## https://www.worldometers.info/geography/7-continents/

north_american_regions = ['Caribbean', 'Central America', 'Northern America']
in_north_america = df_global['Region'].isin(north_american_regions)
in_south_america = df_global['Region'].isin(['South America'])

# The continent is derived from the finer-grained 'Region' column.
df_global.loc[in_north_america, 'Continent'] = 'North America'
df_global.loc[in_south_america, 'Continent'] = 'South America'

is_oceania = df_global['Continent'] == 'Oceania'
df_global.loc[is_oceania, 'Continent'] = 'Australia/Oceania'

---

In [13]:
## Average Individuals affected per Continent
## Scope: wildfires only, years 2003-2008 (both inclusive).

condition = df_global['Disaster Type'] == 'Wildfire'
year_condition = df_global['Year'].between(2003, 2008)  # inclusive on both ends
condition = condition & year_condition

# Sum per (Year, Continent). numeric_only=True is explicit because
# pandas >= 2.0 no longer drops text columns silently when aggregating.
df_group = df_global[condition].groupby(['Year', 'Continent']).sum(numeric_only=True)

feature_condition = ['Total Affected']

# Yearly totals across all continents.
totals_by_year = df_group[feature_condition].groupby(['Year']).sum()
display(totals_by_year)

# Totals per continent over the whole window, shown in a fixed order.
new_index = ['Asia', 'Africa', 'Europe', 'North America', 'South America', 'Australia/Oceania']
totals_by_continent = df_group[feature_condition].groupby(['Continent']).sum()
display(totals_by_continent.reindex(new_index))

# Mean of the yearly totals and mean of the continent totals. Both divide the
# same grand total by the number of groups, so they coincide whenever the
# number of years equals the number of continents.
display(totals_by_year.mean())
display(totals_by_continent.mean())

Unnamed: 0_level_0,Total Affected
Year,Unnamed: 1_level_1
2003,184333.0
2004,20519.0
2005,6907.0
2006,2062.0
2007,1785017.0
2008,58668.0


Unnamed: 0_level_0,Total Affected
Continent,Unnamed: 1_level_1
Asia,5110.0
Africa,6925.0
Europe,1161352.0
North America,752823.0
South America,128285.0
Australia/Oceania,3011.0


Total Affected    342917.666667
dtype: float64

Total Affected    342917.666667
dtype: float64

In [14]:
## Average Damage per Continent
## Scope: wildfires only, years 2003-2008 (both inclusive).

condition = df_global['Disaster Type'] == 'Wildfire'
year_condition = df_global['Year'].between(2003, 2008)  # inclusive on both ends
condition = condition & year_condition

# Sum per (Year, Continent). numeric_only=True is explicit because
# pandas >= 2.0 no longer drops text columns silently when aggregating.
df_group = df_global[condition].groupby(['Year', 'Continent']).sum(numeric_only=True)

# Column holding the damages; the adjusted variant is
# 'Total Damages, Adjusted (\'000 US$)'.
feature_condition = ['Total Damages (\'000 US$)']

# Yearly totals across all continents.
totals_by_year = df_group[feature_condition].groupby(['Year']).sum()
display(totals_by_year)

# Totals per continent over the whole window, shown in a fixed order.
new_index = ['Asia', 'Africa', 'Europe', 'North America', 'South America', 'Australia/Oceania']
totals_by_continent = df_group[feature_condition].groupby(['Continent']).sum()
display(totals_by_continent.reindex(new_index))

# Mean of the yearly totals and mean of the continent totals. Both divide the
# same grand total by the number of groups, so they coincide whenever the
# number of years equals the number of continents.
display(totals_by_year.mean())
display(totals_by_continent.mean())

Unnamed: 0_level_0,Total Damages ('000 US$)
Year,Unnamed: 1_level_1
2003,6095000.0
2004,3000.0
2005,3850000.0
2006,839000.0
2007,4597454.0
2008,2532000.0


Unnamed: 0_level_0,Total Damages ('000 US$)
Continent,Unnamed: 1_level_1
Asia,14000.0
Africa,430000.0
Europe,7874454.0
North America,9128000.0
South America,30000.0
Australia/Oceania,440000.0


Total Damages ('000 US$)    2.986076e+06
dtype: float64

Total Damages ('000 US$)    2.986076e+06
dtype: float64

---