# Analysis of EMDAT data

**Imports**

In [23]:
import pandas as pd
import datetime
from disaster_extr_constants import *
from disaster_extr_helpers import df_time_interval

In [24]:
# Path of the pre-processed EM-DAT export and the columns to parse as dates.
data = 'data/emdat_processed.csv'
parse_dates = ['StartDate', 'EndDate']

# Load the table, using the "Dis No" column as the row index.
df_emdat = pd.read_csv(
    data,
    index_col="Dis No",
    parse_dates=parse_dates,
)

In [25]:
# Preview the first four rows of the loaded table.
df_emdat.head(4)

Unnamed: 0_level_0,Group,Subgroup,Type,Subtype,Subsubtype,Name,Country,ISO,Region,Continent,...,Scale,Deaths,Injured,Affected,Homeless,TotalAffected,Damages,StartDate,EndDate,Duration
Dis No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-0011-MWI,Natural,Hydrological,Flood,Riverine flood,,,Malawi,MWI,Eastern Africa,Africa,...,Km2,278.0,645.0,638000.0,0.0,638645.0,390000.0,2015-01-01,2015-01-21,20
2015-0011-MOZ,Natural,Hydrological,Flood,Riverine flood,,,Mozambique,MOZ,Eastern Africa,Africa,...,Km2,160.0,0.0,177645.0,0.0,177645.0,0.0,2015-01-01,2015-02-09,39
2015-0030-MWI,Natural,Meteorological,Storm,Convective storm,Lightning/Thunderstorms,,Malawi,MWI,Eastern Africa,Africa,...,Kph,5.0,0.0,0.0,350.0,350.0,0.0,2015-01-02,2015-01-04,2
2015-0030-ZWE,Natural,Meteorological,Storm,Convective storm,Lightning/Thunderstorms,,Zimbabwe,ZWE,Eastern Africa,Africa,...,Kph,10.0,0.0,0.0,475.0,475.0,0.0,2015-01-02,2015-01-04,2


In [26]:
def extract_emdat(year, disaster_type, sample_frequency='2W'):
    """Count EM-DAT events of one disaster kind per time bin within a year.

    Reads the module-level ``df_emdat`` frame, keeps the rows that
    ``df_time_interval`` selects for ``year`` (based on ``StartDate``),
    filters them by disaster kind and counts the rows falling into each
    resampling bin.

    Parameters
    ----------
    year : int
        Calendar year; the interval handed to ``df_time_interval`` runs from
        ``"<year>-01-01"`` to ``"<year>-12-31"``.
    disaster_type : str
        ``'storm'`` selects rows whose ``Type`` is ``'Storm'``; any other
        value selects rows whose ``Subtype`` is ``'Heat Wave'``.
    sample_frequency : str, default '2W'
        Offset alias passed to ``.resample()``, e.g. ``'2W'`` (two weeks)
        or ``'5D'`` (five days).

    Returns
    -------
    pandas.DataFrame
        Two columns, ``StartDate`` (bin label) and ``Count`` (number of
        events in that bin), with a fresh integer index.
    """
    # Set start and end of year as "YYYY-MM-DD" strings
    upper_YEAR = datetime.datetime(year, 12, 31).strftime("%Y-%m-%d")
    lower_YEAR = datetime.datetime(year, 1, 1).strftime("%Y-%m-%d")

    # Extract EMDAT for given time interval
    df_emdat_YEAR = df_time_interval(df_emdat, lower_YEAR, upper_YEAR, date_attr='StartDate')

    # Storm or Heat Wave
    if disaster_type == 'storm':
        selected = df_emdat_YEAR.Type == 'Storm'
    else:
        # NOTE(review): the Subtype values visible in this notebook's output
        # are sentence case ('Riverine flood', 'Convective storm') -- confirm
        # that 'Heat Wave' matches the spelling used in the data.
        selected = df_emdat_YEAR.Subtype == 'Heat Wave'

    # Take an explicit copy: the column added below must be written to an
    # independent frame, not to a slice of df_emdat_YEAR (the original code
    # triggered pandas' SettingWithCopyWarning here).
    df_disaster = df_emdat_YEAR[selected].copy()

    # Artificial 'Count' column holding the (non-null) index labels, so that
    # counting it per bin gives the number of events in the bin.
    df_disaster['Count'] = df_disaster.index

    # Resample on StartDate and count only the 'Count' column. count() yields
    # 0 for empty bins (never NaN), so no dropna() is required.
    counts = (
        df_disaster
        .set_index('StartDate')
        .resample(sample_frequency)['Count']
        .count()
    )

    return counts.reset_index()

In [37]:
# Storm counts for 2015, using the default sample_frequency ('2W', i.e. 2-week bins).
extract_emdat(year=2015, disaster_type='storm')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_disaster['Count'] = df_disaster.index


Unnamed: 0,StartDate,Count
0,2015-01-04,2
1,2015-01-18,5
2,2015-02-01,4
3,2015-02-15,3
4,2015-03-01,5
5,2015-03-15,10
6,2015-03-29,5
7,2015-04-12,9
8,2015-04-26,6
9,2015-05-10,6
