# Join precipitation and EMDAT

In [1]:
# Display the value of every top-level expression in a cell, not only the last
# one. Cells further down that list several bare expressions (e.g. two
# consecutive `len(...)` calls) rely on this setting to show all results.
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

In [2]:
#!pip install xlrd -q # For Excel support in pandas
# sudo npm install -g electron@6.1.4 orca --unsafe-perm=true --allow-root # For output SVG

In [3]:
import pandas as pd
import plotly.graph_objects as go

In [4]:
# Relative path to the datasets directory. The trailing slash is required
# because file names are appended with plain string concatenation.
DATASET_FOLDER = "../../datasets/"
#!ls $DATASET_FOLDER

In [5]:
# Load the EM-DAT export. The column-name row is not the first row of the
# sheet, hence the `skiprows` / `header` offsets.
df = pd.read_excel(
    DATASET_FOLDER + "emdat_public_2020_09_12_query_uid-tAnKEX.xlsx",
    skiprows=5,
    header=1,
)
# Keep flood events only. `.copy()` turns the filtered result into an
# independent frame, so later column assignments on `df` are unambiguous and
# do not raise SettingWithCopyWarning.
df = df[df["Disaster Type"] == "Flood"].copy()
df.head(3)

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
16,1906-0023-BEL,1906,23,Natural,Hydrological,Flood,,,,Govern,...,14.0,6.0,,,,,,,,3.5223
17,1906-0024-BEL,1906,24,Natural,Hydrological,Flood,,,,Govern,...,,,,,,,,,,3.5223
50,1911-0002-CHN,1911,2,Natural,Hydrological,Flood,Riverine flood,,,Kill,...,,100000.0,,,,,,,,3.652756


In [7]:
# List the column labels of the flood-only EM-DAT frame.
df.columns

Index(['Dis No', 'Year', 'Seq', 'Disaster Group', 'Disaster Subgroup',
       'Disaster Type', 'Disaster Subtype', 'Disaster Subsubtype',
       'Event Name', 'Entry Criteria', 'Country', 'ISO', 'Region', 'Continent',
       'Location', 'Origin', 'Associated Dis', 'Associated Dis2',
       'OFDA Response', 'Appeal', 'Declaration', 'Aid Contribution',
       'Dis Mag Value', 'Dis Mag Scale', 'Latitude', 'Longitude', 'Local Time',
       'River Basin', 'Start Year', 'Start Month', 'Start Day', 'End Year',
       'End Month', 'End Day', 'Total Deaths', 'No Injured', 'No Affected',
       'No Homeless', 'Total Affected', 'Reconstruction Costs ('000 US$)',
       'Insured Damages ('000 US$)', 'Total Damages ('000 US$)', 'CPI'],
      dtype='object')

In [None]:
# Missing start months become 0, a sentinel that matches no calendar month, so
# the column can be stored as plain integers like the month numbers used on
# the rainfall side of the join. `assign` builds a new frame rather than
# writing into the filtered slice, which avoids SettingWithCopyWarning.
df = df.assign(**{"Start Month": df["Start Month"].fillna(0).astype(int)})

# Rainfall

In [81]:
# Read the historical precipitation table and discard exact duplicate rows.
file = f"{DATASET_FOLDER}historical_precipitation.csv"
df_rain = pd.read_csv(file).drop_duplicates()
df_rain.head(3)

Unnamed: 0,Rainfall-MM,Year,Month,Country_Name,Country_Code_ISO3
0,55.7448,1901,Jan Average,Afghanistan,AFG
1,13.948,1901,Feb Average,Afghanistan,AFG
2,43.8464,1901,Mar Average,Afghanistan,AFG


In [82]:
# Remove surrounding whitespace from the ISO3 codes so they can be matched
# against the `ISO` column of the EM-DAT frame.
df_rain["Country_Code_ISO3"] = df_rain["Country_Code_ISO3"].str.strip()

In [83]:
# Build the lookup from the raw month labels found in the rainfall table
# (e.g. " Jan Average") to calendar month numbers, with "Unknown" -> 0.
# The number is derived from the three-letter abbreviation inside each label
# rather than from the order in which labels happen to appear in the file, so
# a reordered, incomplete or extended file cannot silently shift the numbering.
MONTH_ABBREVIATIONS = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()


def _month_number(label):
    """Return the month number (1-12) for a label such as " Jan Average".

    Integer labels already in 1-12 are returned unchanged so the cell can be
    re-run after the mapping has been applied. Any other label raises
    ValueError instead of being mapped to a wrong month.
    """
    if isinstance(label, int) and 1 <= label <= 12:
        return label
    abbreviation = str(label).strip()[:3].title()
    if abbreviation not in MONTH_ABBREVIATIONS:
        raise ValueError(f"Unrecognised month label in rainfall data: {label!r}")
    return MONTH_ABBREVIATIONS.index(abbreviation) + 1


# Keys keep the exact raw spelling (including any leading space) because the
# lookup is applied to the untouched `Month` column.
month_text = ["Unknown"] + df_rain.Month.unique().tolist()
month_int = [0] + [_month_number(label) for label in month_text[1:]]
month_map = dict(zip(month_text, month_int))
month_map

{'Unknown': 0,
 ' Jan Average': 1,
 ' Feb Average': 2,
 ' Mar Average': 3,
 ' Apr Average': 4,
 ' May Average': 5,
 ' Jun Average': 6,
 ' Jul Average': 7,
 ' Aug Average': 8,
 ' Sep Average': 9,
 ' Oct Average': 10,
 ' Nov Average': 11,
 ' Dec Average': 12}

In [84]:
# Replace the textual month labels with their integer month numbers.
df_rain["Month"] = df_rain["Month"].map(month_map)

In [85]:
# Keep only the rainfall measurement and the three columns used as join keys.
df_rain = df_rain.loc[:, ["Rainfall-MM", "Year", "Month", "Country_Code_ISO3"]]
df_rain.head(3)

Unnamed: 0,Rainfall-MM,Year,Month,Country_Code_ISO3
0,55.7448,1901,1,AFG
1,13.948,1901,2,AFG
2,43.8464,1901,3,AFG


In [108]:
# Attach the monthly rainfall of the disaster's country, year and start month
# to every flood record. This is an inner join, so floods without a matching
# rainfall row are dropped. The right-hand key columns duplicate `ISO` and
# `Start Month` after the join and are removed before the result is saved.
df_merge = df.merge(
    df_rain,
    how="inner",
    left_on=["Year", "Start Month", "ISO"],
    right_on=["Year", "Month", "Country_Code_ISO3"],
).drop(columns=["Country_Code_ISO3", "Month"])
df_merge.to_csv(DATASET_FOLDER + "emdat_with_precipitation.csv", index=False)

In [105]:
# Number of rows in the merged frame.
df_merge.shape[0]

4509

In [107]:
# Show the identifying columns next to the joined rainfall value.
df_merge.loc[
    :, ["Dis No", "Event Name", "Year", "Start Month", "Rainfall-MM", "ISO"]
]

Unnamed: 0,Dis No,Event Name,Year,Start Month,Rainfall-MM,ISO
0,1906-0023-BEL,,1906,5,91.8369,BEL
1,1906-0024-BEL,,1906,4,34.7374,BEL
2,1926-0008-IND,,1926,7,294.4590,IND
3,1927-0012-DZA,,1927,11,20.3177,DZA
4,1928-0018-IND,,1928,10,108.1400,IND
...,...,...,...,...,...,...
4504,2016-0516-MYS,,2016,12,381.0210,MYS
4505,2016-0531-PRK,,2016,7,242.0870,PRK
4506,2016-0573-VNM,,2016,8,270.0710,VNM
4507,2016-0552-USA,,2016,3,57.8384,USA


In [89]:
# Number of flood records per country code.
df["ISO"].value_counts()

IND    306
CHN    304
IDN    221
USA    189
PHL    151
      ... 
CSK      1
QAT      1
GRD      1
MUS      1
GUF      1
Name: ISO, Length: 190, dtype: int64

In [90]:
# Rainfall rows for one country code (QAT).
df_rain[df_rain["Country_Code_ISO3"] == "QAT"]

Unnamed: 0,Rainfall-MM,Year,Month,Country_Code_ISO3
386976,7.92779,1901,1,QAT
386977,4.81203,1901,2,QAT
386978,18.20500,1901,3,QAT
386979,9.34320,1901,4,QAT
386980,1.08177,1901,5,QAT
...,...,...,...,...
389443,0.20859,2016,8,QAT
389444,0.00000,2016,9,QAT
389445,0.00000,2016,10,QAT
389446,2.30364,2016,11,QAT


In [91]:
# Restrict the flood records to a single country (BEL) for a small-scale check.
df_sample = df[df["ISO"] == "BEL"]

In [92]:
# First two rows of the single-country sample.
df_sample.iloc[:2]

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
16,1906-0023-BEL,1906,23,Natural,Hydrological,Flood,,,,Govern,...,14.0,6.0,,,,,,,,3.5223
17,1906-0024-BEL,1906,24,Natural,Hydrological,Flood,,,,Govern,...,,,,,,,,,,3.5223


In [97]:
# Rainfall rows for the same country as the flood sample (BEL).
df_rain_sample = df_rain[df_rain["Country_Code_ISO3"] == "BEL"]
df_rain_sample.iloc[:3]

Unnamed: 0,Rainfall-MM,Year,Month,Country_Code_ISO3
41760,40.3495,1901,1,BEL
41761,32.2644,1901,2,BEL
41762,68.7418,1901,3,BEL


In [98]:
# WARNING: inner join, so we lose disasters that have no rainfall data.
# NOTE: this rebinds `df_merge`, which was also assigned by the full merge.
df_merge = df_sample.merge(
    df_rain_sample,
    how="inner",
    left_on=["Year", "Start Month", "ISO"],
    right_on=["Year", "Month", "Country_Code_ISO3"],
)

In [99]:
# Row counts before and after the join; the difference is the number of
# sample events that found no rainfall match.
df_sample.shape[0]
df_merge.shape[0]

23

21

In [100]:
# Show the join keys from both sides together to check that they line up.
df_merge.loc[
    :,
    [
        "Dis No",
        "Event Name",
        "Year",
        "Start Month",
        "Month",
        "Rainfall-MM",
        "ISO",
        "Country_Code_ISO3",
    ],
]

Unnamed: 0,Dis No,Event Name,Year,Start Month,Month,Rainfall-MM,ISO,Country_Code_ISO3
0,1906-0023-BEL,,1906,5,5,91.8369,BEL,BEL
1,1906-0024-BEL,,1906,4,4,34.7374,BEL,BEL
2,1930-0021-BEL,,1930,11,11,145.242,BEL,BEL
3,1953-0025-BEL,,1953,1,1,28.2489,BEL,BEL
4,1956-0028-BEL,,1956,5,5,57.2277,BEL,BEL
5,1971-0119-BEL,,1971,8,8,59.9165,BEL,BEL
6,1984-0356-BEL,,1984,2,2,86.6275,BEL,BEL
7,1987-0300-BEL,,1987,8,8,81.6746,BEL,BEL
8,1993-0094-BEL,,1993,1,1,97.8052,BEL,BEL
9,1993-0237-BEL,,1993,12,12,191.497,BEL,BEL
