# EXTRACT EVENTS FROM EM-DAT

The database was downloaded from https://www.emdat.be/ (a registered account is required).

In [1]:
import pandas as pd
import numpy as np
import requests
import json
import datetime
import re
import io
import csv
import os

In [2]:
def import_file_as_dataframe(FILENAME, path):
    """Load a JSON file into a pandas DataFrame.

    The file is decoded with ``json.load`` and passed to ``pd.DataFrame``
    unchanged, so date-like strings are NOT converted to datetimes here.

    Args:
        FILENAME: Name of the JSON file, e.g. ``'2019.json'``.
        path: Directory that contains the file. A trailing path separator
            is optional.

    Returns:
        A ``pd.DataFrame`` built from the decoded JSON content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # os.path.join only inserts a separator when one is missing, so both
    # './data/EMDAT/' and './data/EMDAT' resolve to the same file.
    input_file = os.path.join(path, FILENAME)

    # Read with an explicit encoding instead of the platform default, which
    # is not UTF-8 everywhere.
    with open(input_file, encoding='utf-8') as f:
        records = json.load(f)

    return pd.DataFrame(records)

In [3]:
# Reformat an EM-DAT dataframe: parse the date columns and index the rows by start date

def format_emdat(data):
    """Parse the EM-DAT date columns and index the frame by start date.

    ``start_date`` and ``end_date`` are parsed as day/month/year strings.
    Values that do not match that format become ``NaT`` (``errors='coerce'``)
    instead of raising. ``start_date`` is then moved out of the columns and
    used as the index.

    The input frame is left untouched; a new DataFrame is returned.

    Args:
        data: DataFrame with ``start_date`` and ``end_date`` columns.

    Returns:
        A new DataFrame indexed by ``start_date``, with ``end_date`` as a
        datetime column and all other columns unchanged.

    Raises:
        KeyError: If ``start_date`` or ``end_date`` is missing.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    formatted = data.copy()

    for column in ('start_date', 'end_date'):
        formatted[column] = pd.to_datetime(
            formatted[column], format='%d/%m/%Y', errors='coerce'
        )

    # set_index removes the column and keeps 'start_date' as the index name.
    return formatted.set_index('start_date')

In [4]:
# Load the raw EM-DAT export and run it through format_emdat in one step
data = format_emdat(import_file_as_dataframe('2019.json', './data/EMDAT/'))
data.head()

Unnamed: 0_level_0,end_date,country_name,iso,location,latitude,longitude,dis_mag_value,dis_mag_scale,dis_type,dis_subtype,total_deaths,total_affected,total_dam,insured_losses,event_name,disaster_no,associated_dis,associated_dis2
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2018-05-09,2018-05-22,Afghanistan,AFG,,34.94,68.216,184396.9,Km2,Flood,--,72,4000,0,0,,2018-0156,--,--
2018-07-12,2018-07-12,Afghanistan,AFG,khenj district (Panjshir),,,,,Landslide,Mudslide,12,0,0,0,,2018-0189,--,--
2018-05-07,2018-05-15,Afghanistan,AFG,,,,,Km2,Flood,--,18,0,0,0,,2018-0212,--,--
2018-07-12,2018-07-12,Afghanistan,AFG,"Peshghor (Khenj district, Panjshir)",,,,,Landslide,Mudslide,10,2750,0,0,,2018-0242,--,--
2018-08-24,2018-08-24,Afghanistan,AFG,Ghazi-Abad district (Kunar province),,,,Km2,Flood,Flash flood,11,0,0,0,,2018-0350,--,--


In [5]:
# Remove the EM-DAT columns that are not kept for the export
data = data.drop(
    labels=[
        'associated_dis2',
        'dis_mag_scale',
        'iso',
        'dis_type',
        'insured_losses',
        'total_dam',
        'event_name',
    ],
    axis='columns',
)

In [6]:
# Select events whose start date falls within the range we are interested in
# (both bounds inclusive). The index is not sorted at this point, and label
# slicing with .loc on a non-monotonic DatetimeIndex raises KeyError in
# pandas >= 2.0 when a bound is not present in the index. A boolean mask
# gives the same inclusive selection regardless of ordering; rows whose
# start date is NaT compare False and are dropped.
data = data[
    (data.index >= pd.Timestamp('2018-08-01'))
    & (data.index <= pd.Timestamp('2019-04-01'))
]

In [7]:
# Preview the first five rows of the current selection
data.head(n=5)

Unnamed: 0_level_0,end_date,country_name,location,latitude,longitude,dis_mag_value,dis_subtype,total_deaths,total_affected,disaster_no,associated_dis
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-08-24,2018-08-24,Afghanistan,Ghazi-Abad district (Kunar province),,,,Flash flood,11,0,2018-0350,--
2019-01-06,2019-01-06,Afghanistan,Kohistan district (Badakhshan province),,,,Flash flood,30,0,2019-0050,Industrial accidents
2019-03-02,NaT,Afghanistan,"Zheri, Dand,Damand, Arghandab, Spinboldak,Takh...",31.991,67.078,441236.3,--,63,129100,2019-0082,"Slide (land, mud, snow, rock)"
2019-03-18,2019-03-26,Afghanistan,"Herat, Ghoryan, Koh Zor, Rubat Sangi , Zinda J...",,,,--,35,0,2019-0125,--
2019-02-20,2019-02-28,Angola,"Cangadala, Cambundi-Catembo, Quirima (Malanje ...",,,,Flash flood,11,4255,2019-0070,--


In [8]:
# Order the events by ascending start date
data = data.sort_values(by=['start_date'], ascending=True)

In [9]:
# Show the remaining column names as a plain list
data.columns.tolist()

['end_date',
 'country_name',
 'location',
 'latitude',
 'longitude',
 'dis_mag_value',
 'dis_subtype',
 'total_deaths',
 'total_affected',
 'disaster_no',
 'associated_dis']

In [10]:
# Replace the EM-DAT column names with the shorter names used in the export
data.rename(
    {
        'country_name': 'countries',
        'location': 'locations',
        'latitude': 'lat',
        'longitude': 'long',
        'dis_mag_value': 'area',
        'dis_subtype': 'type',
        'total_deaths': 'deaths',
        'total_affected': 'affected',
        'disaster_no': 'id_emdat',
        'associated_dis': 'cause',
    },
    axis='columns',
    inplace=True,
)
# Name the index axis explicitly
data.rename_axis('start_date', inplace=True)

data.head()

Unnamed: 0_level_0,end_date,countries,locations,lat,long,area,type,deaths,affected,id_emdat,cause
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-08-01,2018-08-10,Venezuela (Bolivarian Republic of),"Bolivar, Monagas, Amazonas states",,,,--,0,9700,2018-0290,--
2018-08-01,2018-09-04,Sudan (the),"Kassala, West Kordofan, Khartoum",13.487,31.942,629241.72,Flash flood,23,70060,2018-0387,--
2018-08-02,2018-08-06,India,"Biswanath, Udalguri, Golaghat, Sivassagar/Char...",,,,--,3,25000,2018-0345,--
2018-08-07,2018-08-19,Mali,"Kayes city, Goumera village, Kita city (Kayes)...",,,,--,0,13150,2018-0441,--
2018-08-07,2018-08-20,India,"Thrissur, Chengannur (Kerala state); West Beng...",,,,Flash flood,504,23220000,2018-0295,"Slide (land, mud, snow, rock)"


In [11]:
# Source identifier for future merging: add a constant column set to 1 on
# every row, flagging the record as present in EM-DAT.
data['in_emdat'] = 1

In [12]:
# Write the selected events (index included) to a comma-separated file.
outfile = './data/databases/emdat.csv'
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
os.makedirs(os.path.dirname(outfile), exist_ok=True)
data.to_csv(outfile, sep=',')