# EXTRACT EVENTS FROM EM-DAT

The database was downloaded with a registered account from https://www.emdat.be/

In [1]:
import pandas as pd
import numpy as np
import requests
import json
import datetime
import re
import io
import csv
import os

In [2]:
def import_file_as_dataframe(FILENAME, path):
    """Load a JSON file into a pandas DataFrame.

    Parameters
    ----------
    FILENAME : str
        Name of the JSON file, e.g. ``'2019.json'``.
    path : str
        Directory that contains the file, with or without a trailing
        path separator.

    Returns
    -------
    pandas.DataFrame
        The decoded JSON content wrapped in a DataFrame. Values are kept
        as decoded by ``json.load`` (dates are not converted here).

    Raises
    ------
    FileNotFoundError
        If the resulting path does not exist.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # os.path.join only inserts the separator when it is missing; plain
    # string concatenation turned './data/databases' + '2019.json' into
    # './data/databases2019.json'.
    INPUT_FILE = os.path.join(path, FILENAME)

    # Read data from file without converting dates. JSON text is UTF-8,
    # so do not depend on the platform's default encoding.
    with open(INPUT_FILE, encoding='utf-8') as f:
        data = json.load(f)

    return pd.DataFrame(data)

In [3]:
# Parse the EM-DAT date columns and index the dataframe by start date
def format_emdat(data):
    """Parse the date columns and index the frame by start date.

    ``start_date`` and ``end_date`` are parsed as day/month/year; entries
    that do not match that format become ``NaT``. The ``start_date``
    column is then removed from the columns and used as the row index.

    The frame is modified in place and the same object is returned.
    """
    day_month_year = '%d/%m/%Y'
    for column in ('start_date', 'end_date'):
        data[column] = pd.to_datetime(
            data[column], format=day_month_year, errors='coerce'
        )

    # Move start_date out of the columns and make it the row index
    start_dates = data.pop('start_date')
    data.index = start_dates

    return data

In [4]:
# Load the raw EM-DAT export and parse its date columns.
# The trailing '/' makes the directory explicit, so the file resolves to
# ./data/databases/2019.json regardless of how the loader joins the
# directory and the file name.
data = import_file_as_dataframe('2019.json', './data/databases/')
data = format_emdat(data)
data.head()

Unnamed: 0_level_0,end_date,country_name,iso,location,latitude,longitude,dis_mag_value,dis_mag_scale,dis_type,dis_subtype,total_deaths,total_affected,total_dam,insured_losses,event_name,disaster_no,associated_dis,associated_dis2
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2018-05-09,2018-05-22,Afghanistan,AFG,,34.94,68.216,184396.9,Km2,Flood,--,72,4000,0,0,,2018-0156,--,--
2018-07-12,2018-07-12,Afghanistan,AFG,khenj district (Panjshir),,,,,Landslide,Mudslide,12,0,0,0,,2018-0189,--,--
2018-05-07,2018-05-15,Afghanistan,AFG,,,,,Km2,Flood,--,18,0,0,0,,2018-0212,--,--
2018-07-12,2018-07-12,Afghanistan,AFG,"Peshghor (Khenj district, Panjshir)",,,,,Landslide,Mudslide,10,2750,0,0,,2018-0242,--,--
2018-08-24,2018-08-24,Afghanistan,AFG,Ghazi-Abad district (Kunar province),,,,Km2,Flood,Flash flood,11,0,0,0,,2018-0350,--,--


In [5]:
# Remove the columns that are not carried into the output table
data = data.drop(
    labels=[
        'associated_dis2',
        'dis_mag_scale',
        'iso',
        'dis_type',
        'insured_losses',
        'total_dam',
        'event_name',
    ],
    axis='columns',
)

In [6]:
# Preview the first five rows after dropping columns
data.head(n=5)

Unnamed: 0_level_0,end_date,country_name,location,latitude,longitude,dis_mag_value,dis_subtype,total_deaths,total_affected,disaster_no,associated_dis
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-05-09,2018-05-22,Afghanistan,,34.94,68.216,184396.9,--,72,4000,2018-0156,--
2018-07-12,2018-07-12,Afghanistan,khenj district (Panjshir),,,,Mudslide,12,0,2018-0189,--
2018-05-07,2018-05-15,Afghanistan,,,,,--,18,0,2018-0212,--
2018-07-12,2018-07-12,Afghanistan,"Peshghor (Khenj district, Panjshir)",,,,Mudslide,10,2750,2018-0242,--
2018-08-24,2018-08-24,Afghanistan,Ghazi-Abad district (Kunar province),,,,Flash flood,11,0,2018-0350,--


In [7]:
# Order the events chronologically; 'start_date' is the index level name
data = data.sort_values(by=['start_date'], ascending=True)

In [8]:
# Show the remaining column names as a plain Python list
data.columns.tolist()

['end_date',
 'country_name',
 'location',
 'latitude',
 'longitude',
 'dis_mag_value',
 'dis_subtype',
 'total_deaths',
 'total_affected',
 'disaster_no',
 'associated_dis']

In [9]:
# Rename the EM-DAT column names to the shorter names used in the output
data.rename(
    columns={
        'country_name': 'countries',
        'location': 'locations',
        'latitude': 'lat',
        'longitude': 'long',
        'dis_mag_value': 'area',
        'dis_subtype': 'type',
        'total_deaths': 'deaths',
        'total_affected': 'affected',
        'disaster_no': 'id_emdat',
        'associated_dis': 'cause',
    },
    inplace=True,
)
# Make sure the index is labelled 'start_date'
data.index = data.index.rename('start_date')

data.head(n=5)

Unnamed: 0_level_0,end_date,countries,locations,lat,long,area,type,deaths,affected,id_emdat,cause
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-01-01,2018-01-05,Malaysia,"Kuantan, Rompin, Pekan, Jerantut, Lipis, Bera...",365.883,102.337,117492.07,--,2,12000,2018-0068,--
2018-01-03,2018-01-07,Congo (the Democratic Republic of the),"Galiema, Bandalungwa, Selembao, Masina, Limete...",-4.355,15.116,5597.13,--,51,15760,2018-0005,"Slide (land, mud, snow, rock)"
2018-01-08,2018-01-14,United States of America (the),Santa Barbara County (Montecito and Carpinteri...,,,,Mudslide,21,1366,2018-0010,Flood
2018-01-13,2018-01-17,Philippines (the),"Bicol, Dvao, Eastern Visayas, NOrthern Mindana...",,,,--,11,180000,2018-0051,--
2018-01-13,2018-01-22,Paraguay,"Bañado Norte, Chacarita, Bañado Sur",-25.312,-57.599,2924.11,Riverine flood,0,5000,2018-0066,--


In [10]:
# Source identifier for future merging: every row here comes from EM-DAT
data = data.assign(in_emdat=1)

In [11]:
# Write the table (including the start_date index) to a comma-separated file
outfile = './data/databases/emdat.csv'
data.to_csv(path_or_buf=outfile, sep=',', index=True)