# EXTRACT EVENTS FROM THE DARTMOUTH FLOOD OBSERVATORY

The input file used here (`FloodArchive.xlsx`) can be downloaded from: http://floodobservatory.colorado.edu/Archives/index.html

In [1]:
import pandas as pd
import numpy as np
import requests
import json
import datetime
import re
import io
import csv
import os

In [2]:
# Location of the flood archive workbook (download it from the link above).
INPUT_FILE = "../../Code/data/Dartmouth/FloodArchive.xlsx"

# Read the workbook into a DataFrame, using its first column as the row index.
data = pd.read_excel(io=INPUT_FILE, index_col=0)

In [3]:
# Preview the first rows of the freshly loaded archive.
data.head(n=5)

Unnamed: 0_level_0,GlideNumber,Country,OtherCountry,long,lat,Area,Began,Ended,Validation,Dead,Displaced,MainCause,Severity
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0,Algeria,0,5.23026,35.8142,92615.67,1985-01-01,1985-01-05,News,26,3000,Heavy rain,1.0
2,0,Brazil,0,-45.3489,-18.7111,678498.82,1985-01-15,1985-02-02,News,229,80000,Heavy rain,2.0
3,0,Phillipines,0,122.974,10.0207,12846.03,1985-01-20,1985-01-21,News,43,444,Torrential rain,1.0
4,0,Indonesia,0,124.606,1.01489,16542.12,1985-02-04,1985-02-18,News,21,300,Torrential rain,1.0
5,0,Mozambique,0,32.3491,-25.8693,20082.21,1985-02-09,1985-02-11,News,19,0,Heavy rain,2.0


In [4]:
# Index the events by their start date: 'Began' stops being a column and
# becomes the row index.
# NOTE(review): the index the frame was loaded with (shown as 'ID' in the
# preview above) is replaced here and not kept anywhere - confirm that is intended.
data.set_index('Began', inplace=True)

In [5]:
# Preview the frame now that it is indexed by 'Began'.
data.head(n=5)

Unnamed: 0_level_0,GlideNumber,Country,OtherCountry,long,lat,Area,Ended,Validation,Dead,Displaced,MainCause,Severity
Began,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1985-01-01,0,Algeria,0,5.23026,35.8142,92615.67,1985-01-05,News,26,3000,Heavy rain,1.0
1985-01-15,0,Brazil,0,-45.3489,-18.7111,678498.82,1985-02-02,News,229,80000,Heavy rain,2.0
1985-01-20,0,Phillipines,0,122.974,10.0207,12846.03,1985-01-21,News,43,444,Torrential rain,1.0
1985-02-04,0,Indonesia,0,124.606,1.01489,16542.12,1985-02-18,News,21,300,Torrential rain,1.0
1985-02-09,0,Mozambique,0,32.3491,-25.8693,20082.21,1985-02-11,News,19,0,Heavy rain,2.0


In [6]:
# Columns that are not carried over to the output.
unused_columns = ['GlideNumber', 'Validation', 'Severity']
data = data.drop(unused_columns, axis=1)

# Map the archive's column names onto the names used from here on.
# NOTE(review): 'ID' does not appear to be a column at this point (the index
# was replaced by 'Began' in an earlier cell), so that entry looks like a
# no-op - confirm whether an 'id_darmouth' column was meant to be kept.
column_names = {
    'Country': 'countries',
    'Dead': 'deaths',
    'Displaced': 'affected',
    'Ended': 'end_date',
    'MainCause': 'cause',
    'ID': 'id_darmouth',
    'Area': 'area',
}
data = data.rename(columns=column_names)

# The index holds the event start date; name it accordingly.
data = data.rename_axis('start_date')

In [7]:
# Order the events by start date and add a constant flag column that
# identifies this source in the later merging process.
data = data.sort_values(by='start_date').assign(in_darmouth=1)
data.head(n=5)

Unnamed: 0_level_0,countries,OtherCountry,long,lat,area,end_date,deaths,affected,cause,in_darmouth
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1985-01-01,Algeria,0,5.23026,35.8142,92615.67,1985-01-05,26,3000,Heavy rain,1
1985-01-15,Brazil,0,-45.3489,-18.7111,678498.82,1985-02-02,229,80000,Heavy rain,1
1985-01-20,Phillipines,0,122.974,10.0207,12846.03,1985-01-21,43,444,Torrential rain,1
1985-02-04,Indonesia,0,124.606,1.01489,16542.12,1985-02-18,21,300,Torrential rain,1
1985-02-09,Mozambique,0,32.3491,-25.8693,20082.21,1985-02-11,19,0,Heavy rain,1


In [8]:
# Move 'start_date' out of the index and into a regular column; the rows get a
# plain positional index while the country columns are combined.
data.reset_index(inplace=True)

# A value of 0 in 'OtherCountry' is treated as "no second country" and turned
# into a missing value. The result is assigned back to the column: calling
# replace(..., inplace=True) on the selected column is a chained in-place
# update, which pandas does not guarantee to write through to `data`.
data['OtherCountry'] = data['OtherCountry'].replace({0: np.nan})

# Join both countries into the single 'countries' column ("A, B") for every
# row that has a second country, in one vectorized step instead of a
# row-by-row loop.
has_other = data['OtherCountry'].notna()
data.loc[has_other, 'countries'] = (
    data.loc[has_other, 'countries'] + ", " + data.loc[has_other, 'OtherCountry']
)

In [9]:
# 'OtherCountry' is no longer needed once it has been merged into 'countries';
# drop it and put 'start_date' back as the row index.
data = data.drop(columns='OtherCountry').set_index('start_date')
data.head(n=5)

Unnamed: 0_level_0,countries,long,lat,area,end_date,deaths,affected,cause,in_darmouth
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1985-01-01,Algeria,5.23026,35.8142,92615.67,1985-01-05,26,3000,Heavy rain,1
1985-01-15,Brazil,-45.3489,-18.7111,678498.82,1985-02-02,229,80000,Heavy rain,1
1985-01-20,Phillipines,122.974,10.0207,12846.03,1985-01-21,43,444,Torrential rain,1
1985-02-04,Indonesia,124.606,1.01489,16542.12,1985-02-18,21,300,Torrential rain,1
1985-02-09,Mozambique,32.3491,-25.8693,20082.21,1985-02-11,19,0,Heavy rain,1


In [10]:
outfile = "./data/databases/dartmouth.csv"

# to_csv does not create missing folders, so make sure the target directory
# exists before writing (no error if it is already there).
os.makedirs(os.path.dirname(outfile), exist_ok=True)

# Write the cleaned events, with 'start_date' (the index) as the first column.
data.to_csv(outfile, sep=',')

In [11]:
# Show the column names of the exported frame.
print(data.columns.tolist())

['countries', 'long', 'lat', 'area', 'end_date', 'deaths', 'affected', 'cause', 'in_darmouth']
