# Import packages

In [1]:
import pandas as pd

# Load data

In [2]:
DATASET_FOLDER = '../../datasets/tempetes'
path = DATASET_FOLDER + '/' + 'monthly_hist_temperatures.csv'

In [9]:
hist_temp = pd.read_csv(path,sep = ",")
hist_temp = hist_temp.drop(["Unnamed: 0", "Statistics"], axis = 1)
hist_temp = hist_temp.rename({"Temperature - (Celsius)":"avg_monthly_temperature"}, axis=1)

In [10]:
hist_temp.head()

Unnamed: 0,Country,ISO3,Month,avg_monthly_temperature,Year
0,Afghanistan,AFG,1,-0.0378,1901
1,Afghanistan,AFG,2,2.42348,1901
2,Afghanistan,AFG,3,9.3623,1901
3,Afghanistan,AFG,4,13.5564,1901
4,Afghanistan,AFG,5,18.1509,1901


# Prepare data s.t. to join it with emdat db

In [42]:
hist_temp_by_year_by_country_agg = hist_temp.groupby(["ISO3", "Year"]).agg({"avg_monthly_temperature": ["mean", "min", "max", "median"]}).reset_index()

In [43]:
column_headers = []
for i in range(len(list(hist_temp_by_year_by_country_agg.columns.get_level_values(0)))):
    str0 = list(hist_temp_by_year_by_country_agg.columns.get_level_values(0))[i]
    str1 = list(hist_temp_by_year_by_country_agg.columns.get_level_values(1))[i]
    header = str0 + "_" + str1
    column_headers.append(header)

In [44]:
column_headers

['ISO3_',
 'Year_',
 'avg_monthly_temperature_mean',
 'avg_monthly_temperature_min',
 'avg_monthly_temperature_max',
 'avg_monthly_temperature_median']

In [45]:
hist_temp_by_year_by_country_agg.columns = column_headers

In [46]:
hist_temp_by_year_by_country_agg.head()

Unnamed: 0,ISO3_,Year_,avg_monthly_temperature_mean,avg_monthly_temperature_min,avg_monthly_temperature_max,avg_monthly_temperature_median
0,AFG,1901,13.172872,-0.0378,25.3787,12.42005
1,AFG,1902,13.342649,2.32874,25.0482,12.6819
2,AFG,1903,12.006417,-1.588,24.7819,12.54385
3,AFG,1904,12.531729,-5.5545,25.0061,11.4117
4,AFG,1905,12.267143,-1.8435,25.1425,13.78495


In [47]:
hist_temp_by_year_by_country_agg.shape

(21112, 6)

# Join on emdat db

### Import emdat db

In [48]:
DATASET_FOLDER = '../../datasets/tempetes'
path = DATASET_FOLDER + '/' + 'wb_disasters_bdd.xlsx'

In [49]:
disasters_df = pd.read_excel(path)
disasters_df.head()

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
0,1900-9002-CPV,1900,9002,Natural,Climatological,Drought,Drought,,,,...,,11000.0,,,,,,,,3.261389
1,1900-9001-IND,1900,9001,Natural,Climatological,Drought,Drought,,,,...,,1250000.0,,,,,,,,3.261389
2,1904-0003-BGD,1904,3,Natural,Meteorological,Storm,Tropical cyclone,,,OFDA,...,,,,,,,,,,3.5223
3,1906-0023-BEL,1906,23,Natural,Hydrological,Flood,,,,Govern,...,14.0,6.0,,,,,,,,3.5223
4,1906-0024-BEL,1906,24,Natural,Hydrological,Flood,,,,Govern,...,,,,,,,,,,3.5223


### Filter on storms

In [51]:
storms_df = disasters_df[disasters_df["Disaster Type"]=="Storm"]

In [63]:
storms_df.shape

(4328, 43)

In [69]:
storms_df.columns

Index(['Dis No', 'Year', 'Seq', 'Disaster Group', 'Disaster Subgroup',
       'Disaster Type', 'Disaster Subtype', 'Disaster Subsubtype',
       'Event Name', 'Entry Criteria', 'Country', 'ISO', 'Region', 'Continent',
       'Location', 'Origin', 'Associated Dis', 'Associated Dis2',
       'OFDA Response', 'Appeal', 'Declaration', 'Aid Contribution',
       'Dis Mag Value', 'Dis Mag Scale', 'Latitude', 'Longitude', 'Local Time',
       'River Basin', 'Start Year', 'Start Month', 'Start Day', 'End Year',
       'End Month', 'End Day', 'Total Deaths', 'No Injured', 'No Affected',
       'No Homeless', 'Total Affected', 'Reconstruction Costs ('000 US$)',
       'Insured Damages ('000 US$)', 'Total Damages ('000 US$)', 'CPI'],
      dtype='object')

# Join

In [53]:
hist_temp_by_year_by_country_agg.dtypes

ISO3_                              object
Year_                               int64
avg_monthly_temperature_mean      float64
avg_monthly_temperature_min       float64
avg_monthly_temperature_max       float64
avg_monthly_temperature_median    float64
dtype: object

In [56]:
storms_df["Start Year"] = storms_df["Start Year"].astype("int64")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [58]:
storms_by_year_by_country_hist_temp = pd.merge(storms_df, hist_temp_by_year_by_country_agg, how="left", left_on=["Start Year", "ISO"], right_on=["Year_", "ISO3_"])

In [59]:
storms_by_year_by_country_hist_temp.head()

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI,ISO3_,Year_,avg_monthly_temperature_mean,avg_monthly_temperature_min,avg_monthly_temperature_max,avg_monthly_temperature_median
0,1904-0003-BGD,1904,3,Natural,Meteorological,Storm,Tropical cyclone,,,OFDA,...,,,,3.5223,BGD,1904.0,24.628058,17.7866,27.9551,26.7014
1,1906-0015-HKG,1906,15,Natural,Meteorological,Storm,Tropical cyclone,,,Kill,...,,,20000.0,3.5223,HKG,1906.0,22.230708,14.5506,28.6989,22.73545
2,1909-0010-BGD,1909,10,Natural,Meteorological,Storm,Tropical cyclone,,,Kill,...,,,,3.5223,BGD,1909.0,25.008875,18.6933,28.3398,26.69
3,1909-0013-BGD,1909,13,Natural,Meteorological,Storm,Tropical cyclone,,,OFDA,...,,,,3.5223,BGD,1909.0,25.008875,18.6933,28.3398,26.69
4,1909-0012-HTI,1909,12,Natural,Meteorological,Storm,Tropical cyclone,,,Kill,...,,,,3.5223,HTI,1909.0,23.569458,21.4873,25.1208,23.70925


In [71]:
storms_by_year_by_country_hist_temp_sel_cols = storms_by_year_by_country_hist_temp[["Start Year", 
                                                                                    "ISO", 
                                                                                    "Country",
                                                                                    "Disaster Type",
                                                                                    "No Affected",
                                                                                    "Total Damages ('000 US$)",
                                                                                    "avg_monthly_temperature_mean",
                                                                                    "avg_monthly_temperature_min",
                                                                                    "avg_monthly_temperature_max",
                                                                                    "avg_monthly_temperature_median"
                                                                                   ]]

In [72]:
storms_by_year_by_country_hist_temp_sel_cols.shape

(4328, 10)

In [73]:
storms_by_year_by_country_hist_temp_sel_cols.head()

Unnamed: 0,Start Year,ISO,Country,Disaster Type,No Affected,Total Damages ('000 US$),avg_monthly_temperature_mean,avg_monthly_temperature_min,avg_monthly_temperature_max,avg_monthly_temperature_median
0,1904,BGD,Bangladesh,Storm,,,24.628058,17.7866,27.9551,26.7014
1,1906,HKG,Hong Kong,Storm,,20000.0,22.230708,14.5506,28.6989,22.73545
2,1909,BGD,Bangladesh,Storm,,,25.008875,18.6933,28.3398,26.69
3,1909,BGD,Bangladesh,Storm,,,25.008875,18.6933,28.3398,26.69
4,1909,HTI,Haiti,Storm,,,23.569458,21.4873,25.1208,23.70925


In [74]:
storms_by_year_by_country_hist_temp_sel_cols.isnull().sum()

Start Year                           0
ISO                                  0
Country                              0
Disaster Type                        0
No Affected                       2397
Total Damages ('000 US$)          2192
avg_monthly_temperature_mean       580
avg_monthly_temperature_min        580
avg_monthly_temperature_max        580
avg_monthly_temperature_median     580
dtype: int64

In [75]:
test = storms_by_year_by_country_hist_temp_sel_cols[storms_by_year_by_country_hist_temp_sel_cols["avg_monthly_temperature_mean"].isnull()]

In [76]:
test.head(20)

Unnamed: 0,Start Year,ISO,Country,Disaster Type,No Affected,Total Damages ('000 US$),avg_monthly_temperature_mean,avg_monthly_temperature_min,avg_monthly_temperature_max,avg_monthly_temperature_median
25,1929,DFR,Germany Fed Rep,Storm,,55000.0,,,,
27,1930,DMA,Dominica,Storm,,,,,,
45,1942,COK,Cook Islands (the),Storm,,,,,,
47,1943,COK,Cook Islands (the),Storm,,,,,,
54,1946,AZO,Azores Islands,Storm,,,,,,
56,1946,COK,Cook Islands (the),Storm,,,,,,
59,1948,BMU,Bermuda,Storm,,,,,,
66,1950,ATG,Antigua and Barbuda,Storm,,1000.0,,,,
74,1953,DFR,Germany Fed Rep,Storm,,25000.0,,,,
75,1954,ANT,Netherlands Antilles,Storm,,,,,,


# Write csv file in datasets > tempetes directory

In [77]:
storms_by_year_by_country_hist_temp_sel_cols.to_csv('../../datasets/tempetes/' + '/' + 'emdat_with_hist_temperatures.csv')