In [225]:
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio as rio
import os
import re
import helpers


## Dataset construction

In [11]:
shp_dir = '../../../Data/shapefiles/'

chad_shp = gpd.read_file(shp_dir + 'TCD_adm/TCD_adm0.shp')
mali_shp = gpd.read_file(shp_dir + 'MLI_adm/MLI_adm0.shp')
niger_shp = gpd.read_file(shp_dir + 'NER_adm/NER_adm0.shp')
nigeria_shp = gpd.read_file(shp_dir + 'NGA_adm/NGA_adm0.shp')

countries = pd.concat([chad_shp, mali_shp, niger_shp, nigeria_shp])

In [14]:
countries = countries.drop(['ISO3', 'NAME_ENGLI',
       'NAME_ISO', 'NAME_FAO', 'NAME_LOCAL', 'NAME_OBSOL', 'NAME_VARIA',
       'NAME_NONLA', 'NAME_FRENC', 'NAME_SPANI', 'NAME_RUSSI',
       'NAME_ARABI', 'NAME_CHINE', 'WASPARTOF', 'CONTAINS', 'SOVEREIGN',
       'ISO2', 'WWW', 'FIPS', 'ISON', 'VALIDFR', 'VALIDTO', 'UNREGION1', 'UNREGION2', 'DEVELOPING', 'CIS',
       'Transition', 'OECD', 'WBREGION', 'WBINCOME', 'WBDEBT', 'WBOTHER',
       'CEEAC', 'CEMAC', 'CEPLG', 'COMESA', 'EAC', 'ECOWAS', 'IGAD',
       'IOC', 'MRU', 'SACU', 'UEMOA', 'UMA', 'PALOP', 'PARTA', 'CACM',
       'EurAsEC', 'Agadir', 'SAARC', 'ASEAN', 'NAFTA', 'GCC', 'CSN',
       'CARICOM', 'EU', 'CAN', 'ACP', 'Landlocked', 'AOSIS', 'SIDS',
       'Islands', 'LDC',], 1)

In [None]:
flood_dir = '../../../Data/DFO flood extents/'
flood_files = [i for i in os.listdir(flood_dir) if re.match(r'msw_20\d\d\.tif$', i)]

In [92]:
ipc_dir = '../../../Data/IPC data/updated_files/'
ipc_files = sorted([i for i in os.listdir(ipc_dir) if re.match(r'^.*201[4-9].*\.tif$', i)])[3:]

ipc_0 = [i for i in ipc_files if re.match(r'^.*\d\d\d\d10.*$', i)]
ipc_1 = [i for i in ipc_files if re.match(r'^.*\d\d\d\d((01)|(12)|(02)).*$', i)]
ipc_2 = [i for i in ipc_files if re.match(r'^.*\d\d\d\d(04).*$', i)]
ipc_3 = [i for i in ipc_files if re.match(r'^.*\d\d\d\d((06)|(07)).*$', i)]

ipc_2.append(ipc_1[-1])
ipc_1.pop()

ipc_years = [int(fname.split('_')[1][:4]) for fname in ipc_0]

In [228]:
[ipc_0,ipc_1]

[['food-food-insecurity-hotspots_201410_geographic.tif',
  'food-food-insecurity-hotspots_201510_geographic.tif',
  'food-food-insecurity-hotspots_201610_geographic.tif',
  'food-food-insecurity-hotspots_201710_geographic.tif',
  'food-food-insecurity-hotspots_201810_geographic.tif'],
 ['food-food-insecurity-hotspots_201501_geographic.tif',
  'food-food-insecurity-hotspots_201602_geographic.tif',
  'food-food-insecurity-hotspots_201702_geographic.tif',
  'food-food-insecurity-hotspots_201802_geographic.tif',
  'food-food-insecurity-hotspots_201812_geographic.tif']]

In [240]:
df = helpers.add_time_to_df(countries, 'ID_0', ipc_years, [1]).drop(['Month', 'datetime'], 1)
df = df.rename(columns={'NAME_0': 'Name'})
df[['Flood extent (% national area)', 'Mean flood duration (days)', 
    'Mean IPC (time 0)', 'Mean IPC (time 1)',
   'Mean IPC (time 2)', 'Mean IPC (time 3)']] = None
df = df.to_crs('EPSG:4326')
df = df.reset_index()

  return _prepare_from_string(" ".join(pjargs))


In [199]:
ipc2_years = [2014, 2018]
ipc3_years = [2014, 2015, 2016, 2017]

list

In [200]:
for i, file in enumerate(ipc_0):
    helpers.raster_calc_annual(df, ipc_dir + file, ipc_years[i], np.ma.mean, 'Mean IPC (time 0)', all_touched = True)

for i, file in enumerate(ipc_1):
    helpers.raster_calc_annual(df, ipc_dir + file, ipc_years[i], np.ma.mean, 'Mean IPC (time 1)', all_touched = True)

for i in range(len(ipc_2)):
    helpers.raster_calc_annual(df, ipc_dir + file, ipc2_years[i], np.ma.mean, 'Mean IPC (time 2)', all_touched = True)

for i in range(len(ipc_3)):
    helpers.raster_calc_annual(df, ipc_dir + file, ipc3_years[i], np.ma.mean, 'Mean IPC (time 3)', all_touched = True)

In [202]:
flood_years = np.arange(2014, 2020)

for i, file in enumerate(flood_files):
    helpers.raster_calc_annual(df, flood_dir + file, flood_years[i], helpers.prop_flood,
                               'Flood extent (% national area)', all_touched = True)

for i, file in enumerate(flood_files):
    helpers.raster_calc_annual(df, flood_dir + file, flood_years[i], helpers.mean_flood_duration,
                               'Mean flood duration (days)', all_touched = True)

In [242]:
df.to_csv('../../../Data/wa_flood_ipc_lag.csv')

In [243]:
flood_stats = pd.read_csv('../../../Data/wa_annual_flood_impacts.csv', index_col = 'ID')

In [246]:
merged_data = df.merge(flood_stats, on = ['Name', 'Year'], how = 'left')

merged_data = merged_data.drop(['index', 'ID_0'], 1)

#merged_data.to_csv('../../../Data/wa_annual_flood_ipc.csv')

KeyError: 'Name'

In [232]:
df.head()

Unnamed: 0,index,ID_0,ISO,Country,OBJECTID_1,POP2000,SQKM,POPSQKM,geometry,Year,Flood extent (% national area),Mean flood duration (days),Mean IPC (time 0),Mean IPC (time 1),Mean IPC (time 2),Mean IPC (time 3)
0,0,47,TCD,Chad,220,7885299.0,1168002.0,6.751101,"POLYGON ((16.81695 23.07436, 16.82721 23.06800...",2014,0.00890018,14.8601,0.99174,1.06208,1.20469,1.20469
1,0,47,TCD,Chad,220,7885299.0,1168002.0,6.751101,"POLYGON ((16.81695 23.07436, 16.82721 23.06800...",2015,0.0075038,15.8072,0.991767,1.17633,,1.20469
2,0,47,TCD,Chad,220,7885299.0,1168002.0,6.751101,"POLYGON ((16.81695 23.07436, 16.82721 23.06800...",2016,0.00721345,15.8893,1.0004,1.46242,,1.20469
3,0,47,TCD,Chad,220,7885299.0,1168002.0,6.751101,"POLYGON ((16.81695 23.07436, 16.82721 23.06800...",2017,0.00589039,13.6281,1.00047,1.3079,,1.20469
4,0,47,TCD,Chad,220,7885299.0,1168002.0,6.751101,"POLYGON ((16.81695 23.07436, 16.82721 23.06800...",2018,0.00749697,16.9034,5.97001e-06,1.20469,1.20469,


In [234]:
x = np.arange(2014,2016)
df.loc[(df.Year.isin(x))]

Unnamed: 0,index,ID_0,ISO,Country,OBJECTID_1,POP2000,SQKM,POPSQKM,geometry,Year,Flood extent (% national area),Mean flood duration (days),Mean IPC (time 0),Mean IPC (time 1),Mean IPC (time 2),Mean IPC (time 3)
0,0,47,TCD,Chad,220,7885299.0,1168002.0,6.751101,"POLYGON ((16.81695 23.07436, 16.82721 23.06800...",2014,0.00890018,14.8601,0.99174,1.06208,1.20469,1.20469
1,0,47,TCD,Chad,220,7885299.0,1168002.0,6.751101,"POLYGON ((16.81695 23.07436, 16.82721 23.06800...",2015,0.0075038,15.8072,0.991767,1.17633,,1.20469
5,0,138,MLI,Mali,148,11350798.0,1256747.0,9.031888,"POLYGON ((-5.00000 24.99800, -5.00000 24.99589...",2014,0.00275617,15.896,0.999681,0.999694,0.9993,0.9993
6,0,138,MLI,Mali,148,11350798.0,1256747.0,9.031888,"POLYGON ((-5.00000 24.99800, -5.00000 24.99589...",2015,0.00317557,15.6885,0.999815,1.1421,,0.9993
10,0,162,NER,Niger,164,10831545.0,1186021.0,9.132676,"POLYGON ((11.98314 23.52222, 12.00000 23.51517...",2014,0.00340503,22.9993,1.02982,1.08943,1.05092,1.05092
11,0,162,NER,Niger,164,10831545.0,1186021.0,9.132676,"POLYGON ((11.98314 23.52222, 12.00000 23.51517...",2015,0.00343887,25.6958,1.05109,1.19794,,1.05092
15,0,163,NGA,Nigeria,166,113861753.0,912038.625,124.843126,"MULTIPOLYGON (((6.44042 4.30875, 6.44042 4.307...",2014,0.0168943,17.9253,1.20132,1.19599,0.000296575,0.000296575
16,0,163,NGA,Nigeria,166,113861753.0,912038.625,124.843126,"MULTIPOLYGON (((6.44042 4.30875, 6.44042 4.307...",2015,0.0229931,14.1274,1.22735,1.24139,,0.000296575


In [235]:
df[['test1', 'test2']] = [1, 2]

In [239]:
for i, row in df.iterrows():
    print(row['Country'])

Chad
Chad
Chad
Chad
Chad
Mali
Mali
Mali
Mali
Mali
Niger
Niger
Niger
Niger
Niger
Nigeria
Nigeria
Nigeria
Nigeria
Nigeria


In [241]:
df.columns

Index(['index', 'ID_0', 'ISO', 'Name', 'OBJECTID_1', 'POP2000', 'SQKM',
       'POPSQKM', 'geometry', 'Year', 'Flood extent (% national area)',
       'Mean flood duration (days)', 'Mean IPC (time 0)', 'Mean IPC (time 1)',
       'Mean IPC (time 2)', 'Mean IPC (time 3)'],
      dtype='object')