# Process the JRC Excel files

### JRC Data

ExpDam is the direct expected damage per year from river flooding, in Euro (2010 values). The data include baseline values (average 1976-2005) and the impact at specific warming levels (SWLs).

All figures are multi-model averages based on EC-EARTH r1 to r7 (7 models)

PopAff is population affected per year from river flooding. Data includes baseline values (average 1976-2005) and impact at SWLs.

All figures are multi-model averages based on EC-EARTH r1 to r7 (7 models)

Reference
Alfieri, L., Bisselink, B., Dottori, F., Naumann, G., de Roo, A., Salamon, P., Wyser, K. and Feyen, L.: Global projections of river flood risk in a warmer world, Earth's Future, doi:10.1002/2016EF000485, 2017.

### Note:

We need to calculate anomalies against the historical base period.

In [14]:
import pandas as pd
import geopandas as gpd
from iso3166 import countries
import os
import warnings
warnings.filterwarnings('ignore')

In [3]:
def identify_netcdf_and_csv_files(path='data/'):
    """Recursively collect the NetCDF and CSV files found under ``path``.

    Args:
        path: Root folder to crawl with ``os.walk``.

    Returns:
        A dict holding two lists of file paths: ``d['nc']`` for files whose
        name ends in ``.nc`` and ``d['csv']`` for files whose name ends in
        ``.csv``, e.g.
        ``{'nc': ['data/CNRS_data/cSoil/orchidee-giss-ecearth.SWL_15.eco.cSoil.nc'],
        'csv': []}``.
        Each path is the walked directory and the file name joined with a
        literal ``'/'``.
    """
    netcdf_files = []
    csv_files = []
    for root, _dirs, files in os.walk(path):
        for f in files:
            # Keep the literal '/' join (rather than os.path.join): the rest
            # of this file splits these paths on '/' and also uses them as
            # exact dictionary keys.
            full_path = ''.join([root, '/', f])
            # Test the real suffix so that an extension-less file that is
            # merely *named* 'nc' or 'csv' is not collected.
            if f.endswith('.nc'):
                netcdf_files.append(full_path)
            elif f.endswith('.csv'):
                csv_files.append(full_path)
    return {'nc': netcdf_files, 'csv': csv_files}


In [165]:

def _first_cell_as_float(df, column):
    """Parse the first cell of ``df[column]`` as a float, or return None.

    Strings may use ',' as a thousands separator (e.g. '1,234.5'). A missing
    column, an empty frame, an unparsable value or NaN all give None.
    """
    try:
        raw = df[column].values[0]
    except (KeyError, IndexError):
        return None
    if isinstance(raw, str):
        raw = raw.replace(',', '')
    try:
        number = float(raw)
    except (TypeError, ValueError):
        return None
    return None if pd.isna(number) else number


def extract_value(df, swl, verbose=False):
    """Return the anomaly of one SWL column against the historical baseline.

    Both numbers are taken from the first row of ``df``. The historical
    column is 'PopAff_1976-2005' if present, otherwise 'ExpDam_1976-2005'.

    Args:
        df: DataFrame holding the ``swl`` column and one of the two
            historical columns (the caller passes a single-country slice).
        swl: Name of the warming-level column to read, e.g. 'SWL2'.
        verbose: When True, print the raw values of the ``swl`` column.

    Returns:
        ``SWL value - historical value`` as a float, or None when either
        value is missing, cannot be parsed, or equals zero.

    Raises:
        ValueError: If ``df`` contains neither historical column.
    """
    if verbose:
        print(df[swl].values)
    # Inspect the frame that was passed in; the previous version looked at a
    # global named `data_slice`, which only worked when a stale variable of
    # that name happened to exist in the interactive session.
    if 'PopAff_1976-2005' in df:
        historical_key = 'PopAff_1976-2005'
    elif 'ExpDam_1976-2005' in df:
        historical_key = 'ExpDam_1976-2005'
    else:
        raise ValueError('Found no historical data in the file')

    tmp_abs = _first_cell_as_float(df, swl)
    tmp_historical = _first_cell_as_float(df, historical_key)

    # A zero historical baseline is deliberately treated as "no data".
    # NOTE(review): a zero SWL value is treated the same way, mirroring the
    # original truthiness test -- confirm this is intended.
    if not tmp_historical or not tmp_abs:
        return None
    return tmp_abs - tmp_historical


def gen_output_fname(fnm, swl_label):
    """Build the output CSV path for one input file and one SWL label.

    The second and third '/'-separated components of ``fnm`` are reused as
    sub-folders below './processed/admin0/', and the input file name is
    prefixed with ``swl_label`` and an underscore. Because the base folder
    literal already ends in '/', the returned string contains a doubled
    slash right after 'admin0'.
    """
    parts = fnm.split('/')
    sub_folders = '/'.join(parts[1:3])
    prefixed_name = swl_label + '_' + parts[-1]
    return './processed/admin0/' + '/' + sub_folders + '/' + prefixed_name

# Quick check of the output-path builder. Literal arguments are used so this
# cell does not depend on `fnm` / `swl` left over from an earlier session
# (neither name is defined at module level at this point in the file).
gen_output_fname('data/JRC_data/river_floods/PopAff_SWLs_Country.csv', 'SWL1.5')


def process_JRC_excel(fnm):
    """Convert one JRC per-country CSV into one admin-0 CSV per warming level.

    Despite the name, the input is read with ``pd.read_csv``. For every
    country in the simplified GADM admin-0 shapefile, the ISO code is looked
    up in the ``ISO3_countryname`` column of the CSV; countries with exactly
    one matching row are kept. For each of the columns 'SWL1.5', 'SWL2' and
    'SWL4' the anomaly returned by ``extract_value`` is stored as the
    ``mean`` statistic (the other statistics are None), and the table is
    written to the path produced by ``gen_output_fname``. Progress counters
    are printed at the end.

    Args:
        fnm: Path of the JRC CSV file. It must be one of the exact paths
            listed in ``possible_vars`` below.

    Raises:
        KeyError: If ``fnm`` is not one of the known JRC file paths.
    """
    # SIMPLIFIED SHAPES FOR ADMIN 0 LEVEL; only the `name_0` and `iso`
    # attribute columns are used here.
    shapes = gpd.read_file("./data/gadm28_adm0_simplified/gadm28_adm0_simplified.shp")
    raw_data = pd.read_csv(fnm)
    # Note 184 are how many valid admin 0 areas we got with the netcdf data.
    # Column order of the output CSV.
    keys = ['name_0', 'iso', 'variable', 'swl_info',
            'count', 'max', 'min', 'mean', 'std', 'impact_tag', 'institution',
            'model_long_name', 'model_short_name', 'model_taxonomy',
            'is_multi_model_summary', 'is_seasonal', 'season', 'is_monthly',
            'month']
    # Numeric warming level written to `swl_info` for each SWL column.
    # 'SWL1.5' previously mapped to 1.0, which did not match its label.
    swl_dic = {'SWL1.5': 1.5, 'SWL2': 2.0, 'SWL4': 4.0}

    possible_vars = {'data/JRC_data/river_floods/PopAff_SWLs_Country.csv': 'river_floods_PopAff',
                     'data/JRC_data/river_floods/ExpDam_SWLs_Country.csv': 'river_floods_ExpDam'}
    variable = possible_vars[fnm]

    # Matching shapefile countries to CSV rows does not depend on the
    # warming level, so do it once instead of once per SWL.
    matched = []
    for name_0, iso in zip(shapes['name_0'], shapes['iso']):
        data_slice = raw_data[raw_data['ISO3_countryname'] == iso]
        if len(data_slice) == 1:
            matched.append((name_0, iso, data_slice))
    tot = len(shapes.index)
    valid = len(matched)

    num_swls = 0
    for swl in ['SWL1.5', 'SWL2', 'SWL4']:
        num_swls += 1
        meta_level1 = {'variable': variable,
                       'swl_info': swl_dic[swl],
                       'is_multi_model_summary': True,
                       'model_short_name': 'EC-EARTH',
                       'model_long_name': "Projections of average changes in river flood risk per country at SWLs, obtained with the JRC impact model based on EC-EARTH r1-r7 climate projections.",
                       'model_taxonomy': 'EC-EARTH',
                       'is_seasonal': False,
                       'season': None,
                       'is_monthly': False,
                       'month': None,
                       'impact_tag': 'w',
                       'institution': None,
                       }
        extracted_values = []
        for name_0, iso, data_slice in matched:
            row = {**meta_level1,
                   'name_0': name_0,
                   'iso': iso,
                   'min': None,
                   'mean': extract_value(data_slice, swl),
                   'max': None,
                   'count': None,
                   'std': None}
            extracted_values.append([row[key] for key in keys])
        tmp_df = pd.DataFrame(extracted_values, columns=keys)

        # WRITE EXTRACTED VALUES TO A SPECIFIC SWL CSV FILE IN PROCESSED
        output_filename = gen_output_fname(fnm, swl)
        out_dir = os.path.dirname(output_filename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        tmp_df.to_csv(output_filename, index=False)
        print('Created ', output_filename)
    print('TOTAL in loop:', tot)
    print('valid:', valid)
    print("Looped for", num_swls, 'swls')

In [166]:
# Find every CSV below the JRC data folder and convert each one in turn.
fs = identify_netcdf_and_csv_files(path='data/JRC_data')
for fnm in fs['csv']:
    process_JRC_excel(fnm)

Created  ./processed/admin0//JRC_data/river_floods/SWL1.5_PopAff_SWLs_Country.csv
Created  ./processed/admin0//JRC_data/river_floods/SWL2_PopAff_SWLs_Country.csv
Created  ./processed/admin0//JRC_data/river_floods/SWL4_PopAff_SWLs_Country.csv
TOTAL in loop: 240
valid: 184
Looped for 3 swls
Created  ./processed/admin0//JRC_data/river_floods/SWL1.5_ExpDam_SWLs_Country.csv
Created  ./processed/admin0//JRC_data/river_floods/SWL2_ExpDam_SWLs_Country.csv
Created  ./processed/admin0//JRC_data/river_floods/SWL4_ExpDam_SWLs_Country.csv
TOTAL in loop: 240
valid: 184
Looped for 3 swls
