In [54]:
import ipywidgets
from ipywidgets import interact, FloatSlider

In [55]:
import pandas as pd
import pathlib
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier

import geopandas as gpd

from functools import reduce

## Reading Files
---------------

In [56]:
# Locate every CABra attribute CSV below the data folder.
# The matches are materialised into a list because a bare rglob() generator can
# be consumed only once: re-running the reading cell would then load nothing.
# Sorting case-insensitively makes the load (and therefore merge) order
# independent of the operating system's directory enumeration order.
path_files = pathlib.Path('CABRa_attributes_csv/')
cabra_attributes_files = sorted(
    path_files.rglob('*.csv'),
    key=lambda csv_path: str(csv_path).lower(),
)

In [57]:
# Load every attribute table (semicolon-separated) into a list of DataFrames.
# Each path is echoed so the load order is visible in the cell output.
read_options = {'sep': ';', 'engine': 'python'}
dfs = []
for file in cabra_attributes_files:
    print(file)
    df = pd.read_csv(file, **read_options)
    dfs.append(df)

CABRa_attributes_csv\CABra_dataset _Additional_attributes.csv
CABRa_attributes_csv\CABra_dataset _Climate_attributes.csv
CABRa_attributes_csv\CABra_dataset_Groundwater_attributes.csv
CABRa_attributes_csv\CABra_dataset_Hydrologic_disturbance_attributes.csv
CABRa_attributes_csv\CABra_dataset_Land_cover_attributes.csv
CABRa_attributes_csv\CABra_dataset_Streamflow_attributes.csv
CABRa_attributes_csv\cabra_general_attributes.csv
CABRa_attributes_csv\cabra_Geology_attributes.csv
CABRa_attributes_csv\CABra_soil_attributes.csv
CABRa_attributes_csv\CABra_topography_attributes.csv


## Creating Dataset and calculating variables
-----------------------

In [58]:
# Fold all attribute tables into one wide table with successive outer joins
# on the two identifier columns shared by every file.
merge_keys = ['CABra ID', 'ANA ID']
df_merged = reduce(
    lambda merged_so_far, next_table: merged_so_far.merge(next_table, on=merge_keys, how='outer'),
    dfs,
)

In [59]:
# Preview the first rows of the merged table
df_merged.head()

Unnamed: 0,CABra ID,ANA ID,longitude_centroid,latitude_centroid,dist_coast,clim_p,clim_tmin,clim_tmax,clim_rh,clim_wind,...,soil_carbon,soil_bulk,soil_depth,catch_area,elev_mean,elev_min,elev_max,elev_gauge,catch_slope,catch_order
0,1,13880000,-68.028,-8.989,1407.55,5.64,21.61,30.48,83.17,0.82,...,34.439,25.456,1.335,243.410.279,178.97,44.65,564.03,44.65,3.14,7
1,2,14110000,-66.695,2.016,1009.09,8.88,21.83,28.6,86.41,0.42,...,21.926,17.609,1.256,76.898.617,222.97,80.4,2892.44,80.4,4.72,6
2,3,14230000,-68.936,1.573,1009.7,8.52,22.18,29.67,85.71,0.6,...,23.241,20.53,1.219,24.018.785,151.47,80.86,952.2,80.86,2.81,4
3,4,14250000,-67.305,1.734,1095.42,8.86,22.0,29.13,86.26,0.49,...,22.725,18.612,1.235,127.540.913,184.31,74.32,2892.44,74.32,3.71,6
4,5,14260000,-70.894,1.201,964.52,7.79,21.91,28.8,85.44,0.67,...,22.066,20.859,1.248,41.762.638,206.08,85.09,615.36,85.09,2.3,5


In [60]:
# Column names of the merged table (attributes from all CABra files combined)
df_merged.columns

Index(['CABra ID', 'ANA ID', 'longitude_centroid', 'latitude_centroid',
       'dist_coast', 'clim_p', 'clim_tmin', 'clim_tmax', 'clim_rh',
       'clim_wind', 'clim_srad', 'clim_et', 'clim_pet', 'aridity_index',
       'p_seasonality', 'clim_quality', 'aquif_name', 'aquif_type',
       'catch_wtd', 'catch_hand', 'hand_class', 'dist_urban', 'cover_urban_x',
       'cover_crops_x', 'res_number', 'res_area', 'res_volume',
       'res_regulation', 'water_demand', 'hdisturb_index', 'cover_main',
       'cover_bare', 'cover_forest', 'cover_crops_y', 'cover_grass',
       'cover_moss', 'cover_shrub', 'cover_urban_y', 'cover_snow',
       'cover_waterp', 'cover_waters', 'ndvi_djf', 'ndvi_mam', 'ndvi_jja',
       'ndvi_son', 'q_mean', 'q_1', 'q_5', 'q_95', 'q_99', 'q_lf', 'q_ld',
       'q_hf', 'q_hd', 'q_hfd', 'q_zero', 'q_cv', 'q_lcv', 'q_hcv',
       'q_elasticity', 'fdc_slope', 'baseflow_index', 'runoff_coef',
       'longitude', 'latitude', 'gauge_hreg', 'gauge_biome', 'gauge_state',
    

In [61]:
# Calculating ECI (Liu et al. (2020)): ECI = ln(q_mean / (clim_p - clim_et)).
# Where clim_p - clim_et is negative the ratio is negative, its logarithm is
# undefined, and NumPy returns NaN together with a RuntimeWarning ("invalid
# value encountered in log"). NaN is the wanted marker for those rows, so only
# that specific warning is silenced here; the stored values are unchanged.
with np.errstate(invalid='ignore'):
    df_merged['ECI'] = np.log(
        df_merged['q_mean'] / (df_merged['clim_p'] - df_merged['clim_et'])
    )

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [62]:
# Show the rows without a valid ECI.
# Here clim_p is smaller than clim_et, so the ratio passed to the logarithm is
# negative and np.log yields NaN.
eci_missing = df_merged['ECI'].isna()
df_merged.loc[eci_missing, ['ECI', 'q_mean', 'clim_p', 'clim_et']]

Unnamed: 0,ECI,q_mean,clim_p,clim_et
686,,0.03,1.64,1.66


In [63]:
# Ratio Effective Area / Topographic Area (Liu et al. (2020)):
# q_mean divided by (clim_p - clim_et).
net_input = df_merged['clim_p'].sub(df_merged['clim_et'])
df_merged['Aeff/Atopo'] = df_merged['q_mean'].div(net_input)

In [64]:
# Summary statistics of the area ratio and of ECI
df_merged.loc[:, ['Aeff/Atopo', 'ECI']].describe()

Unnamed: 0,Aeff/Atopo,ECI
count,735.0,734.0
mean,1.033062,-0.057251
std,0.472764,0.461552
min,-1.5,-2.277267
25%,0.812509,-0.205516
50%,1.00625,0.006505
75%,1.196199,0.179199
max,6.781955,1.914265


In [65]:
# 'catch_area' is read as text (object dtype) because its values contain
# several dots, e.g. '243.410.279'. Strip the dots and store it as int64.
# regex=False makes '.' a literal dot regardless of the pandas version; read as
# a regular expression, '.' matches every character.
# astype(str) keeps this cell re-runnable after the column is already numeric.
# NOTE(review): removing every dot treats all of them as thousands separators;
# confirm the intended unit/scale of the resulting integers.
df_merged['catch_area'] = (
    df_merged['catch_area']
    .astype(str)
    .str.replace('.', '', regex=False)
    .astype('int64')
)

In [66]:
# Catchment attributes considered relevant for the analysis
variables = [
    'aridity_index',
    'catch_area',
    'catch_slope',
    'sub_permeability',
    'elev_mean',
    'dist_coast',
    'res_area',
]

In [67]:
# Creating the GeoDataFrame: one point per row, built from the centroid
# longitude/latitude columns of the merged table.
centroid_points = gpd.points_from_xy(
    df_merged['longitude_centroid'],
    df_merged['latitude_centroid'],
)
gdf = gpd.GeoDataFrame(df_merged, geometry=centroid_points)

In [68]:
# Load the 'naturalearth_lowres' layer shipped with GeoPandas; it is used later
# as the South America background of the map.
# NOTE(review): the gpd.datasets module is deprecated in recent GeoPandas
# releases and, as far as I know, removed in 1.0. Confirm the installed
# version or read a locally stored Natural Earth file instead.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

In [120]:
def _label_ratio_classes(frame, max_ratio, min_ratio):
    """Write the gaining/losing class of every row into ``frame['substantial']``.

    The class is derived from ``frame['Aeff/Atopo']``:

    * ``>= max_ratio``            -> 'gaining'
    * ``> 1`` and ``< max_ratio`` -> 'smaller_gaining'
    * ``> min_ratio`` and ``<= 1`` -> 'smaller_losing'
    * ``<= min_ratio``            -> 'losing'

    The rules are applied in that order, so when thresholds coincide the later
    rule wins. Rows matching no rule (NaN ratio) are left as NaN instead of
    keeping a label from a previous call.
    """
    ratio = frame['Aeff/Atopo']
    labels = pd.Series(np.nan, index=ratio.index, dtype=object)
    labels.loc[ratio >= max_ratio] = 'gaining'
    labels.loc[(ratio < max_ratio) & (ratio > 1)] = 'smaller_gaining'
    labels.loc[(ratio > min_ratio) & (ratio <= 1)] = 'smaller_losing'
    labels.loc[ratio <= min_ratio] = 'losing'
    frame['substantial'] = labels


def map_ratio(max_ratio, min_ratio):
    """Map the catchments over South America, coloured by Aeff/Atopo class.

    Parameters
    ----------
    max_ratio : float
        Ratio at or above which a catchment is labelled 'gaining'.
    min_ratio : float
        Ratio at or below which a catchment is labelled 'losing'.

    Side effects
    ------------
    Rewrites the 'substantial' column of both ``df_merged`` and ``gdf`` and
    shows a matplotlib figure.
    """
    _label_ratio_classes(df_merged, max_ratio, min_ratio)
    # The map is drawn from gdf, not df_merged. gdf is a separate object, and
    # whether it sees columns added to df_merged afterwards depends on the
    # pandas/GeoPandas copy semantics, so it is labelled explicitly.
    _label_ratio_classes(gdf, max_ratio, min_ratio)

    fig, ax = plt.subplots(1, 1, figsize=(7, 7))
    world[world.continent == 'South America'].plot(color='white', edgecolor='black', ax=ax)

    # Drawing order is kept from the original: the two "smaller" classes first,
    # then the two extreme classes on top.
    class_colours = (
        ('smaller_gaining', 'lightblue'),
        ('smaller_losing', 'pink'),
        ('gaining', 'blue'),
        ('losing', 'red'),
    )
    for label, colour in class_colours:
        subset = gdf.loc[gdf['substantial'] == label]
        # Skip empty classes so GeoPandas is not asked to plot nothing.
        if not subset.empty:
            subset.plot(color=colour, ax=ax, alpha=0.3)

    ax.set_title(f'Aeff/Atopo classes (min_ratio={min_ratio}, max_ratio={max_ratio})')
    plt.show()

In [121]:
def scatter_ratio(max_ratio, min_ratio):
    """Scatter ECI against catchment attributes, coloured by Aeff/Atopo class.

    Parameters
    ----------
    max_ratio : float
        Ratio at or above which a catchment is labelled 'gaining'.
    min_ratio : float
        Ratio at or below which a catchment is labelled 'losing'.

    Side effects
    ------------
    Rewrites ``df_merged['substantial']`` and shows a 3x4 grid of scatter
    plots: eleven attributes against ECI, plus one panel of
    (clim_p - q_mean) / clim_p against clim_pet / clim_p.
    """
    df_merged.loc[df_merged['Aeff/Atopo'] >= max_ratio, 'substantial'] = 'gaining'
    df_merged.loc[(df_merged['Aeff/Atopo'] < max_ratio) &
                  (df_merged['Aeff/Atopo'] > 1), 'substantial'] = 'smaller_gaining'
    df_merged.loc[(df_merged['Aeff/Atopo'] > min_ratio) &
                  (df_merged['Aeff/Atopo'] <= 1), 'substantial'] = 'smaller_losing'
    df_merged.loc[df_merged['Aeff/Atopo'] <= min_ratio, 'substantial'] = 'losing'

    # A list palette is assigned to hue levels in their order of appearance in
    # the data, so colours would not reliably match the classes. A dict palette
    # plus hue_order pins each class to a fixed colour (the same colours as the
    # map) and keeps the legend order stable.
    class_colours = {
        'smaller_gaining': 'lightblue',
        'gaining': 'blue',
        'smaller_losing': 'pink',
        'losing': 'red',
    }
    class_order = list(class_colours)

    # Attributes plotted against ECI, in row-major panel order.
    x_columns = [
        'aridity_index', 'catch_area', 'catch_slope', 'dist_coast',
        'sub_permeability', 'elev_mean', 'res_area', 'soil_textclass',
        'sub_porosity', 'gauge_biome', 'sub_hconduc',
    ]

    fig, axes = plt.subplots(3, 4, figsize=(25, 20))
    for ax, column in zip(axes.flat, x_columns):
        sns.scatterplot(x=column, y='ECI', hue='substantial', hue_order=class_order,
                        palette=class_colours, data=df_merged, ax=ax, alpha=0.5)

    # Last panel: derived quantities instead of a single column, so the axis
    # labels are set by hand (the computed Series carry no name).
    last_ax = axes[2, 3]
    sns.scatterplot(x=df_merged['clim_pet'] / df_merged['clim_p'],
                    y=(df_merged['clim_p'] - df_merged['q_mean']) / df_merged['clim_p'],
                    hue='substantial', hue_order=class_order, palette=class_colours,
                    data=df_merged, ax=last_ax)
    last_ax.set(xlabel='clim_pet / clim_p', ylabel='(clim_p - q_mean) / clim_p')

    plt.show()

In [122]:
# Sliders for the two classification thresholds:
# max_ratio can be moved between 1 and 3 (initially 2),
# min_ratio between 0 and 1 (initially 0.5), both in steps of 0.01.
max_widget = FloatSlider(min=1, max=3, step=0.01, value=2)
min_widget = FloatSlider(min=0, max=1, step=0.01, value=0.5)

In [123]:
# Redraw the map whenever a threshold slider changes.
# The trailing ';' suppresses the repr of the object returned by interact.
interact(map_ratio, max_ratio=max_widget, min_ratio=min_widget);

interactive(children=(FloatSlider(value=2.0, description='max_ratio', max=3.0, min=1.0, step=0.01), FloatSlide…

In [124]:
# Redraw the scatter grid whenever a threshold slider changes.
# The same two slider objects used for the map are passed here.
interact(scatter_ratio, max_ratio=max_widget, min_ratio=min_widget);

interactive(children=(FloatSlider(value=2.0, description='max_ratio', max=3.0, min=1.0, step=0.01), FloatSlide…