In [1]:
# import functions 
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import glob
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
from pylab import MaxNLocator
import rioxarray
from shapely.geometry import Point
from shapely.geometry import mapping

In [2]:
def geo_idx(dd, dd_array):
   """
   Return the index of the entry in ``dd_array`` that is closest to ``dd``.

   The absolute difference between every array entry and ``dd`` is computed
   and the position of the smallest difference is returned (``argmin`` gives
   the first such position when there are ties).
   """
   distances = np.abs(dd_array - dd)
   return distances.argmin()

In [3]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from multiprocessing import Pool
from inspect import signature

#--------------------------------------------------------------
# @profile
def parallel(function, it, nbrCores, noInteract=False):
    """
    Eagerly evaluate ``function`` over ``it`` in a pool of worker processes.

    Parameters
    ----------
    function : callable
        Worker function. If its signature has exactly one parameter it is
        dispatched with ``pool.map``; otherwise each item of ``it`` is
        unpacked as the argument tuple via ``pool.starmap``.
    it : iterable
        Items (or argument tuples) to process.
    nbrCores : int
        Number of worker processes.
    noInteract : bool, optional
        When True, switch matplotlib to the 'Agg' backend while the pool runs
        and try to switch back to 'Qt5Agg' afterwards.

    Returns
    -------
    numpy.ndarray or generator of numpy.ndarray
        If the first result is a plain tuple, a generator yielding one array
        per tuple position (so the caller can unpack it); otherwise a single
        array built from the list of results. An empty input yields an empty
        array.
    """
    #disable interactive window for plots, also kills existing figures
    if noInteract: plt.switch_backend('Agg')

    #generate pool of workers
    with Pool(processes = nbrCores) as pool:

        #single argument for function
        if len(signature(function).parameters)==1:
            results = pool.map(function, it)

        #multiple arguments for function
        else:
            results = pool.starmap(function, it)

        #shut the workers down while the pool is still alive; leaving the
        #`with` block terminates the pool, so these calls belong inside it
        pool.close()
        pool.join()

    #re enable interactive window for plots
    if noInteract: 
        try: plt.switch_backend('Qt5Agg')
        except ImportError: pass   #pass if non-interactive shell 

    #nothing was processed: results[0] does not exist, return an empty array
    if len(results) == 0:
        return np.asarray(results)

    #tuple results: regroup by position, one array per returned element
    if type(results[0])==tuple:
        return (np.asarray(r) for r in zip(*results)) 

    #convert directly to numpy array, merge each proc times
    return np.asarray(results)


#--------------------------------------------------------------
def distrib_task(begin, end, division) :
    """
    Split the inclusive index range [begin, end] into contiguous slices.

    The range is cut into ``division`` slices whose lengths differ by at most
    one; the leading slices receive the leftover indexes. When there are fewer
    indexes than divisions, only as many one-element slices as there are
    indexes are produced.

    Returns a list of ``slice(lower, upper, 1)`` objects.
    """
    #how many indexes in total, per slice, and left over
    total = end - begin + 1
    per_job, extra = divmod(total, division)

    #fewer indexes than divisions: one slice per index
    if per_job == 0:
        division = extra

    jobs = []
    start = begin
    for n in range(division):
        #the first `extra` slices take one additional index each
        stop = start + per_job + (1 if n < extra else 0)
        jobs.append(slice(start, stop, 1))
        start = stop

    return jobs

In [4]:
from rich import print
import pandas as pd
from datetime import datetime
from xlsx2csv import Xlsx2csv
from io import StringIO

def timer(name, startTime = None):
    """
    Start or report a named timer.

    Called without ``startTime`` it prints a start message and returns the
    current ``datetime``. Called with the value returned earlier it prints the
    elapsed time and returns ``None``.
    """
    if not startTime:
        startTime = datetime.now()
        print(f"Timer: Starting [{name}] at {startTime}")
        return startTime

    print(f"Timer: Elapsed time for [{name}]: {datetime.now() - startTime}")


def read_excel(path: str, sheet_index: int) -> pd.DataFrame:
    """
    Load one sheet of an .xlsx workbook by converting it to CSV in memory.

    The path is printed, the sheet identified by ``sheet_index`` is converted
    with Xlsx2csv into an in-memory text buffer, and that buffer is parsed with
    ``pandas.read_csv(low_memory=False)``.
    """
    print(path)
    csv_text = StringIO()
    converter = Xlsx2csv(path, outputencoding="utf-8")
    converter.convert(csv_text, sheetid=sheet_index)
    # rewind so read_csv starts at the beginning of the converted text
    csv_text.seek(0)
    return pd.read_csv(csv_text, low_memory=False)

In [5]:
%matplotlib inline

In [6]:
def natural_keys_urban(text):
    """
    Sort key ``year*12 + month`` parsed from the part of ``text`` after the last 'w_'.

    The month is the text between the first 't' and the first '_' of that
    tail; the year is the single character four positions from the end
    (presumably names like '..._gw_t12_y1.nc' - verify against the files).
    """
    tail = text.split('w_')[-1]
    month = tail[tail.index('t') + 1:tail.index('_')]
    return int(tail[-4]) * 12 + int(month)
def natural_keys(text):
    """
    Sort key ``year*12 + month`` parsed from the part of ``text`` after the last '_y'.

    The year is the first character of that tail; the month is the text
    between the first 't' and the first '/' (presumably directories named like
    'final_y3_t5/' - verify against the files).
    """
    tail = text.split('_y')[-1]
    month = tail[tail.index('t') + 1:tail.index('/')]
    return int(tail[0]) * 12 + int(month)
def natural_keys_ag(text):
    """
    Sort key ``year*12 + month`` parsed from the part of ``text`` after the last 'w_'.

    Same layout as ``natural_keys_urban`` except that the month starts after
    the first 'm' of the tail instead of the first 't'.
    """
    tail = text.split('w_')[-1]
    month = tail[tail.index('m') + 1:tail.index('_')]
    return int(tail[-4]) * 12 + int(month)

In [7]:
# Root directory from which every input and output path in this notebook is built.
# NOTE(review): hard-coded absolute path; the commented-out line is an alternative root.
# basepath = r'/oak/stanford/groups/gorelick/bhima_08262024'
basepath = r"/scratch/users/awan005/bhima_mas_code"

In [15]:
# Output directory prefixes for the four scenarios; the glob cell appends '_' + a `sim` suffix.
# NOTE(review): file_path2 points at scenario3 and file_path3 at scenario2 - confirm this
# ordering is intentional, because it fixes the order of the entries in the *_tt lists.
file_path1 = basepath + r"/outputs/data_raw/sixyears_synergy/hydro_outputs_scenario1"
file_path2 = basepath + r"/outputs/data_raw/sixyears_synergy/hydro_outputs_scenario3"
file_path3 = basepath + r"/outputs/data_raw/sixyears_synergy/hydro_outputs_scenario2"
file_path4 = basepath + r"/outputs/data_raw/sixyears_synergy/hydro_outputs_scenario4"

In [16]:
# Suffixes appended (after '_') to each scenario directory prefix when globbing for outputs.
sim = ['agphys','agsupplyphys']

In [17]:
# Accumulators: one flat list of file paths per output type, concatenated over
# every (sim suffix, scenario prefix) combination in loop order.
baseoutput_tt = []
NonIrr_GW_path_tt = []
NonIrr_Res_path_tt = []
NonIrr_Lake_path_tt = []
planning_output_mo_gw_path_tt = []
planning_output_mo_sw_path_tt = []


for k in range(0,len(sim),1):
    for file_path in [file_path1,file_path2,file_path3,file_path4]:
        # Collect the files of this run directory (<scenario prefix>_<sim suffix>).
        baseoutput = glob.glob(file_path + '_' + sim[k]+ "/implementation*/implementation_human_outputs.xlsx")
        NonIrr_GW_path = glob.glob(file_path + '_' + sim[k] + r"/final_y*/GW_Domestic_daily.nc")
        NonIrr_Res_path = glob.glob(file_path + '_' + sim[k] + r"/final_y*/Res_Domestic_daily.nc")
        NonIrr_Lake_path = glob.glob(file_path + '_' + sim[k] + r"/final_y*/Lake_Domestic_daily.nc")
        planning_output_mo_gw_path = glob.glob(file_path + '_' + sim[k]+ 
                                               r'/urban_to_hydro/urban_CWatM_implementation_gw_t*.nc')
        planning_output_mo_sw_path = glob.glob(file_path + '_' + sim[k]+ 
                                               r'/urban_to_hydro/urban_CWatM_implementation_sw_t*.nc')

        # glob returns files in arbitrary order; sort by the year*12+month key
        # parsed from each path so that index i means the same step in every list.
        baseoutput.sort(key=natural_keys)
        NonIrr_GW_path.sort(key=natural_keys)
        NonIrr_Res_path.sort(key=natural_keys)
        NonIrr_Lake_path.sort(key=natural_keys)
        planning_output_mo_gw_path.sort(key=natural_keys_urban)
        planning_output_mo_sw_path.sort(key=natural_keys_urban)

        # Keep at most the first 36 files of each run
        # (presumably 36 monthly steps = 3 years - TODO confirm).
        baseoutput_tt += baseoutput[:36]
        NonIrr_GW_path_tt+=NonIrr_GW_path[:36]
        NonIrr_Res_path_tt+=NonIrr_Res_path[:36]
        NonIrr_Lake_path_tt+=NonIrr_Lake_path[:36]
        planning_output_mo_gw_path_tt+=planning_output_mo_gw_path[:36]
        planning_output_mo_sw_path_tt+=planning_output_mo_sw_path[:36]

In [18]:
# Sanity check: number of 36-file runs per scenario prefix.
# NOTE(review): with the two-entry `sim` list and four scenario prefixes defined above this is
# at most 2.0; the saved output of 6.0 suggests the cell was last run with a different `sim`.
len(baseoutput_tt)/36/4

6.0

In [19]:
# All six path lists must have the same length, because later cells index them with a shared i.
print(len(baseoutput_tt)==len(NonIrr_GW_path_tt)==len(NonIrr_Res_path_tt)==len(NonIrr_Lake_path_tt)==len(planning_output_mo_gw_path_tt)==len(planning_output_mo_sw_path_tt))

In [20]:
import psutil

In [21]:
# Worker count is hard-coded here; the commented alternative asks psutil for the physical core count.
nbrCores = 64 #psutil.cpu_count(logical=False)

# NOTE(review): `jobs` is not used anywhere in the visible part of the notebook.
jobs = distrib_task(0,len(baseoutput_tt)-1,nbrCores)
print(nbrCores)
# (path, sheet id) argument tuples; read_excel has two parameters, so parallel() uses starmap.
it = ((j,1) for j in baseoutput_tt)
it2 = ((j,2) for j in baseoutput_tt)

# Sheet 1 and sheet 2 of every implementation_human_outputs.xlsx, in baseoutput_tt order.
out_total = parallel(read_excel,it,nbrCores)
out_hh = parallel(read_excel,it2,nbrCores)

In [22]:
def urban_cwatm_ratio(i):
    """
    Build the household table of step ``i`` joined with per-cell CWatM/urban water ratios.

    Reads, at index ``i`` of the module-level path lists, the CWatM groundwater,
    reservoir and lake domestic NetCDF files and the urban gw/sw NetCDF files,
    derives per-cell ratios ``ratio`` (sw / sw_m3) and ``ratio_gw`` (gw / gw_m3),
    and merges them on rounded coordinates with the household sheet ``out_hh[i]``.

    Depends on module-level names defined in other cells: the ``*_path_tt`` lists,
    ``cellarea``, ``adminsegs_df``, ``out_hh`` and ``tanker_shp``.

    Returns the rows with ``units_size > 0`` as a GeoDataFrame in which infinities
    and missing values have been replaced by 0.
    """

    ### cwatm groundwater non-irr use
    print(NonIrr_GW_path_tt[i])
    # [:-1] drops the last entry along the first dimension (presumably time - TODO confirm)
    NonIrr_GW_nc = xr.open_dataarray(NonIrr_GW_path_tt[i])[:-1]
    merger = xr.merge([NonIrr_GW_nc, cellarea], join='override')
    cwatm_gw = merger.to_dataframe().reset_index()
    # value * cell area (presumably depth -> volume; units not visible here)
    cwatm_gw['gw'] = cwatm_gw.GW_Domestic * cwatm_gw.cellArea_totalend
    # total per grid cell; coordinates rounded to 4 decimals so they can be used as merge keys
    cwatm_gw = cwatm_gw.groupby(['lat','lon']).sum().reset_index().round(
        {'lon': 4, 'lat': 4}) 
    # NOTE(review): gw_cwatm_average is not used later in this function
    gw_cwatm_average=cwatm_gw['gw'].sum()*1e-6
    
    ### urban groundwater non-irr use
    print(planning_output_mo_gw_path_tt[i])
    gw_nc = xr.open_dataarray(planning_output_mo_gw_path_tt[i])
    gw = gw_nc.to_dataframe().reset_index().round({'lon': 4, 'lat': 4})
    gw.replace([np.inf, -np.inf], np.nan, inplace=True)
    gw = gw.fillna(0)
    merge_df = adminsegs_df.merge(gw,on=['lat','lon'])
    # NOTE(review): gw_average is not used later; [-1] takes the last entry of the summed
    # Series by position (presumably the gw_m3 column - TODO confirm)
    gw_average=merge_df.groupby('adminSegments_totalend').sum().sum()[-1]*1e-6
    
    ### cwatm over urban gw ratio
    urban_cwatm_gw = gw.merge(cwatm_gw[['lat','lon','gw']], on=['lat','lon']).reset_index()
    urban_cwatm_gw = urban_cwatm_gw.rename(columns={'lat':'y','lon':'x'})
    urban_cwatm_gw['ratio_gw'] = urban_cwatm_gw['gw']/urban_cwatm_gw['gw_m3']
    urban_cwatm_gw['ratio_gw'] = urban_cwatm_gw['ratio_gw'].fillna(0)
    
    ### cwatm res water non-irr use
    print(NonIrr_Res_path_tt[i])
    NonIrr_Res_nc = xr.open_dataarray(NonIrr_Res_path_tt[i])[:-1]
    merger = xr.merge([NonIrr_Res_nc, cellarea], join='override')
    cwatm_res = merger.to_dataframe().reset_index()
    cwatm_res['res'] = cwatm_res.Res_Domestic * cwatm_res.cellArea_totalend
    cwatm_res = cwatm_res.groupby(['lat','lon']).sum().reset_index().round(
        {'lon': 4, 'lat': 4})
    # NOTE(review): res_cwatm_average is not used later in this function
    res_cwatm_average=cwatm_res['res'].sum()*1e-6
    
    ### cwatm lake water non-irr use
    print(NonIrr_Lake_path_tt[i])
    NonIrr_Lake_nc = xr.open_dataarray(NonIrr_Lake_path_tt[i])[:-1]
    merger = xr.merge([NonIrr_Lake_nc, cellarea], join='override')
    cwatm_lake = merger.to_dataframe().reset_index()
    cwatm_lake['lake'] = cwatm_lake.Lake_Domestic * cwatm_lake.cellArea_totalend
    cwatm_lake = cwatm_lake.groupby(['lat','lon']).sum().reset_index().round(
        {'lon': 4, 'lat': 4}) 
    # NOTE(review): lake_cwatm_average is not used later in this function
    lake_cwatm_average=cwatm_lake['lake'].sum()*1e-6
    
    ### cwatm surface water = reservoir + lake non-irr use
    cwatm_lake_sw = cwatm_lake.merge(cwatm_res, on=['lat','lon'])
    cwatm_lake_sw['sw']= cwatm_lake_sw['res']+ cwatm_lake_sw['lake']
    
    ### urban surface water non-irr use
    print(planning_output_mo_sw_path_tt[i])
    sw_nc = xr.open_dataarray(planning_output_mo_sw_path_tt[i])
    sw = sw_nc.to_dataframe().reset_index().round({'lon': 4, 'lat': 4})
    sw.replace([np.inf, -np.inf], np.nan, inplace=True)
    sw = sw.fillna(0)
    merge_df = adminsegs_df.merge(sw,on=['lat','lon'])
    # NOTE(review): sw_average is not used later in this function
    sw_average=merge_df.groupby('adminSegments_totalend').sum().sum()[-1]*1e-6
    
    ### cwatm over urban sw ratio
    urban_cwatm = sw.merge(cwatm_lake_sw[['lat','lon','sw']], on=['lat','lon']).reset_index()
    urban_cwatm = urban_cwatm.rename(columns={'lat':'y','lon':'x'})
    # unlike ratio_gw this is not filled here; NaN/inf are replaced by 0 at the end of the function
    urban_cwatm['ratio'] = urban_cwatm['sw']/urban_cwatm['sw_m3']
    
    # household sheet of step i, restricted to / reordered by the expected columns
    df_hh = pd.DataFrame(out_hh[i],columns=["x", "y", "units", "size", "income","piped_m3/mo", "well_m3/mo", "tanker_m3/mo","tanker_tier2_m3/mo",
                     "electr_kWh/mo", "food_INR/mo", "electr_for_water_kWh/mo",
                     "total_water_m3/mo", "non_piped_m3/mo", "non_piped_percent",
                     "electr_for_water_per_unit", "piped_per_unit", "well_per_unit",
                     "non_piped_per_unit", "non_piped_pct_per_unit", "total_water_per_unit",
                     "piped_expenditure_per_unit", "well_expenditure_per_unit",
                     "tanker_expenditure_per_unit","tanker_tier2_expenditure_per_unit", "expenditure_per_unit",
                     "electricity price", "electricity expenditure","well type"])
    
    # Rows alternate: even row labels are households, odd row labels are slum households,
    # whose income is scaled by slum_factor.
    # NOTE(review): range(0, n-1, 2) skips the last row when the row count n is odd.
    slum_factor = 0.288614
    df_hh.loc[range(0,df_hh.shape[0]-1,2),'hh_income'] = df_hh.loc[range(0,df_hh.shape[0]-1,2),'income']
    df_hh.loc[range(1,df_hh.shape[0],2),'hh_income'] = df_hh.loc[range(1,df_hh.shape[0],2),'income'] * slum_factor 
    
    df_hh.loc[range(0,df_hh.shape[0]-1,2),'category'] = 1 # represent household 
    df_hh.loc[range(1,df_hh.shape[0],2),'category'] = 2 # represent slum household
    
    # units * size (presumably number of people in the row - TODO confirm) and income per person
    df_hh['units_size'] = df_hh['units'] * df_hh['size']
    df_hh['income_pp'] = df_hh['hh_income']/df_hh['size']
    
    ### combine household dataframe with piped water ratio
    piped_hh = df_hh.round({'x': 4, 'y': 4}).merge(urban_cwatm, on =['x','y'])
    piped_hh['geometry']= piped_hh.apply(lambda row: Point(row["x"], row["y"]), axis=1)
    hh_pp_gpd = gpd.GeoDataFrame(piped_hh, crs=tanker_shp.crs, geometry="geometry")
    hh_larger_than_zero = hh_pp_gpd[hh_pp_gpd['units_size']>0]

    ### combine total dataframe with gw water ratio
    hh_larger_than_zero2 = hh_larger_than_zero.merge(urban_cwatm_gw, on =['x','y'],how="left")
    hh_larger_than_zero2.replace([np.inf, -np.inf], np.nan, inplace=True)
    hh_larger_than_zero3=hh_larger_than_zero2.fillna(0)
    return hh_larger_than_zero3

In [23]:
# Grid-cell area array; merged with the CWatM outputs inside urban_cwatm_ratio.
cellarea_file_path = basepath + r"/modules/hydro/hydro_files/netcdfs/cellArea.nc"
cellarea = xr.open_dataarray(cellarea_file_path)

# NOTE(review): adminsegs_df_urban is not used in the visible part of the notebook.
adminsegs_df_urban = pd.read_csv(
        basepath + r'/modules/hydro/hydro_inputs/landsurface/waterDemand/adminsegs_Pune_Solapur.csv')
# Admin-segment grid as a flat table; lat/lon rounded to 4 decimals to match the merge keys
# used in urban_cwatm_ratio.
adminsegs = xr.open_dataarray(
        basepath + r'/modules/hydro/hydro_inputs/landsurface/waterDemand/2020-10-14_Agents.nc')
adminsegs_df = adminsegs.to_dataframe().reset_index().round({'lon': 4, 'lat': 4})

# NOTE(review): lat_lon is not used in the visible part of the notebook.
lat_lon = pd.read_excel(basepath + r'/modules/hydro/hydro_files/lat_lon.xlsx', sheet_name='lat_lon')

In [25]:
# tanker_shp supplies the CRS for the household points built in urban_cwatm_ratio.
tanker_shp = gpd.read_file(basepath + r"/modules/hydro/hydro_files/shapefiles/tanker/tanker.shp")
# Pune_shp supplies the CRS and the polygons for the "within" spatial joins that split
# households into urban (inside) and rural (outside).
Pune_shp = gpd.read_file(basepath + r"/modules/hydro/hydro_files/shapefiles/PMC_Boundary/PMC_PCMC_Ward_Bounda_Outer.shp")

In [26]:
import psutil

# One worker per physical core.
nbrCores= psutil.cpu_count(logical=False)
print(str(nbrCores))

# NOTE(review): `jobs` is not used anywhere in the visible part of the notebook.
jobs = distrib_task(0,len(NonIrr_GW_path_tt)-1,nbrCores)

# One integer index per entry of NonIrr_GW_path_tt.
it = (int(j) for j in range(len(NonIrr_GW_path_tt)))

# urban_cwatm_ratio takes one argument, so parallel() dispatches it with pool.map.
# NOTE(review): the worker reads notebook globals (out_hh, cellarea, ...); this presumably
# relies on worker processes inheriting the parent namespace (fork start method) - confirm.
df_ratio = parallel(urban_cwatm_ratio,it,nbrCores)

  return array(a, dtype, copy=False, order=order)


### Vulnerability

In [27]:
def water_vulnerability(threshold, df, var):
    """
    Percentage of ``units_size`` held by rows whose ``var`` is at or below ``threshold``.

    Sums ``units_size`` over the rows with ``df[var] <= threshold``, divides by
    the sum over all rows and multiplies by 100.
    """
    at_or_below = df.loc[df[var] <= threshold, 'units_size']
    return np.sum(at_or_below) / np.sum(df['units_size']) * 100

In [28]:
# Per-capita income cut-offs compared against `income_pp` when splitting households into
# rich / mid / poor groups. The *_ssp1 pair is used for scenario1 and scenario2, the *_ssp3
# pair for scenario3 and scenario4 (units and derivation are not visible here - TODO document).
up_urban_ssp1 = 10870 # upper 10%
up_urban_ssp3 = 7122 # upper 10%
down_urban_ssp1 = 2848 # lower 10%
down_urban_ssp3 = 1830 # lower 10%

In [29]:
def vulnerability_rich_poor_mid(i):
    """
    Percentage of people at or below 39 lpcd in six income/location groups for step ``i``.

    We define vulnerability as below 40 lpcd. 
    Since values above 40 are capped at 40 in the model under some interventions, 
    we code vulnerability as ≤39 lpcd to avoid misclassifying capped overuse as vulnerable

    Parameters
    ----------
    i : int
        Index into ``df_ratio`` and ``NonIrr_GW_path_tt`` (module-level names).

    Returns
    -------
    tuple of six values
        ``water_vulnerability`` results for, in order: urban rich, urban poor,
        urban mid, rural rich, rural poor, rural mid.

    Raises
    ------
    ValueError
        If the scenario name parsed from ``NonIrr_GW_path_tt[i]`` is not one of
        scenario1..scenario4 (previously this surfaced as an UnboundLocalError).
    """
    columns = ["x", "y", "units", "size", "income","piped_m3/mo", "well_m3/mo", "tanker_m3/mo","tanker_tier2_m3/mo",
               "electr_kWh/mo", "food_INR/mo", "electr_for_water_kWh/mo",
               "total_water_m3/mo", "non_piped_m3/mo", "non_piped_percent",
               "electr_for_water_per_unit", "piped_per_unit", "well_per_unit",
               "non_piped_per_unit", "non_piped_pct_per_unit", "total_water_per_unit",
               "piped_expenditure_per_unit", "well_expenditure_per_unit",
               "tanker_expenditure_per_unit","tanker_tier2_expenditure_per_unit", "expenditure_per_unit",
               "electricity price", "electricity expenditure","well type", 'hh_income', 'category', 'units_size', 'income_pp',
               'index_x', 'sw_m3', 'sw', 'ratio', 'geometry', 'tanker_ratio','tanker_tier2_ratio',
               'index_y', 'gw_m3', 'gw', 'ratio_gw']
    p = pd.DataFrame(df_ratio[i], columns=columns)

    # m3 per month -> per-capita daily use
    # (x1000 presumably m3 -> litres, 30.417 presumably average days per month)
    for source in ('piped', 'tanker', 'well', 'tanker_tier2'):
        p[source + '_water_lpcd_percapita'] = p[source + '_m3/mo']/p['units_size']*1000/30.417

    # scale the household-model volumes by the CWatM/urban ratios
    p['cwatm_piped_water_lpcd_percapita'] = p['piped_water_lpcd_percapita'] * p['ratio']
    p['cwatm_well_water_lpcd_percapita'] = p['well_water_lpcd_percapita'] * p['ratio_gw']

    # rural tanker water is scaled by the groundwater ratio, urban tanker water is taken as is
    p['cwatm_tanker_water_lpcd_percapita_rural'] = p['tanker_water_lpcd_percapita'] * p['ratio_gw']
    p['cwatm_tanker_water_lpcd_percapita_urban'] = p['tanker_water_lpcd_percapita']
    p['cwatm_tanker_tier2_water_lpcd_percapita_urban'] = p['tanker_tier2_water_lpcd_percapita']

    p['cwatm_total_water_lpcd_percapita_urban'] = p['cwatm_piped_water_lpcd_percapita'] +  p['cwatm_well_water_lpcd_percapita'] +p['cwatm_tanker_water_lpcd_percapita_urban'] + p['cwatm_tanker_tier2_water_lpcd_percapita_urban']
    p['cwatm_total_water_lpcd_percapita_rural'] = p['cwatm_piped_water_lpcd_percapita'] +  p['cwatm_well_water_lpcd_percapita'] +p['cwatm_tanker_water_lpcd_percapita_rural']

    p['cwatm_piped_water_m3/mo'] = p['piped_m3/mo'] * p['ratio']
    p['cwatm_well_water_m3/mo'] = p['well_m3/mo'] * p['ratio_gw']

    p_gpd = gpd.GeoDataFrame(p, crs=Pune_shp.crs, geometry="geometry")

    # Points inside the Pune polygons are urban, all remaining points are rural.
    # `op=` was deprecated in favour of `predicate=` in geopandas 0.10; fall back
    # to it only on older versions that do not know `predicate`.
    try:
        gpd_pune = gpd.sjoin(p_gpd, Pune_shp[['geometry']], predicate="within")
    except TypeError:
        gpd_pune = gpd.sjoin(p_gpd, Pune_shp[['geometry']], op="within")

    rural_gpd = p_gpd[~p_gpd.index.isin(gpd_pune.index)]
    urban_gpd = gpd_pune

    # the scenario name is the third '_'-separated token of the run directory name
    scenario = NonIrr_GW_path_tt[i].split('/')[-3].split('_')[2]
    if scenario in ('scenario1', 'scenario2'):
        up_urban, down = up_urban_ssp1, down_urban_ssp1
    elif scenario in ('scenario3', 'scenario4'):
        up_urban, down = up_urban_ssp3, down_urban_ssp3
    else:
        raise ValueError(f"Unrecognised scenario {scenario!r} in path {NonIrr_GW_path_tt[i]}")

    # NOTE(review): the rural upper cut-off is derived from the data (max/2 - 1561);
    # the origin of 1561 is not visible here.
    up_rural = rural_gpd.income_pp.max()/2 - 1561

    urban_income = urban_gpd['income_pp']
    rural_income = rural_gpd['income_pp']
    urban_rich = urban_gpd[urban_income >= up_urban]
    urban_poor = urban_gpd[urban_income <= down]
    urban_mid = urban_gpd[(urban_income > down) & (urban_income < up_urban)]
    rural_rich = rural_gpd[rural_income >= up_rural]
    # the rural poor cut-off reuses the urban lower threshold `down`
    rural_poor = rural_gpd[rural_income <= down]
    rural_mid = rural_gpd[(rural_income > down) & (rural_income < up_rural)]

    urban_col = 'cwatm_total_water_lpcd_percapita_urban'
    rural_col = 'cwatm_total_water_lpcd_percapita_rural'
    groups = ((urban_rich, urban_col), (urban_poor, urban_col), (urban_mid, urban_col),
              (rural_rich, rural_col), (rural_poor, rural_col), (rural_mid, rural_col))
    return tuple(water_vulnerability(39, group, col) for group, col in groups)


In [30]:
import psutil

# One worker per physical core.
nbrCores= psutil.cpu_count(logical=False)
print(str(nbrCores))

# NOTE(review): `jobs` is not used anywhere in the visible part of the notebook.
jobs = distrib_task(0,len(NonIrr_GW_path_tt)-1,nbrCores)

it = (int(j) for j in range(len(NonIrr_GW_path_tt)))

# The worker returns a 6-tuple per step, so parallel() yields six arrays (one per group),
# each holding one value per entry of NonIrr_GW_path_tt.
vul_urbanrich, vul_urbanpoor,vul_urbanmid, vul_ruralrich, vul_ruralpoor,vul_ruralmid= parallel(vulnerability_rich_poor_mid,it,nbrCores)

### Stress duration

In [31]:
def runs_of_ones_array(bits):
    """
    Lengths of the consecutive runs of ones in a 0/1 sequence, in order of appearance.

    Returns an empty array when the sequence contains no ones.
    """
    # pad with a zero on both sides so that every run has a start and an end
    padded = np.hstack(([0], bits, [0]))
    steps = np.diff(padded)
    # a rise marks the first element of a run, a fall marks one past its last element
    starts = np.flatnonzero(steps > 0)
    ends = np.flatnonzero(steps < 0)
    return ends - starts

def duration(df, i=1):
    """
    Weighted mean of the longest run of ones in the ``i``-th 12-column window of ``df``.

    The original body wrote to an undefined name ``rich``, read an undefined
    ``i`` and returned nothing; it now works on ``df`` itself, takes the window
    number as a keyword argument and returns the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Column 1 (by position) holds the weights; columns from position 2 on
        hold 0/1 flags, twelve per window. A ``duration`` column with the
        longest run per row is added to ``df`` in place.
    i : int, optional
        1-based window number; the flags are read from positions
        ``2+12*(i-1)`` to ``2+12*i`` (exclusive). Defaults to the first window.

    Returns
    -------
    float
        ``sum(duration * weight) / sum(weight)``.
    """
    first, last = 2 + 12 * (i - 1), 2 + 12 * i
    longest = []
    for j in range(df.shape[0]):
        x = runs_of_ones_array(df.iloc[j, first:last])
        # no run of ones in this row -> duration 0
        longest.append(np.nanmax(x) if x.size else 0)
    df['duration'] = longest
    weights = df.iloc[:, 1]
    return np.sum(df['duration'] * weights) / np.sum(weights)

In [32]:
def water_stress(threshold, df0, var):
    """
    Flag each row of ``df0`` as stressed (1) or not (0) and return the key columns.

    Works on a copy of ``df0``: ``vul`` is 0 where ``var`` is above
    ``threshold`` and 1 where it is at or below it. Only the columns
    x, y, category, units_size and vul are returned.
    """
    keep = ['x','y','category','units_size','vul']
    flagged = df0.copy()
    flagged.loc[flagged[var] > threshold, 'vul'] = 0
    flagged.loc[flagged[var] <= threshold, 'vul'] = 1
    return flagged[keep]

In [33]:
def stress_duration_vul(i): 
    """
    Per-household stress flags (use at or below 39 lpcd) for six income/location groups at step ``i``.

    We define stress duration as duration of water use below 40 lpcd. 
    Since values above 40 are capped at 40 in the model under some interventions, 
    we code it as ≤39 lpcd to avoid misclassifying capped overuse as vulnerable

    Parameters
    ----------
    i : int
        Index into ``df_ratio`` and ``NonIrr_GW_path_tt`` (module-level names).

    Returns
    -------
    tuple of six DataFrames
        ``water_stress`` results (columns x, y, category, units_size, vul) for,
        in order: urban rich, urban poor, urban mid, rural rich, rural poor,
        rural mid.

    Raises
    ------
    ValueError
        If the scenario name parsed from ``NonIrr_GW_path_tt[i]`` is not one of
        scenario1..scenario4 (previously this surfaced as an UnboundLocalError).
    """
    columns = ["x", "y", "units", "size", "income","piped_m3/mo", "well_m3/mo", "tanker_m3/mo","tanker_tier2_m3/mo",
               "electr_kWh/mo", "food_INR/mo", "electr_for_water_kWh/mo",
               "total_water_m3/mo", "non_piped_m3/mo", "non_piped_percent",
               "electr_for_water_per_unit", "piped_per_unit", "well_per_unit",
               "non_piped_per_unit", "non_piped_pct_per_unit", "total_water_per_unit",
               "piped_expenditure_per_unit", "well_expenditure_per_unit",
               "tanker_expenditure_per_unit","tanker_tier2_expenditure_per_unit", "expenditure_per_unit",
               "electricity price", "electricity expenditure","well type", 'hh_income', 'category', 'units_size', 'income_pp',
               'index_x', 'sw_m3', 'sw', 'ratio', 'geometry', 'tanker_ratio','tanker_tier2_ratio',
               'index_y', 'gw_m3', 'gw', 'ratio_gw']
    p = pd.DataFrame(df_ratio[i], columns=columns)

    # m3 per month -> per-capita daily use
    # (x1000 presumably m3 -> litres, 30.417 presumably average days per month)
    for source in ('piped', 'tanker', 'well', 'tanker_tier2'):
        p[source + '_water_lpcd_percapita'] = p[source + '_m3/mo']/p['units_size']*1000/30.417

    # scale the household-model volumes by the CWatM/urban ratios
    p['cwatm_piped_water_lpcd_percapita'] = p['piped_water_lpcd_percapita'] * p['ratio']
    p['cwatm_well_water_lpcd_percapita'] = p['well_water_lpcd_percapita'] * p['ratio_gw']
    # rural tanker water is scaled by the groundwater ratio, urban tanker water is taken as is
    p['cwatm_tanker_water_lpcd_percapita_rural'] = p['tanker_water_lpcd_percapita'] * p['ratio_gw']
    p['cwatm_tanker_water_lpcd_percapita_urban'] = p['tanker_water_lpcd_percapita']
    p['cwatm_tanker_tier2_water_lpcd_percapita_urban'] = p['tanker_tier2_water_lpcd_percapita']

    p['cwatm_total_water_lpcd_percapita_urban'] = p['cwatm_piped_water_lpcd_percapita'] +  p['cwatm_well_water_lpcd_percapita'] +p['cwatm_tanker_water_lpcd_percapita_urban'] + p['cwatm_tanker_tier2_water_lpcd_percapita_urban']
    p['cwatm_total_water_lpcd_percapita_rural'] = p['cwatm_piped_water_lpcd_percapita'] +  p['cwatm_well_water_lpcd_percapita'] +p['cwatm_tanker_water_lpcd_percapita_rural']

    p['cwatm_piped_water_m3/mo'] = p['piped_m3/mo'] * p['ratio']
    p['cwatm_well_water_m3/mo'] = p['well_m3/mo'] * p['ratio_gw']

    p_gpd = gpd.GeoDataFrame(p, crs=Pune_shp.crs, geometry="geometry")

    # Points inside the Pune polygons are urban, all remaining points are rural.
    # `op=` was deprecated in favour of `predicate=` in geopandas 0.10; fall back
    # to it only on older versions that do not know `predicate`.
    try:
        gpd_pune = gpd.sjoin(p_gpd, Pune_shp[['geometry']], predicate="within")
    except TypeError:
        gpd_pune = gpd.sjoin(p_gpd, Pune_shp[['geometry']], op="within")

    rural_gpd = p_gpd[~p_gpd.index.isin(gpd_pune.index)]
    urban_gpd = gpd_pune

    # the scenario name is the third '_'-separated token of the run directory name
    scenario = NonIrr_GW_path_tt[i].split('/')[-3].split('_')[2]
    if scenario in ('scenario1', 'scenario2'):
        up_urban, down = up_urban_ssp1, down_urban_ssp1
    elif scenario in ('scenario3', 'scenario4'):
        up_urban, down = up_urban_ssp3, down_urban_ssp3
    else:
        raise ValueError(f"Unrecognised scenario {scenario!r} in path {NonIrr_GW_path_tt[i]}")

    # NOTE(review): the rural upper cut-off is derived from the data (max/2 - 1561);
    # the origin of 1561 is not visible here.
    up_rural = rural_gpd.income_pp.max()/2 - 1561

    urban_income = urban_gpd['income_pp']
    rural_income = rural_gpd['income_pp']
    urban_rich = urban_gpd[urban_income >= up_urban]
    urban_poor = urban_gpd[urban_income <= down]
    urban_mid = urban_gpd[(urban_income > down) & (urban_income < up_urban)]
    rural_rich = rural_gpd[rural_income >= up_rural]
    # the rural poor cut-off reuses the urban lower threshold `down`
    rural_poor = rural_gpd[rural_income <= down]
    rural_mid = rural_gpd[(rural_income > down) & (rural_income < up_rural)]

    urban_col = 'cwatm_total_water_lpcd_percapita_urban'
    rural_col = 'cwatm_total_water_lpcd_percapita_rural'
    groups = ((urban_rich, urban_col), (urban_poor, urban_col), (urban_mid, urban_col),
              (rural_rich, rural_col), (rural_poor, rural_col), (rural_mid, rural_col))
    return tuple(water_stress(39, group, col) for group, col in groups)


In [34]:
import psutil

# One worker per physical core.
nbrCores= psutil.cpu_count(logical=False)
print(str(nbrCores))

# NOTE(review): `jobs` is not used anywhere in the visible part of the notebook.
jobs = distrib_task(0,len(NonIrr_GW_path_tt)-1,nbrCores)

it = (int(j) for j in range(len(NonIrr_GW_path_tt)))

# The worker returns a 6-tuple of DataFrames per step, so parallel() yields six
# collections (one per group), each indexed by step.
Urbanrich, Urbanpoor,Urbanmid, Ruralrich, Ruralpoor,Ruralmid= parallel(stress_duration_vul,it,nbrCores)

  return array(a, dtype, copy=False, order=order)


In [35]:
def _stack_stress_frames(per_step):
    """
    Put the stress tables of every step side by side, aligned on (x, y, category).

    For each step index the table is rebuilt with the five stress columns,
    indexed by (x, y, category), stripped of rows whose index repeats an
    earlier one, and finally all steps are concatenated column-wise.
    """
    stress_columns = ['x','y','category','units_size','vul']
    key_columns = ['x','y','category']
    frames = []
    for i in range(len(NonIrr_GW_path_tt)):
        indexed = pd.DataFrame(per_step[i],columns=stress_columns).set_index(key_columns)
        frames.append(indexed[~indexed.index.duplicated()])
    return pd.concat(frames, ignore_index=False, axis=1)


urbanrich_mid = _stack_stress_frames(Urbanrich)
urbanpoor_mid = _stack_stress_frames(Urbanpoor)
urbanmid_mid = _stack_stress_frames(Urbanmid)

ruralrich_mid = _stack_stress_frames(Ruralrich)
ruralpoor_mid = _stack_stress_frames(Ruralpoor)
ruralmid_mid = _stack_stress_frames(Ruralmid)

In [36]:
def runs_of_ones_array(bits):
    """
    Lengths of the consecutive runs of ones in a 0/1 sequence, in order of appearance.

    Returns an empty array when the sequence contains no ones.
    """
    # a zero on each side guarantees that every run is bounded
    padded = np.hstack(([0], bits, [0]))
    steps = np.diff(padded)
    # +1 steps open a run, -1 steps close it
    starts = np.flatnonzero(steps > 0)
    ends = np.flatnonzero(steps < 0)
    return ends - starts

def stress_duration2(df0):
    """
    Weighted mean of the longest stress run in each consecutive 12-step window.

    Parameters
    ----------
    df0 : pandas.DataFrame
        Column-wise concatenation of per-step tables, i.e. alternating
        ``units_size`` / ``vul`` columns, indexed by three levels (presumably
        x, y, category as built by the stacking cell - confirm against caller).

    Returns
    -------
    list
        One value per complete window of 12 steps in ``NonIrr_GW_path_tt``
        (module-level list): the ``units_size``-weighted mean, over rows, of
        the longest run of consecutive ``vul == 1`` steps inside that window.
        Missing ``vul`` values count as 0.
    """
    n_steps = len(NonIrr_GW_path_tt)
    # `vul` sits at every odd column position of df0
    vul_positions = np.arange(1, n_steps*2, 2)

    stress = []
    # integer division replaces np.int(np.floor(...)); np.int was removed in NumPy 1.24
    for i in range(1, 1 + n_steps//12):
        # keep the units_size column of the window's first step plus every vul column;
        # after reset_index the three index levels and units_size occupy positions 0-3
        df = df0.iloc[:, np.append(24*i-24, vul_positions)].reset_index().copy()
        first, last = 4 + 12*(i-1), 4 + 12*i

        longest = []
        for j in range(df.shape[0]):
            x = runs_of_ones_array(df.iloc[j, first:last].fillna(0))
            # no run of ones in this row -> duration 0
            longest.append(np.nanmax(x) if x.size else 0)
        df['duration'] = longest

        item = np.sum((df.iloc[:,-1] * df.loc[:,'units_size']))/np.sum(df.loc[:,'units_size'])
        stress.append(item)
    return stress

In [37]:
import psutil

# One worker per physical core.
nbrCores= psutil.cpu_count(logical=False)
print(str(nbrCores))

# NOTE(review): `jobs` is not used anywhere in the visible part of the notebook.
jobs = distrib_task(0,len(NonIrr_GW_path_tt)-1,nbrCores)

# One stacked stress table per income/location group.
it = (j for j in [urbanrich_mid, urbanpoor_mid, urbanmid_mid,ruralrich_mid, ruralpoor_mid, ruralmid_mid])

# stress_duration2 returns a list per group, so parallel() builds one array with a row per
# group; unpacking splits it back into the six groups.
dur_urbanrich,dur_urbanpoor,dur_urbanmid,dur_ruralrich,dur_ruralpoor,dur_ruralmid= parallel(stress_duration2,it,nbrCores)

### Gini coefficient

In [38]:
def gini_coefficient_from_dataframe(df, col):
    """
    Population-weighted Gini coefficient of ``df[col]``.

    Rows are sorted by ``col`` and weighted by ``units_size``. The Lorenz curve
    (cumulative share of ``col * units_size`` against cumulative share of
    ``units_size``) is integrated with the trapezoidal rule, starting at the
    origin, and the Gini coefficient is ``(0.5 - area) / 0.5``.

    The previous version left the origin out of the curve, which dropped the
    first trapezoid and biased the result upward (an equal distribution gave a
    value above 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``col`` and ``units_size``. It is not modified.
    col : str
        Name of the column whose inequality is measured.

    Returns
    -------
    float
        Gini coefficient; NaN for an empty frame or when the weighted total of
        ``col`` is zero (the shares are undefined in both cases).
    """
    if len(df) == 0:
        return float('nan')

    # Sort households from lowest to highest value of `col`
    ordered = df.sort_values(by=col)
    people = ordered['units_size']
    volume = ordered[col] * people

    # Cumulative population and value shares, with the origin prepended
    pop_cum = np.concatenate(([0.0], (people.cumsum() / people.sum()).to_numpy()))
    val_cum = np.concatenate(([0.0], (volume.cumsum() / volume.sum()).to_numpy()))

    # Area under the Lorenz curve (np.trapz was renamed np.trapezoid in NumPy 2.0)
    trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
    area_under_lorenz = trapezoid(val_cum, x=pop_cum)

    # Area under the perfect-equality line (the triangle below the diagonal)
    area_under_perfect_equality = 0.5

    return (area_under_perfect_equality - area_under_lorenz) / area_under_perfect_equality

In [39]:
def gini_income(i): 
    """
    Gini coefficients of per-capita water use among urban households at step ``i``.

    Despite the name, the coefficients describe water use, not income. The
    earlier version also looked up income thresholds and built rich/mid/poor
    and rural subsets that were never used; that dead code (which could raise
    for an unrecognised scenario name) has been removed.

    Parameters
    ----------
    i : int
        Index into ``df_ratio`` (module-level name).

    Returns
    -------
    tuple of five floats
        ``gini_coefficient_from_dataframe`` over the households inside the Pune
        polygons for, in order: total use, piped, well, tanker and tier-2
        tanker water (all per capita per day).
    """
    columns = ["x", "y", "units", "size", "income","piped_m3/mo", "well_m3/mo", "tanker_m3/mo","tanker_tier2_m3/mo",
               "electr_kWh/mo", "food_INR/mo", "electr_for_water_kWh/mo",
               "total_water_m3/mo", "non_piped_m3/mo", "non_piped_percent",
               "electr_for_water_per_unit", "piped_per_unit", "well_per_unit",
               "non_piped_per_unit", "non_piped_pct_per_unit", "total_water_per_unit",
               "piped_expenditure_per_unit", "well_expenditure_per_unit",
               "tanker_expenditure_per_unit","tanker_tier2_expenditure_per_unit", "expenditure_per_unit",
               "electricity price", "electricity expenditure","well type", 'hh_income', 'category', 'units_size', 'income_pp',
               'index_x', 'sw_m3', 'sw', 'ratio', 'geometry', 'tanker_ratio','tanker_tier2_ratio',
               'index_y', 'gw_m3', 'gw', 'ratio_gw']
    p = pd.DataFrame(df_ratio[i], columns=columns)

    # m3 per month -> per-capita daily use
    # (x1000 presumably m3 -> litres, 30.417 presumably average days per month)
    for source in ('piped', 'tanker', 'well', 'tanker_tier2'):
        p[source + '_water_lpcd_percapita'] = p[source + '_m3/mo']/p['units_size']*1000/30.417

    # scale the household-model volumes by the CWatM/urban ratios;
    # urban tanker water is taken as is
    p['cwatm_piped_water_lpcd_percapita'] = p['piped_water_lpcd_percapita'] * p['ratio']
    p['cwatm_well_water_lpcd_percapita'] = p['well_water_lpcd_percapita'] * p['ratio_gw']
    p['cwatm_tanker_water_lpcd_percapita_urban'] = p['tanker_water_lpcd_percapita']
    p['cwatm_tanker_tier2_water_lpcd_percapita_urban'] = p['tanker_tier2_water_lpcd_percapita']

    p['cwatm_total_water_lpcd_percapita_urban'] = p['cwatm_piped_water_lpcd_percapita'] +  p['cwatm_well_water_lpcd_percapita'] +p['cwatm_tanker_water_lpcd_percapita_urban'] + p['cwatm_tanker_tier2_water_lpcd_percapita_urban']

    p_gpd = gpd.GeoDataFrame(p, crs=Pune_shp.crs, geometry="geometry")

    # Households inside the Pune polygons.
    # `op=` was deprecated in favour of `predicate=` in geopandas 0.10; fall back
    # to it only on older versions that do not know `predicate`.
    try:
        urban_gpd = gpd.sjoin(p_gpd, Pune_shp[['geometry']], predicate="within")
    except TypeError:
        urban_gpd = gpd.sjoin(p_gpd, Pune_shp[['geometry']], op="within")

    gini_columns = ('cwatm_total_water_lpcd_percapita_urban',
                    'cwatm_piped_water_lpcd_percapita',
                    'cwatm_well_water_lpcd_percapita',
                    'cwatm_tanker_water_lpcd_percapita_urban',
                    'cwatm_tanker_tier2_water_lpcd_percapita_urban')
    return tuple(gini_coefficient_from_dataframe(urban_gpd, col) for col in gini_columns)

In [40]:
import psutil

# Number of worker processes: prefer the physical core count. psutil documents
# that cpu_count(logical=False) may return None when it cannot be determined,
# so fall back to the logical count and finally to a single worker so that an
# integer is always handed to distrib_task / parallel below.
nbrCores = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
print(nbrCores)

# Partition of the run indices 0 .. len-1 over the workers. The partition is
# kept in `jobs`; the parallel call below is driven by `it`, not by `jobs`.
jobs = distrib_task(0, len(NonIrr_GW_path_tt) - 1, nbrCores)

# One task per entry of NonIrr_GW_path_tt, identified by its integer position.
it = (int(j) for j in range(len(NonIrr_GW_path_tt)))

# gini_income returns a 5-tuple per run; parallel() regroups tuple results into
# one numpy array per tuple position, which are unpacked here.
Gini_urban, Gini_urbanpiped, Gini_urbanwell, Gini_urbantanker1, Gini_urbantanker2 = parallel(gini_income, it, nbrCores)

### Sectoral water use

In [41]:
def regional_water_use_lpcd_rich_mid_poor(i):
    """Weighted mean per-capita water use of urban rich/poor/mid units for run ``i``.

    The table ``df_ratio[i]`` is converted to per-capita daily volumes, the
    piped and well volumes are scaled by the ``ratio`` / ``ratio_gw`` columns,
    and the units lying within ``Pune_shp`` (the "urban" units) are split into
    three income classes using the thresholds of the scenario encoded in
    ``NonIrr_GW_path_tt[i]``. For every class the ``units_size``-weighted mean
    of each water-use column is computed.

    Parameters
    ----------
    i : int
        Position of the run in ``df_ratio`` and ``NonIrr_GW_path_tt``.

    Returns
    -------
    tuple
        Twelve values, each group ordered (rich, poor, mid):
        piped water, well water, tanker water (tanker + tanker tier 2), and
        total water (piped + well + tanker). Note that the last three values
        are the *total* use, although callers in this notebook unpack them
        into names starting with ``Tanker_tier2_``.
        A class with no units (or zero total ``units_size``) yields NaN.

    Raises
    ------
    ValueError
        If the scenario token parsed from the path is not one of
        ``scenario1`` .. ``scenario4`` (previously this surfaced as an
        ``UnboundLocalError`` on the income thresholds).
    """
    p = pd.DataFrame(df_ratio[i], columns=["x", "y", "units", "size", "income","piped_m3/mo", "well_m3/mo", "tanker_m3/mo","tanker_tier2_m3/mo",
                 "electr_kWh/mo", "food_INR/mo", "electr_for_water_kWh/mo",
                 "total_water_m3/mo", "non_piped_m3/mo", "non_piped_percent",
                 "electr_for_water_per_unit", "piped_per_unit", "well_per_unit",
                 "non_piped_per_unit", "non_piped_pct_per_unit", "total_water_per_unit",
                 "piped_expenditure_per_unit", "well_expenditure_per_unit",
                 "tanker_expenditure_per_unit","tanker_tier2_expenditure_per_unit", "expenditure_per_unit",
                 "electricity price", "electricity expenditure","well type", 'hh_income', 'category', 'units_size', 'income_pp',
                 'index_x', 'sw_m3', 'sw', 'ratio', 'geometry', 'tanker_ratio','tanker_tier2_ratio',
                 'index_y', 'gw_m3', 'gw', 'ratio_gw'])

    # Monthly volume per unit -> daily volume per person:
    # divide by units_size, x1000, / 30.417 (~ 365/12 days per month).
    # Presumably m3/month -> litres per capita per day, going by the column names.
    for source in ('piped', 'tanker', 'tanker_tier2', 'well'):
        p[source + '_water_lpcd_percapita'] = p[source + '_m3/mo'] / p['units_size'] * 1000 / 30.417

    # Piped and well use are scaled by their ratio columns; urban tanker use is
    # the unscaled sum of both tanker tiers.
    p['cwatm_piped_water_lpcd_percapita'] = p['piped_water_lpcd_percapita'] * p['ratio']
    p['cwatm_well_water_lpcd_percapita'] = p['well_water_lpcd_percapita'] * p['ratio_gw']
    p['cwatm_tanker_water_lpcd_percapita_urban'] = p['tanker_water_lpcd_percapita'] + p['tanker_tier2_water_lpcd_percapita']
    p['cwatm_total_water_lpcd_percapita_urban'] = (
        p['cwatm_piped_water_lpcd_percapita']
        + p['cwatm_well_water_lpcd_percapita']
        + p['cwatm_tanker_water_lpcd_percapita_urban']
    )

    p_gpd = gpd.GeoDataFrame(p, crs=Pune_shp.crs, geometry="geometry")

    # Urban units are those whose geometry lies within the Pune shape.
    # NOTE(review): `op` is the spelling used throughout this notebook; newer
    # geopandas releases name this argument `predicate` - switch when upgrading.
    urban_gpd = gpd.sjoin(p_gpd, Pune_shp[['geometry']], op="within")

    # The scenario token (third '_'-separated field of the third-from-last path
    # component) selects which pair of income thresholds applies.
    scenario = NonIrr_GW_path_tt[i].split('/')[-3].split('_')[2]
    if scenario in ('scenario1', 'scenario2'):
        up_urban, down = up_urban_ssp1, down_urban_ssp1
    elif scenario in ('scenario3', 'scenario4'):
        up_urban, down = up_urban_ssp3, down_urban_ssp3
    else:
        raise ValueError(
            "Cannot select income thresholds: unexpected scenario %r parsed from %r"
            % (scenario, NonIrr_GW_path_tt[i])
        )

    # Income classes: rich >= upper threshold, poor <= lower threshold,
    # mid strictly in between.
    income = urban_gpd['income_pp']
    urban_rich = urban_gpd[income >= up_urban]
    urban_poor = urban_gpd[income <= down]
    urban_mid = urban_gpd[(income > down) & (income < up_urban)]

    def _weighted_mean(df, column):
        # Mean of `column` weighted by the number of people in each unit.
        weights = df.loc[:, 'units_size']
        return np.sum(weights * df.loc[:, column]) / np.sum(weights)

    # Order matters: callers unpack positionally (rich, poor, mid per column).
    groups = (urban_rich, urban_poor, urban_mid)
    columns = (
        'cwatm_piped_water_lpcd_percapita',
        'cwatm_well_water_lpcd_percapita',
        'cwatm_tanker_water_lpcd_percapita_urban',
        'cwatm_total_water_lpcd_percapita_urban',
    )
    return tuple(_weighted_mean(group, column) for column in columns for group in groups)


In [42]:
import psutil

# Number of worker processes: prefer the physical core count. psutil documents
# that cpu_count(logical=False) may return None when it cannot be determined,
# so fall back to the logical count and finally to a single worker so that an
# integer is always handed to distrib_task / parallel below.
nbrCores = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
print(nbrCores)

# Partition of the run indices 0 .. len-1 over the workers. The partition is
# kept in `jobs`; the parallel call below is driven by `it`, not by `jobs`.
jobs = distrib_task(0, len(NonIrr_GW_path_tt) - 1, nbrCores)

# One task per entry of NonIrr_GW_path_tt, identified by its integer position.
it = (int(j) for j in range(len(NonIrr_GW_path_tt)))

# The worker returns a 12-tuple per run, ordered (rich, poor, mid) for each of
# four water-use columns; parallel() regroups tuple results into one numpy
# array per tuple position.
# NOTE(review): the last three arrays are computed by the worker from the
# total-water column, despite the Tanker_tier2_* names used here.
(Piped_urbanrich, Piped_urbanpoor, Piped_urbanmid,
 Well_hh_urbanrich, Well_urbanpoor, Well_urbanmid,
 Tanker_urbanrich, Tanker_urbanpoor, Tanker_urbanmid,
 Tanker_tier2_urbanrich, Tanker_tier2_urbanpoor, Tanker_tier2_urbanmid) = parallel(regional_water_use_lpcd_rich_mid_poor, it, nbrCores)

### Save output

In [60]:
# Collapse every per-run series into means over consecutive blocks of 12
# entries (presumably the 12 months of one simulation - confirm). Only complete
# blocks are used: start offsets run 0, 12, 24, ... over len // 12 blocks.

# Vulnerability series, order: urban rich/poor/mid, then rural rich/poor/mid.
vul = [
    [np.mean(series[start:start + 12])
     for start in range(0, 12 * (len(NonIrr_GW_path_tt) // 12), 12)]
    for series in (vul_urbanrich, vul_urbanpoor, vul_urbanmid,
                   vul_ruralrich, vul_ruralpoor, vul_ruralmid)
]

# Gini series, order: total, piped, well, tanker1, tanker2.
gini = [
    [np.mean(series[start:start + 12])
     for start in range(0, 12 * (len(NonIrr_GW_path_tt) // 12), 12)]
    for series in [Gini_urban, Gini_urbanpiped, Gini_urbanwell,
                   Gini_urbantanker1, Gini_urbantanker2]
]

# Stress durations are stored as they are, without block averaging.
stress = [dur_urbanrich, dur_urbanpoor, dur_urbanmid,
          dur_ruralrich, dur_ruralpoor, dur_ruralmid]

# Water-use series, order: (rich, poor, mid) for piped, well, tanker and the
# Tanker_tier2_* arrays.
use = [
    [np.mean(series[start:start + 12])
     for start in range(0, 12 * (len(NonIrr_GW_path_tt) // 12), 12)]
    for series in (Piped_urbanrich, Piped_urbanpoor, Piped_urbanmid,
                   Well_hh_urbanrich, Well_urbanpoor, Well_urbanmid,
                   Tanker_urbanrich, Tanker_urbanpoor, Tanker_urbanmid,
                   Tanker_tier2_urbanrich, Tanker_tier2_urbanpoor, Tanker_tier2_urbanmid)
]

In [62]:
# Output location for the processed tables.
# NOTE(review): absolute, user-specific path - consider moving it to a
# configuration cell.
basepath = r"/scratch/users/awan005/bhima_mas_code"
path = basepath + r"/outputs/data_processed/strategic_intervention_combination" +"/"

# The source lists are ordered (rich, poor, mid); rows are written in
# (rich, mid, poor) order, hence the index permutations below.

# Vulnerability: rows 0-2 come from the urban series, rows 3-5 from the rural ones.
# NOTE(review): the labels 'rich'/'mid'/'poor' repeat, so urban and rural rows
# can only be told apart by position in the file.
pd.DataFrame(
    [vul[k] for k in (0, 2, 1, 3, 5, 4)]
).set_index(
    [pd.Index(['rich','mid','poor','rich','mid','poor'])]
).to_csv(path + r"vul_strategic_combination.csv")

# Gini coefficients, one row per water source.
pd.DataFrame(
    [gini[k] for k in (0, 1, 2, 3, 4)]
).set_index(
    [pd.Index(['total','piped','well','tanker1','tanker2'])]
).to_csv(path + r"gini_strategic_combination.csv")

# Stress durations: same row layout (and same repeated labels) as vulnerability.
pd.DataFrame(
    [stress[k] for k in (0, 2, 1, 3, 5, 4)]
).set_index(
    [pd.Index(['rich','mid','poor','rich','mid','poor'])]
).to_csv(path + r"stress_strategic_combination.csv")

# Water use: piped, well, tanker, then the Tanker_tier2_* series.
# NOTE(review): the last three rows reuse the 'tanker_*' labels although they
# come from the Tanker_tier2_* arrays, which regional_water_use_lpcd_rich_mid_poor
# fills from the total-water column - confirm the intended labels.
pd.DataFrame(
    [use[k] for k in (0, 2, 1, 3, 5, 4, 6, 8, 7, 9, 11, 10)]
).set_index(
    [pd.Index(['piped_rich','piped_mid','piped_poor','well_rich','well_mid','well_poor','tanker_rich','tanker_mid','tanker_poor','tanker_rich','tanker_mid','tanker_poor'])]
).to_csv(path + r"use_strategic_combination.csv")