# In [1]:
import numpy as np
import time
from pandas import read_excel, DataFrame, ExcelWriter
from collections import OrderedDict
from datetime import timedelta
from stodymmoea import *
from platypus import *


def pareto_front_dataframe(pf_l):
    """
    Tabulate a list of Pareto-front solutions, one row per distinct solution.

    Relies on module-level settings (dv_l, d_no, dv_no, dscdv_i, dsc_mult,
    o_no, u_p, ca_t, the stream arrays and the column-name lists).

    Parameters
    ----------
    pf_l : list
        Solutions exposing ``variables`` and ``objectives``.

    Returns
    -------
    DataFrame
        Columns are pfdfcn_l. Each row holds the unique dam locations in
        ascending order, the matching storage capacities, their total, the
        drained share of the catchment area (%), the objective values, the
        dam life expectancies with their mean, and the Euclidean distance of
        the objectives from the Utopian point u_p. Duplicate rows are dropped
        and rows are sorted by the location and capacity columns.
    """
    # decoded target locations: the first d_no decision variables of each solution
    site_rows = [[dv_l[0].decode(sol.variables[k]) for k in range(d_no)] for sol in pf_l]

    # storage capacities of each solution's dams
    if dscdv_i == True:  # Integer variables: decode and scale by dsc_mult
        capacity_rows = [[dsc_mult*dv_l[1].decode(sol.variables[k]) for k in range(d_no, dv_no)]
                         for sol in pf_l]
    else:  # Subset variable: the last variable already holds the capacities
        capacity_rows = [sol.variables[-1] for sol in pf_l]

    # objective values, one row per solution
    objective_rows = np.array([[sol.objectives[k] for k in range(o_no)] for sol in pf_l])

    # fill the dataframe row by row
    table = DataFrame(columns=pfdfcn_l)
    for row_no, (sites, capacities) in enumerate(zip(site_rows, capacity_rows)):
        # np.unique sorts the locations; first_seen keeps the capacities aligned with them
        unique_sites, first_seen = np.unique(sites, return_index=True)
        site_capacities = np.array(capacities)[first_seen]

        drained = dam_drainage_area(fa_a, unique_sites, sluc_a, slco_a, pa_v)
        drained_pct = 100 * (np.sum(drained)/ca_t)

        _, life_exp, _ = dam_deployment_output(sluc_a, slco_a, fa_a, ssi_a,
                                               unique_sites, drained, site_capacities,
                                               pa_v, ste_tv, dle_max, csw_kg, s_sg)

        objectives = objective_rows[row_no]
        distance = (np.sum(np.power(objectives-u_p, 2)))**0.5

        table.loc[row_no, pfdfcn_l] = [*unique_sites, *site_capacities,
                                       np.sum(site_capacities), drained_pct,
                                       *objectives,
                                       *life_exp, np.mean(life_exp),
                                       distance]

    # drop repeated rows, then order by dam location and storage capacity
    table = table.drop_duplicates()
    table = table.sort_values(by=didcn_l+disccn_l)
    return table.reset_index(drop=True)


def stream_connection_structure():

    """
    Derive the upstream connectivity of the stream network.

    Reads the module-level arrays sid_a (stream IDs) and nsid_a (next stream
    IDs). The walk starts at the outlet, i.e. the single segment whose next
    stream ID equals len(nsid_a) + 1, and moves upstream one level at a time.

    This function calculates:
    (a) all the connected upstream segments for each stream segment,
    (b) the connected order of stream segments, returned upstream-most first.

    Returns
    -------
    sluc_a : numpy.ndarray (dtype=object, one slot per entry of sid_a)
        For a segment with upstream links: an array starting with its own ID
        followed by the IDs found upstream of it. For a segment with nothing
        upstream: an empty array. Slots never written stay None.
    slco_a : numpy.ndarray (dtype=object, 1-D)
        The per-level ID arrays collected during the walk, in reverse order of
        discovery (the outlet level is last).

    Raises
    ------
    ValueError
        If the network does not contain exactly one outlet segment.
    """

    # an empty array to store stream link upstream connection
    sluc_a = np.empty(len(sid_a), dtype=object)

    # an empty list to store the levels of stream segments, outlet first
    slco_l = []

    # end (outlet) segment
    sl_mds = sid_a[np.where(nsid_a == (len(nsid_a) + 1))[0]]
    if len(sl_mds) != 1:
        raise ValueError("expected exactly one outlet segment (NSID == len(NSID) + 1), found "
                         + str(len(sl_mds)))
    # .item() replaces np.asscalar, which was removed from NumPy
    sl_mdsi = np.where(sid_a == sl_mds)[0].item()
    sluc_a[sl_mdsi] = sl_mds
    slco_l.append(sl_mds)

    while True:
        if len(sl_mds) == 1:  # single segment at this level: step to its upstream segments
            sl_nuci = np.where(nsid_a == sl_mds)[0]
            sl_mds = sid_a[sl_nuci]
            # NOTE(review): this branch always extends the outlet's slot (sl_mdsi),
            # also when the single segment is not the outlet itself - confirm intended
            sluc_a[sl_mdsi] = np.append(sluc_a[sl_mdsi], sl_mds)
            slco_l.append(sl_mds)
        else:  # several (or zero) segments at this level
            # indices of the segments draining into each current segment; kept as a
            # plain list because the per-segment index arrays differ in length
            sl_nuci = [np.where(nsid_a == v1)[0] for v1 in sl_mds]
            sl_nuc = [sid_a[v2] if len(v2) != 0 else v2 for v2 in sl_nuci]
            # NOTE(review): indexing with v3-1 assumes sid_a[k] == k + 1 - confirm
            for v3, v4 in zip(sl_mds, sl_nuc):
                if len(v4) != 0:
                    sluc_a[v3-1] = np.append(sid_a[v3-1], v4)
                else:
                    sluc_a[v3-1] = v4
            for v5 in sl_mds:  # push each segment's upstream list into every entry that contains it
                sl_uci = np.array([i_v6 for i_v6, v6 in enumerate(sluc_a)
                                   if v6 is not None and v5 in v6 and i_v6 != v5 - 1])
                for v7 in sl_uci:
                    v7_uuc = np.append(sluc_a[v7], sluc_a[v5-1])
                    # de-duplicate while keeping first-occurrence order
                    v7_usp = np.unique(v7_uuc, return_index=True)
                    sluc_a[v7] = v7_uuc[np.sort(v7_usp[1])]
            sl_nenuc = [sid_a[v8] for v8 in sl_nuci if len(v8) != 0]
            if len(sl_nenuc) != 0:  # continue if non-empty next upstream connection is found
                sl_mds = np.concatenate(sl_nenuc)
                slco_l.append(sl_mds)
            else:  # otherwise break the loop
                break

    # connected order of stream segments, upstream-most level first; filled slot by
    # slot because the levels differ in length and np.array() rejects ragged input
    slco_a = np.empty(len(slco_l), dtype=object)
    for i_v9, v9 in enumerate(reversed(slco_l)):
        slco_a[i_v9] = v9

    return sluc_a, slco_a  # return the outputs


# pixel area value in km^2
pa_v = 20**2 / 10**6

# Sediment Trapping Efficiency threshold value
ste_tv = 0.1

# dam life expectancy minimum and maximum values
dle_min = 20
dle_max = 30

# dam initial storage capacity multiplier for Integer decision variables
dsc_mult = 10**3

# conversion of sediment unit to Kg, and sediment specific gravity
csw_kg = 907.185
s_sg = 2650

# epsilon value
eps_v = 0.001

# catchment pixel number dictionary
cpn_d = {
    "Majiagou": 34915,
    "Shejiagou": 12419,
}

# catchment dam number dictionary
cdn_d = {
    "Majiagou": 10,
    "Shejiagou": 5,
}

# catchment dam storage capacity upper limit dictionary
cdscul_d = {
    "Majiagou": 700000,
    "Shejiagou": 240000,
}

# catchment dam storage capacity Integer dictionary
cdsci_d = {
    "Majiagou": Integer(40, 100),
    "Shejiagou": Integer(20, 60),
}

# catchment dam storage capacity Subset dictionary (upper bounds inclusive)
cdscs_d = {
    "Majiagou": list(range(40000, 100001, 5000)),
    "Shejiagou": list(range(20000, 60001, 1000)),
}


# catchment selected for this run and the settings looked up by its name
cn_s = "Shejiagou"  # catchment string name
d_no = cdn_d[cn_s]  # number of dams
dsc_ul = cdscul_d[cn_s]  # dam initial storage capacity upper limit
ca_t = pa_v * cpn_d[cn_s]  # catchment area (km^2)

# stream characteristics, read from the "SC_Data" sheet of the catchment's workbook
sc_fn = cn_s + "_Stream_Characteristics.xlsx"  # stream characteristics file name
sc_df = read_excel(sc_fn, sheet_name="SC_Data")  # stream characteristics dataframe

# stream ID, next stream ID and flow accumulation arrays (integer columns)
sid_a, nsid_a, fa_a = (np.array(sc_df[v1], dtype=int) for v1 in ("SID", "NSID", "DFA"))

# stream sediment input array (float column)
ssi_a = np.array(sc_df["SSI"], dtype=float)

# stream link upstream connection and connected order
sluc_a, slco_a = stream_connection_structure()

# run sizes
o_no = 4  # objectives
ps_no = 50  # population size
fe_no = 1000  # function evaluations
s_no = 4  # seeds
cc_no = 4  # computer cores

dscdv_i = True  # Integer type of dam storage capacity is True

# EpsNSGAII features dictionary; the Subset case adds a variator below
epsnsga2_d = {"population_size": ps_no,
              "epsilons": eps_v}

# decision variables and other feature
if dscdv_i == True:  # if dams' storage capacity is Integer
    dv_no = 2 * d_no  # decision variables number
    dv_l = [Integer(1, len(sid_a)), cdsci_d[cn_s]]  # decision variable list
else:  # if dams' storage capacity is Subset
    dv_no = d_no + 1  # decision variables number
    dv_l = [Integer(1, len(sid_a)), Subset(cdscs_d[cn_s], d_no)]  # decision variable list
    epsnsga2_d["variator"] = CompoundOperator(HUX(), BitFlip(), SSX(), Replace())

# objective decision list
od_l = [Problem.MAXIMIZE] + [Problem.MINIMIZE] * 3

# constraint sign list
cs_l = ["==0", "==0", "==0", "<=0"]

# Utopian point
u_p = np.array([1, 0, 0, 0])

# optimization required values and array
ov_t = (dv_l, od_l, cs_l, d_no, dle_min, dle_max, dsc_ul, dsc_mult, pa_v, ste_tv, csw_kg, s_sg)
oa_t = (sluc_a, slco_a, fa_a, ssi_a)

# optimization output and time
# Runs only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    ss_t = time.time()  # wall-clock start of the run
    oa_l = [(EpsNSGAII, epsnsga2_d)]  # algorithm list: (algorithm, its keyword settings)
    op_l = [StoDyM(dv_no, o_no, len(cs_l), ov_t, oa_t)]  # problem list
    with ProcessPoolEvaluator(cc_no) as mp_c:  # evaluator built from cc_no (computer cores)
        # results; indexed further down as or_d[algorithm][problem] -> one solution set per seed
        or_d = experiment(oa_l, op_l, nfe=fe_no, seeds=s_no, evaluator=mp_c, display_stats=True)  
        se_t = time.time()  # wall-clock end, taken while the evaluator is still open
        r_t = se_t - ss_t  # elapsed run time in seconds

# column name lists (one entry per dam unless noted otherwise)
didcn_l = ["D" + str(k) for k in range(d_no)]  # dam stream ID
disccn_l = ["SC" + str(k) + "(m^3)" for k in range(d_no)]  # dam storage capacity
dlecn_l = ["LE" + str(k) + "(yr)" for k in range(d_no)]  # dam life expectancy
ocn_l = ["O-LE", "O-SD", "O-SDR,ST", "O-SDR,LT"]  # objective (fixed set of four)

# Pareto-front dataframe: locations, capacities, totals, objectives, life expectancy, distance
pfdfcn_l = (didcn_l + disccn_l + ["ITSC(m^3)", "ITDA(%)"] + ocn_l
            + dlecn_l + ["LE_A(yr)", "Distance"])

# write the output
# Guarded like the optimization run: or_d and r_t are only assigned under the
# __main__ guard, so running this section on a plain import of the module
# (e.g. in worker processes that re-import the main module) would raise NameError.
if __name__ == "__main__":
    for v1 in or_d.keys():  # iterate algorithm list
        for v2 in or_d[v1].keys():  # iterate problem list
            # NOTE(review): the file name does not include v2, so several problems under
            # one algorithm would overwrite each other's workbook - confirm
            v2_fn = "_".join(map(str, [cn_s, v1, ps_no,
                                       str(int(fe_no/10**3))+"k", "LS"]))  # simulation output file name

            v2ms_l = []  # feasible solutions merged over all seeds
            for v3 in or_d[v1][v2]:  # iterate seed solution
                v2ms_l.extend(v4 for v4 in v3 if v4.feasible)

            v2pf_l = nondominated(v2ms_l)  # list of non-dominated solutions
            v2pf_df = pareto_front_dataframe(v2pf_l)  # Pareto-front dataframe with detailed characteristics

            t_df = DataFrame(OrderedDict([("Total", r_t),
                                          ("HMS_Format", str(timedelta(seconds=round(r_t))))]),
                             index=[0]).T  # time dataframe

            # the context manager saves and closes the workbook on exit
            # (ExcelWriter.save() is no longer available in current pandas)
            with ExcelWriter(v2_fn+".xlsx", engine="xlsxwriter") as v2xl_w:
                v2pf_df.to_excel(v2xl_w, sheet_name="Pareto_Front", index=False)  # write Pareto-efficient set
                t_df.to_excel(v2xl_w, sheet_name="Time", index=True, header=False)  # write time