In [1]:
from SD_IB_IRP_PPenv import steroid_IRP
from Policies import policies
import matplotlib.pyplot as plt
import scipy.stats as st

  '''


# Policy Evaluation

This Notebook has a complete Policy Evaluation function for the Stochastic-Dynamic Inventory-Routing-Problem with Perishable Products. First, the main parameters of the problem and the environment must be set. All the main customizable parameters are below.

In [2]:
# =============================== Environment's parameters ===============================
# Every value below is read as a notebook-level global by the cells that follow.

# Random seed (forwarded to env.reset as rd_seed)
rd_seed = 0

# SD-IB-IRP-PP model settings (forwarded to the steroid_IRP constructor)
backorders = 'backorders'
stochastic_parameters = ['q', 'd']

# Feature settings (forwarded to the steroid_IRP constructor)
look_ahead = ['q', 'd']
historical_data = ['*']

# Action settings (forwarded to env.step)
validate_action = True
warnings = False

# Number of episodes requested from the policy evaluation
num_episodes = 1

# Environment dimensions (forwarded to the steroid_IRP constructor as env_config)
env_config = dict(M=10, K=10, T=7, F=4, S=4, LA_horizon=3)

# Distribution settings forwarded to env.reset.
# NOTE(review): presumably q = available quantities, d = demand, p = prices,
# h = holding costs -- confirm against the environment's documentation.
q_params = dict(distribution='c_uniform', min=6, max=20)
d_params = dict(distribution='log-normal', mean=2, stdev=0.5)

p_params = dict(distribution='d_uniform', min=20, max=60)
h_params = dict(distribution='d_uniform', min=20, max=60)

# ============================ End of environment's parameters ============================

Next, the policy evaluation is defined. This function takes the number of episodes to run in the environment as its parameter. All the policies to be evaluated must be in the 'Policies.py' file. Any policy used must be able to receive the state, the additional information stored in `_`, and the environment as parameters. Also, the policy function must return an action in the format defined in Toying.ipynb.

In [7]:
'''

'''
def Policy_evaluation(num_episodes = 1000, verbose = True):
    '''
    Simulate the stochastic rolling-horizon policy and log what happened in every period.

    The environment is built from the notebook-level settings (look_ahead, historical_data,
    backorders, stochastic_parameters, env_config) and is reset before each episode with
    rd_seed and the q/p/d/h parameter dictionaries.

    Parameters
    ----------
    num_episodes : int
        Number of episodes to simulate.
    verbose : bool
        When True (the default) the state and the chosen action are printed every period.

    Returns
    -------
    tuple
        (rewards, states, real_actions, backorders, la_decisions, realized_dem, q_sample,
        tws, iterables, costs). The first eight items are dicts keyed by (episode, env.t);
        iterables is (Suppliers, Products, Samples, M_kt, O_k, p_t, Horizon) and costs is
        (c, h_t, p_t), all read from the environment after the last episode.

    Notes
    -----
    NOTE(review): states are keyed with env.t read *before* env.step, every other log
    with env.t read *after* it. If env.step advances env.t, the two families of keys are
    shifted by one period -- confirm against the environment.
    NOTE(review): the same rd_seed is passed to every reset; confirm whether identical
    episodes are intended.
    '''

    rewards = {}
    states = {}
    real_actions = {}
    # Named differently from the module-level `backorders` setting so that the setting
    # (and not this empty log) is what reaches the environment constructor below.
    backorders_log = {}
    la_decisions = {}
    realized_dem = {}
    q_sample = {}
    tws = {}

    env = steroid_IRP( look_ahead = look_ahead,
                       historical_data = historical_data,
                       backorders = backorders,
                       stochastic_parameters = stochastic_parameters,
                       env_config = env_config)

    policy = policies()

    # Honour the requested number of episodes (this used to be hard-coded to 2).
    for episode in range(num_episodes):

        state, info = env.reset(return_state = True, rd_seed = rd_seed,
          q_params = q_params,
          p_params = p_params,
          d_params = d_params,
          h_params = h_params)
        done = False

        while not done:

            states[episode,env.t] = state
            action, la_dec = policy.stochastic_rolling_horizon(state, info, env)
            if verbose:
                print(state)
                print(action)
            state, reward, done, real_action, info = env.step(action, validate_action = validate_action, warnings = warnings)

            real_actions[episode,env.t] = real_action
            backorders_log[episode,env.t] = info["backorders"]
            rewards[episode,env.t] = reward
            la_decisions[episode,env.t] = la_dec
            realized_dem[episode,env.t] = env.W_t["d"]
            # One entry per sample, in the order of env.Samples
            q_sample[episode,env.t] = [info["sample_paths"]["q"][0,s] for s in env.Samples]
            # On the terminal step the window size is recorded as 1
            if done:
                tws[episode,env.t] = 1
            else:
                tws[episode,env.t] = info["sample_path_window_size"]

    iterables = (env.Suppliers, env.Products, env.Samples, env.M_kt, env.O_k, env.p_t, env.Horizon)
    costs = (env.c, env.h_t, env.p_t)

    return rewards, states, real_actions, backorders_log, la_decisions, realized_dem, q_sample, tws, iterables, costs

In [None]:
# NOTE(review): this rebinds the notebook-level `backorders` setting (a string in the
# parameters cell) to the dict of logged backorders; re-run the parameters cell before
# calling Policy_evaluation again.
(rewards, states, real_actions, backorders, la_decisions,
 realized_dem, q_sample, tws, iterables, costs) = Policy_evaluation(num_episodes = num_episodes)

In [None]:
def get_historical_decisions(episode, states, real_actions, backorders, K, T):
    '''
    Collect the realized data of one episode, indexed by period.

    Parameters
    ----------
    episode : first component of the (episode, t) keys used by the three log dicts.
    states, real_actions, backorders : dicts keyed by (episode, t).
    K : iterable of product identifiers.
    T : iterable of periods to extract.

    Returns
    -------
    (ii_0, rout, purch, back): dicts keyed by t holding, respectively, the logged state,
    element 0 of the logged action, element 1 of the logged action, and a {k: backorder}
    dict in which products missing from the log are filled with 0.
    '''
    inventory_by_t, routes_by_t, purchases_by_t, backorders_by_t = {}, {}, {}, {}

    for period in T:
        log_key = (episode, period)

        # Initial inventory level (the state observed in that period)
        inventory_by_t[period] = states[log_key]

        # Routing decisions come first in the action, purchasing decisions second
        routes_by_t[period] = real_actions[log_key][0]
        purchases_by_t[period] = real_actions[log_key][1]

        # Backorders, defaulting to zero for products that were not logged
        logged = backorders[log_key]
        per_product = {}
        for product in K:
            per_product[product] = logged[product] if product in logged else 0
        backorders_by_t[period] = per_product

    return inventory_by_t, routes_by_t, purchases_by_t, backorders_by_t

def get_lookahead_decisions(episode, day, la_decisions, T):
    '''
    Re-index the look-ahead decisions taken on `day` by absolute period.

    la_decisions[episode, day] holds three sequences (initial inventory levels, purchase
    decisions, backorders) indexed by the offset from `day`; each one is returned as a
    dict keyed by the periods in T, where period t reads offset t - day.

    Returns
    -------
    (ii_0_la, purch_la, back_la)
    '''
    def by_period(component):
        # Looked up per period so that an empty T never touches la_decisions
        return {period: la_decisions[episode, day][component][period - day] for period in T}

    initial_inventory = by_period(0)
    purchases = by_period(1)
    backlog = by_period(2)

    return initial_inventory, purchases, backlog

def charts_max_axis_values(episode, states, real_actions, backorders, la_decisions, tws, iterables, costs):
    '''
    Compute axis limits shared by the daily charts of one episode.

    Parameters
    ----------
    episode : episode whose logs are inspected.
    states, real_actions, backorders, la_decisions, tws : dicts keyed by (episode, t).
    iterables : tuple (M, K, S, M_kt, O_k, p, T) as returned by Policy_evaluation.
    costs : unused here; kept so the signature matches the other chart functions.

    Returns
    -------
    list
        [ub1, (ub31, ub32)] where
        ub1  is the upper limit of the quantities chart: the largest realized or
             look-ahead total, rounded down to a multiple of 100 plus 200;
        ub31 is the x limit of the backorders histogram (next multiple of 10 above the
             largest same-day look-ahead backorder total);
        ub32 is the tallest histogram bin count over all days.
    '''
    # Local import: numpy is not imported at notebook level and is only needed here.
    import numpy as np

    M, K, S, M_kt, O_k, p, T = iterables

    ''' Realized, Historical Decisions '''
    ii_0, rout, purch, back = get_historical_decisions(episode,states,real_actions,backorders, K, T)

    ''' Lookahead Decisions '''
    ii_0_la, purch_la, back_la = {}, {}, {}
    for t in T:
        ii_0_la[t], purch_la[t], back_la[t] = get_lookahead_decisions(episode, t, la_decisions, range(t,t+tws[episode,t]))

    ''' First chart'''
    # Past and today's realized decisions: one scalar total per period. Building the
    # totals directly avoids indexing positional lists by t (which assumed T = 0..n-1)
    # and avoids taking max() over one-element lists, which made ub1 a list.
    candidates = [sum(ii_0[t][k,o] for k in K for o in range(1,O_k[k]+1))
                  + sum(purch[t][i,k] for i in M for k in K)
                  + sum(back[t][k] for k in K)
                  for t in T]

    initi = {(tau,t,s):sum(ii_0_la[tau][t][s][k,o] for k in K for o in range(1,O_k[k]+1)) for tau in T for t in range(tau,tau+ tws[episode,tau]) for s in S}
    repl = {(tau,t,s):sum(purch_la[tau][t][s][i,k] for i in M for k in K) for tau in T for t in range(tau,tau+ tws[episode,tau]) for s in S}
    backo = {(tau,t,s):sum(back_la[tau][t][s][k] for k in K) for tau in T for t in range(tau,tau+ tws[episode,tau]) for s in S}
    # Future lookahead decisions (may be empty when every window has size 1)
    candidates += [initi[tau,t,s]+repl[tau,t,s] for tau in T for t in range(tau+1,tau+ tws[episode,tau]) for s in S]
    # Today lookahead decisions
    candidates += [initi[tau,tau,s]+repl[tau,tau,s]+backo[tau,tau,s] for tau in T for s in S]
    # A single max over one flat list works whichever of the groups above are empty
    ub1 = max(candidates)
    ub1 = (int(ub1/100)+2)*100

    ''' Second chart '''
    # Same-day look-ahead backorder totals, one list (over samples) per day
    todays_back = {tau:[sum(back_la[tau][tau][s][k] for k in K) for s in S] for tau in T}
    # Max x value
    ub31 = max(total for tau in T for total in todays_back[tau])
    ub31 = (int(ub31/10)+1)*10
    # Max value for y axis: bin counts are computed without drawing anything, using the
    # same default number of bins that Axes.hist would use.
    ub32 = 0
    for tau in T:
        counts, _edges = np.histogram(todays_back[tau], bins=plt.rcParams["hist.bins"])
        tallest = float(counts.max())
        if tallest > ub32:
            ub32 = tallest

    return [ub1,(ub31,ub32)]


def append_routes(rout):
    '''
    Return the distinct intermediate stops of a collection of routes.

    The first and last element of every route are dropped before the remaining stops
    are merged; the result is a list without duplicates (in set iteration order).
    '''
    inner_stops = [stop for route in rout for stop in route[1:-1]]
    return list(set(inner_stops))

def quantity_boxplot(ax, suppliers, q_s, K, S, colors, ind):
    '''
    Draw one box summarising the per-supplier average available quantity.

    For every supplier i the quantities q_s[s][i,k] are summed over products and samples
    and divided by the number of samples; the resulting values form a single box placed
    at x = 0.75 + 1.5*ind. Flier colours are taken from colors["box_avail"].
    Returns whatever ax.boxplot returns.
    '''
    n_samples = len(S)
    avg_per_supplier = []
    for i in suppliers:
        total = sum(q_s[s][i,k] for k in K for s in S)
        avg_per_supplier.append(total/n_samples)

    edge_color, face_color = colors["box_avail"][0], colors["box_avail"][1]
    flier_style = dict(markerfacecolor=face_color, markeredgecolor=edge_color)

    return ax.boxplot(avg_per_supplier,
                      positions=[0.75+1.5*ind],
                      widths=[0.3],
                      vert=True,
                      patch_artist=True,
                      flierprops=flier_style)

def price_boxplot(ax, suppliers, p, day, K, colors, ind):
    '''
    Draw one box summarising the per-supplier average price on `day`.

    For every supplier i the prices p[day][i,k] strictly below 1e3 are averaged over
    products; the resulting values form a single box placed at x = 1.25 + 1.5*ind.
    Flier colours are taken from colors["box_prices"].
    Returns whatever ax.boxplot returns.
    '''
    todays_prices = p[day] if suppliers else None

    # First pass: keep only the prices under the 1e3 cut-off for each supplier
    kept = {}
    for i in suppliers:
        kept[i] = [todays_prices[i,k] for k in K if todays_prices[i,k] < 1e3]

    # Second pass: average what was kept
    avg_per_supplier = [sum(kept[i])/len(kept[i]) for i in suppliers]

    edge_color, face_color = colors["box_prices"][0], colors["box_prices"][1]
    flier_style = dict(markerfacecolor=face_color, markeredgecolor=edge_color)

    return ax.boxplot(avg_per_supplier,
                      positions=[1.25+1.5*(ind)],
                      widths=[0.3],
                      vert=True,
                      patch_artist=True,
                      flierprops=flier_style)

def visualize_by_day(episode, day, states, real_actions, backorders, la_decisions, realized_dem, q_sample, tws, iterables, costs, conf_level, max_vals, myopic_actions):
    '''
    Build the four-panel figure describing one day of one episode.

    Panels: (1) realized and look-ahead quantities with realized demand, (2) histogram of
    the same-day look-ahead backorders, (3) boxplots of available quantities and prices
    for three supplier subsets, (4) costs -- not implemented yet.

    Parameters
    ----------
    episode, day : episode and period to draw.
    states, real_actions, backorders, la_decisions, realized_dem, q_sample, tws :
        dicts keyed by (episode, t), as returned by Policy_evaluation.
    iterables : tuple (M, K, S, M_kt, O_k, p, T) as returned by Policy_evaluation.
    costs : currently unused.
    conf_level : confidence level of the Student-t intervals drawn on the first panel.
    max_vals : [ub1, (ub31, ub32)] axis limits, as returned by charts_max_axis_values.
    myopic_actions : dict keyed by (episode, t) whose element 0 holds the routes of the
        myopic policy.

    Returns
    -------
    None. The figure is created through plt.subplots and is not returned.
    '''

    azul = (41/255,122/255,204/255)
    verde = (70/255,145/255,57/255)
    naranja = (235/255,140/255,68/255)
    morado = (99/255,45/255,235/255)
    rosado = (199/255,93/255,169/255)

    colors = {"hold":[naranja,"palegreen"],
                   "back":[azul,"cyan"],
                   "purch":[verde,"palegreen"],
                   "rout":[rosado,"violet"],
                   "dem":[morado,"mediumpurple"],
                   "box_avail":[(91/255,179/255,77/255),(213/255,230/255,123/255)],
                   "box_prices":[(181/255,21/255,0/255),(223/255,159/255,156/255)]}

    M, K, S, M_kt, O_k, p, T = iterables

    def t_interval(values):
        # Student-t confidence interval for the mean of `values`. The level is passed
        # positionally because SciPy renamed the keyword from `alpha` to `confidence`.
        return st.t.interval(conf_level, df=len(values)-1, loc=sum(values)/len(values), scale=st.sem(values))

    ''' Realized, Historical Decisions '''
    hist_T = range(day+1)
    ii_0, rout, purch, back = get_historical_decisions(episode,states,real_actions,backorders, K, hist_T)

    ''' Realized demand '''
    d = {t:realized_dem[episode,t] for t in hist_T}

    ''' Look-ahead Decisions '''
    la_T = range(day,day+tws[episode,day])
    ii_0_la, purch_la, back_la = get_lookahead_decisions(episode, day, la_decisions, la_T)

    ''' Quantity sample paths '''
    # q_sample[episode, t] is a list with one entry per sample, stored in the order of
    # S, so it is re-keyed by sample here (it used to be read as q_sample[day, s]).
    q_s = dict(zip(S, q_sample[episode,day]))

    ''' Myopic algorithm decisions '''
    rout_myopic = {t:myopic_actions[episode,t][0] for t in T}

    spec = {"height_ratios":[1, 1, 1, 1],"hspace":0.25,"bottom":0.1,"top":0.9}
    fig, (ax1,ax2,ax3,ax4) = plt.subplots(nrows=4,ncols=1,figsize=(13,20),gridspec_kw=spec)

    ''' First Chart: Quantities '''
    # Max value for the axis
    ub1 = max_vals[0]

    # Realized decisions
    for t in hist_T:
        initi = sum(ii_0[t][k,o] for k in K for o in range(1,O_k[k]+1))
        repl = sum(purch[t][i,k] for i in M for k in K)
        backo = sum(back[t][k] for k in K)
        # Today's realized bar is half width and shifted left, leaving room for the
        # look-ahead bar drawn next to it
        if t == day:
            x_adj = 0.2
            wid = 0.4
        else:
            x_adj = 0
            wid = 0.8
        ax1.bar(x=t-x_adj, height=initi, color=colors["hold"][0],width=wid)
        ax1.bar(x=t-x_adj, height=repl, bottom=initi, color=colors["purch"][0],width=wid)
        ax1.bar(x=t-x_adj, height=backo, bottom=initi+repl, color=colors["back"][0], width=wid)

    # Look-ahead decisions
    for t in la_T:
        # NOTE(review): these lists are indexed by s below, which assumes S is 0..n-1
        initi = [sum(ii_0_la[t][s][k,o] for k in K for o in range(1,O_k[k]+1)) for s in S]
        repl = [sum(purch_la[t][s][i,k] for i in M for k in K) for s in S]
        backo = [sum(back_la[t][s][k] for k in K) for s in S]
        if t == day:
            # Today: a single stacked bar (inventory + purchases + backorders)
            tq = [initi[s]+repl[s]+backo[s] for s in S]
            cin = t_interval(tq)
            # axvline takes ymin/ymax as axis fractions, hence the division by ub1
            ax1.axvline(x=t+0.2,ymin=cin[0]/ub1,ymax=cin[1]/ub1,color="black",marker="_",mew=1.5,ms=8)
            bot_back = (sum(initi)+sum(repl))/len(S)
            x_adj = 0.2
        else:
            # Future days: inventory + purchases on the left, backorders on the right
            tq = [initi[s]+repl[s] for s in S]
            cin = t_interval(tq)
            ax1.axvline(x=t-0.2,ymin=cin[0]/ub1,ymax=cin[1]/ub1,color="black",marker="_",mew=1.5,ms=8)
            cin = t_interval(backo)
            ax1.axvline(x=t+0.2,ymin=cin[0]/ub1,ymax=cin[1]/ub1,color="black",marker="_",mew=1.5,ms=8)
            bot_back = 0
            x_adj = -0.2
        ax1.bar(x=t+x_adj, height=sum(initi)/len(S), color=colors["hold"][0], width=0.4, alpha=0.5)
        ax1.bar(x=t+x_adj, height=sum(repl)/len(S), bottom=sum(initi)/len(S), color=colors["purch"][0], width=0.4, alpha = 0.5)
        ax1.bar(x=t+0.2, height=sum(backo)/len(S), bottom=bot_back, color=colors["back"][0], width=0.4, alpha=0.5)

    # Historical realized demand
    ax1.plot([t for t in hist_T],[sum(d[t][k] for k in K) for t in hist_T],linestyle="-",marker="*",markersize=12,color="black")

    # Chart config
    ax1.set_xlim(-0.5,len(T)-0.5)
    ax1.set_ylim(0,ub1)
    ax1.set_xlabel("Time period")
    ax1.set_ylabel("Quantity")
    # Zero-height bars and an off-axis point exist only to create the legend entries
    ax1.bar(x=day,height=0,color=colors["back"][0],label="Backorders")
    ax1.bar(x=day,height=0,color=colors["purch"][0],label="Replenishment")
    ax1.bar(x=day,height=0,color=colors["hold"][0],label="Initial Inv. Level")
    ax1.plot(day+len(T),0,color="black",linestyle="-",marker="*",markersize=9,label="Demand")
    ax1.legend(loc="upper right",ncol=2)

    ''' Second chart: backorders histogram '''
    # Max values for axis
    ub31, ub32 = max_vals[1]

    # Backorders histogram
    ax2.hist([sum(back_la[day][s][k] for k in K) for s in S], color=colors["back"][1], edgecolor=colors["back"][0], alpha = 0.5, density = False, label="Backorders")

    # Chart config
    ticks = [i for i in range(0,int(ub31+1),int(ub31/10))]
    ax2.set_xticks(ticks=ticks)
    ax2.set_xlabel("Units")
    ax2.set_xlim(-5,ub31+5)
    ax2.set_ylim(0,ub32+1)
    ax2.legend(loc="upper center",ncol=2)

    ''' Third chart: boxplots '''

    # Availability boxplots
    suppliers_visited = {"all":M, "st":append_routes(rout[day]), "myo":append_routes(rout_myopic[day])}

    box1 = quantity_boxplot(ax3,suppliers_visited["all"],q_s,K,S,colors,0)
    box2 = quantity_boxplot(ax3,suppliers_visited["st"],q_s,K,S,colors,1)
    box3 = quantity_boxplot(ax3,suppliers_visited["myo"],q_s,K,S,colors,2)
    for boxi in [box1,box2,box3]:
        for item in ['boxes', 'whiskers', 'fliers', 'medians', 'caps']:
            if item not in ['medians','boxes']:
                plt.setp(boxi[item], color = colors["box_avail"][0])
            elif item == "boxes":
                boxi[item][0].set(color = colors["box_avail"][0])
                boxi[item][0].set(facecolor = colors["box_avail"][1])
            else:
                plt.setp(boxi[item], color = colors["box_avail"][0])

    # Prices boxplots (drawn on a twin y axis)
    ax31 = ax3.twinx()
    box5 = price_boxplot(ax31,suppliers_visited["all"],p,day,K,colors,0)
    box6 = price_boxplot(ax31,suppliers_visited["st"],p,day,K,colors,1)
    box7 = price_boxplot(ax31,suppliers_visited["myo"],p,day,K,colors,2)
    for boxi in [box5,box6,box7]:
        for item in ['boxes', 'whiskers', 'fliers', 'medians', 'caps']:
            if item not in ['medians','boxes']:
                plt.setp(boxi[item], color = colors["box_prices"][0])
            elif item == "boxes":
                boxi[item][0].set(color = colors["box_prices"][0])
                boxi[item][0].set(facecolor = colors["box_prices"][1])
            else:
                plt.setp(boxi[item], color = colors["box_prices"][0])

    # Chart config
    ax31.yaxis.set_major_formatter("${x:1.0f}")
    ax31.set_ylabel("Price",rotation=270,labelpad=12)

    # NOTE(review): a fourth tick ("Random") is labelled but no boxes are drawn for it
    ax3.set_xticks([1,2.5,4,5.5])
    ax3.set_xticklabels(["All Suppliers","Stochastic","Myopic","Random"])
    ax3.set_xlabel("Subsets of Suppliers")
    ax3.set_ylabel("Available Quantity")
    ax3.set_xlim(0.5,6)
    ax3.legend(loc="upper center")

    ''' Fourth chart: Costs '''
    



[1, 2, 3, 6]