In [None]:
from RLTrain import Opt
import os
import time
#print(Opt())
import pandas as pd

from matplotlib.scale import FuncScale
from RadarGraph import *
plt.rcParams["font.size"]=14.0
import sys
sys.path.append('../')

from visualization_helper import *
from IPython.display import display,HTML,SVG
from action_functions import *

In [None]:
# Build the distillation-bench action matrix with create_matrix, show it inline and save it as SVG.
style = "<style>svg{width: 40% !important; height: 100% !important;}</style>"
# Sub-action levels 0.0, 0.2, ..., 1.0 and their percentage labels ("0%", "20%", ...).
vals = np.arange(6) / 5
names = ["%.0f" % (v * 100) + "%" for v in vals]

DISTILL_NAMES = ["Heat / Cool", "Pour DV into B1", "Pour B1 into B2", "Wait", "End Experiment"]

code = create_matrix(DISTILL_ACTIONS, DISTILL_NAMES, names, vals)
svg_header = '<svg viewBox="-100 0 2700 2900" xmlns="http://www.w3.org/2000/svg">'

display(HTML(style + svg_header + code + "</svg>"))

# Create the output folder first so the cell also works on a fresh checkout, and build the
# path with os.path.join instead of hard-coded backslashes.
distill_fig_dir = os.path.join("Figures", "Distill")
os.makedirs(distill_fig_dir, exist_ok=True)
with open(os.path.join(distill_fig_dir, "distill_nft.svg"), "w") as svg_file:
    svg_file.write(svg_header + code + "</svg>")

In [None]:
# Build the extraction-bench action matrix with create_matrix, show it inline and save it as SVG.
style = "<style>svg{width: 40% !important; height: 100% !important;}</style>"
# Sub-action levels 0.0, 0.2, ..., 1.0 and their percentage labels ("0%", "20%", ...).
vals = np.arange(6) / 5
names = ["%.0f" % (v * 100) + "%" for v in vals]

code = create_matrix(EXTRACT_ACTIONS, EXTRACT_NAMES, names, vals)
svg_header = '<svg viewBox="-100 0 2700 2900" xmlns="http://www.w3.org/2000/svg">'

display(HTML(style + svg_header + code + "</svg>"))

# Create the output folder first so the cell also works on a fresh checkout, and build the
# path with os.path.join instead of hard-coded backslashes.
extract_fig_dir = os.path.join("Figures", "Extract")
os.makedirs(extract_fig_dir, exist_ok=True)
with open(os.path.join(extract_fig_dir, "extract2_nft.svg"), "w") as svg_file:
    svg_file.write(svg_header + code + "</svg>")

# Methods for Trajectory parsing and Visualization

In [None]:
def hashed_trajectories(frame,N=None,N2=None,gamma=1):
    """Group rollout episodes by the sequence of actions taken.

    Every step contributes two characters to the episode key: the main action index
    followed by the sub-action index. The key is reset whenever a row has Step == 0 and
    is recorded when a row has Done set. Keys are only unambiguous while both indices
    are single digits.

    Args:
    - frame (dataframe):   Pandas Dataframe containing rollouts; the columns Step, Action,
                           Reward and Done are read, row by row in positional order
    - N (int):             Number of actions in MultiDiscrete dim 0 (accepted for
                           backward compatibility; not needed for the computation)
    - N2 (int):            Number of actions in MultiDiscrete dim 1. Required only when an
                           action is a flat integer, which is decoded as (a // N2, a % N2)
    - gamma (float):       Discount factor; a reward is weighted by gamma**Step

    Returns:
    - trajectories (dict): String representations of actions are keys and values are of
                           the form (count, mean_return)

    Raises:
    - ValueError: if a flat integer action is found and N2 was not given

    Example:

        frame:
               InState  Action  Reward  OutState   Done Info  Step
            0  0.631918  [0, 9]     0.0  0.632225  False   {}     0
            1  0.632225  [0, 9]     0.0    0.6319  False   {}     1
            2    0.6319  [4, 4]     0.8    0.6319   True   {}     2

        hashed_trajectories(frame)  ->  {'090944': (1, 0.8)}

    """
    trajectories = {}
    act_string = ""
    ret = 0

    # Walk the columns positionally so frames with a filtered / non-default index work too.
    rows = zip(frame.Step, frame.Action, frame.Reward, frame.Done)
    for step, act, reward, done in rows:
        if step == 0:
            # a new episode starts here
            act_string = ""
            ret = 0

        if np.ndim(act) == 0:
            # flat Discrete action: split it into (main action, sub-action)
            if N2 is None:
                raise ValueError("N2 is required to decode flat integer actions")
            main_action, sub_action = divmod(int(act), N2)
        else:
            main_action, sub_action = act[0], act[1]

        act_string += str(main_action) + str(sub_action)
        ret += reward * gamma**step

        if done:
            # running mean of the return over all episodes sharing this key
            count, mean_ret = trajectories.get(act_string, (0, 0))
            trajectories[act_string] = (count + 1, (ret + count * mean_ret) / (count + 1))

    return trajectories



def relabel_trajectory(trajectory:str,pouring_actions:set,wait_string:str,
                       end_string,bins_per_action:int,default_wait="1",dependencies=None):
    """
    Re-label a trajectory key so that actions which do nothing are replaced with waiting and
    every end-experiment action carries the same sub-action value.

    Args:
    - trajectory (str): Key with two characters per step (action, then sub-action)
    - pouring_actions (set): Action characters for which sub-action "0" is a no-op
    - wait_string (str): Action character that represents waiting
    - end_string (str): Action character that represents ending the experiment
    - bins_per_action (int): Number of sub-action bins; the end action gets the middle bin
    - default_wait (str): Sub-action written for every substituted wait
    - dependencies (dict): Maps an action to the prerequisite action that must have been taken
            with a non-zero sub-action earlier in the trajectory. Several actions may share
            one prerequisite.

    Returns:
    - new_trajectory (str): The relabelled key, same length as the input
    """
    dependencies = dependencies or {}

    # prerequisite -> every action waiting on it (a plain inversion of `dependencies`
    # would keep only one dependent per prerequisite)
    dependents_of = {}
    for dependent, prerequisite in dependencies.items():
        dependents_of.setdefault(prerequisite, []).append(dependent)
    satisfied = {dependent: 0 for dependent in dependencies}

    center = str(int(bins_per_action//2))
    relabelled = []

    for action, param in zip(trajectory[::2], trajectory[1::2]):
        # a non-zero use of a prerequisite unlocks all of its dependents
        if param != "0":
            for dependent in dependents_of.get(action, ()):
                satisfied[dependent] += 1

        if param == "0" and action in pouring_actions:
            # Pouring zero amount is the same as waiting
            action, param = wait_string, default_wait
        elif action in dependencies and not satisfied[action]:
            # the prerequisite has not happened yet, so this action had no effect
            action, param = wait_string, default_wait

        # standardize the end experiment action
        if action == end_string:
            param = center

        relabelled.append(action + param)

    return "".join(relabelled)

def relabel_trajectories(trajectories:dict,pouring_actions:set,wait_string:str,
                         end_string,bins_per_action:int,default_wait="1",dependencies=dict(),use_return=False):
    """
    Rebuild a trajectory dictionary after passing every key through relabel_trajectory.

    Keys that collapse onto the same relabelled key are merged: their counts are added and
    their mean returns are combined as a count-weighted average. `use_return` is accepted
    but not read.
    """
    merged = dict()

    for old_key, (old_count, old_return) in trajectories.items():
        new_key = relabel_trajectory(old_key, pouring_actions, wait_string, end_string,
                                     bins_per_action, default_wait, dependencies)
        # fold this entry into whatever is already stored under the relabelled key
        seen_count, seen_return = merged.get(new_key, (0, 0))
        weighted_sum = seen_return*seen_count + old_count*old_return
        merged[new_key] = (old_count + seen_count, weighted_sum/(old_count + seen_count))

    return merged

In [None]:
def trunclen(string):
    """Count the runs of consecutive identical actions in a trajectory key.

    Only every second character (the action, not its sub-action) is inspected.
    """
    actions = string[::2]
    # pair every action with its predecessor; the first one is compared with ""
    predecessors = [""] + list(actions[:-1])
    return sum(1 for before, now in zip(predecessors, actions) if now != before)


def get_stat_key(traj,stat="median",tol=1e-6):
    """
    Pick the trajectory key that represents a statistic of the returns.

    Args:
    - traj: Dictionary mapping key (string) to a (count, mean_return) pair
    - stat: "min" picks the lowest return, "max" the highest; any other value walks the keys
            in order of decreasing return and picks the first one at which the accumulated
            count reaches half of the total count
    - tol: Returns closer than this to the picked return count as ties. Ties are resolved
            towards the key with fewer action groups (trunclen); for stat == "median" a
            higher count wins first and trunclen only decides between equal counts.
    Returns:
    - stat_key: The key corresponding to your statistic
    """
    by_return = sorted(traj, key=lambda k: traj[k][1], reverse=True)
    counts = np.array([traj[k][0] for k in by_return])
    half_total = counts.sum()/2

    if stat == "min":
        chosen = by_return[-1]
    elif stat == "max":
        chosen = by_return[0]
    else:
        running = 0
        for k, c in zip(by_return, counts):
            running += c
            if running >= half_total:
                chosen = k
                break

    # The reference return stays fixed while `chosen` may move to a tied key.
    target_return = traj[chosen][1]
    for k, (c, r) in traj.items():
        if not abs(r - target_return) < tol:
            continue
        if stat == "median":
            current_count = traj[chosen][0]
            if c > current_count or (c == current_count and trunclen(k) < trunclen(chosen)):
                chosen = k
        elif trunclen(k) < trunclen(chosen):
            chosen = k
    return chosen

In [None]:
def show_trajectories(trajectories:dict, actions, bins_per_action, figsize=(7,3),
                      alpha_map=lambda x:x**2, max_allowed=1e6, fig_ax=None, L=None,use_return=False, **kwargs):

    """
    Plots a dictionary of hashed trajectories with transparency based off of the alpha map
    
    Args:
    - trajectories (dict): Dictionary where keys are the trajectories (two characters per step)
            and values are indexable weights; element 0 is used unless use_return is set
            (ex: the (count, mean_return) pairs produced by hashed_trajectories)
    - actions (list of str): List of the names of each action
    - bins_per_action (int): The number of bins for each main action in the multidiscrete
    - figsize (tuple): The size you want the figure to be
    - alpha_map (function): A function that translates normalized values (divided by the max to fit in (-inf,1))
            to alpha values
    - max_allowed (float): The maximum number of trajectories which can be plotted
    - fig_ax (tuple): A (fig, ax) pair from an earlier call. When given, no new figure or
            background bands are created and the pair is returned unchanged.
    - L (int): Upper x limit (number of steps shown). Defaults to the larger of 12 and the
            step count of the highest-weighted trajectory.
    - use_return (bool): Index into each value: False selects element 0, True element 1.
    - **kwargs: Forwarded to plt.plot for every trajectory line.

    Returns:
    - fig, ax: The figure and axes that were created or passed in through fig_ax
    
    """

    b = bins_per_action
    # y position of a step: each action owns a band of height 10; the sub-action bin is
    # placed inside that band.
    tograph = lambda x: [int(i)*10+float(x[2*j+1])*7.8/b+(9-9*.78)/2 for j,i in enumerate(x[::2])]
    
    #order the dict based on the selected weight, largest first
    sorted_hashes = sorted([a for a in trajectories],key=lambda x:trajectories[x][use_return],reverse=True)
    sorted_amounts = [trajectories[x][use_return] for x in sorted_hashes]
    
    #scale the graph
    if L is None:
        L=max(12,len(sorted_hashes[0])//2)
    
    if fig_ax is None:
        fig = plt.figure(1,figsize=figsize, dpi=240, facecolor='w', edgecolor='k')
        ax=plt.gca()

        # one shaded, labelled horizontal band per action
        for j,act in enumerate(actions[::-1]):
            j=len(actions)-j-1
            plt.fill_between([-0.5,L],[j*10-0.5,j*10-0.5],[j*10+9.5,j*10+9.5],alpha=0.5)
            plt.text(L*0.9875,j*10+5,act,horizontalalignment="right",bbox=dict(boxstyle="square",facecolor="w",edgecolor="k",alpha=0.2))

        plt.xlabel("Step")
        plt.ylabel("Action")
        plt.xlim(-0.5,L)
        plt.ylim(-0.5,len(actions)*10-0.5)
        plt.yticks([])
    
    
    else:
        fig,ax=fig_ax
    # NOTE(review): lines are drawn with plt.plot, i.e. on pyplot's current axes, not on `ax`.
    for a,string in enumerate(sorted_hashes):
        if a<max_allowed:
            # alpha comes from the weight relative to the largest weight
            plt.plot(tograph(string),"-",alpha=alpha_map(sorted_amounts[a]/sorted_amounts[0]),**kwargs)
        else:break
            

    return fig,ax

# Distillation Bench

In [None]:
# Folder holding the distillation-bench runs; later cells append algorithm names to it.
parent_dir = "MODELS\\GenWurtzDistill-v1"

# Rollouts of the trained agents (last=False) and of the heuristic policy (last=True).
folders, objectives = load_rollouts(parent_dir, obj=default_obj, last=False, verbose=False)
Hfolders, Hobjectives = load_rollouts(
    parent_dir + "\\Heuristic", obj=default_obj, last=True, verbose=False
)

## Results During Training

In [None]:
# Preparing the plot data is slow, so the plotting cell stores one [mean, stdv, steps, n]
# entry per algorithm here and reuses it when the plot is regenerated.
cache = []

In [None]:
# Training curves on the distillation bench: one mean line with a +/- stdv band per algorithm.
fig = plt.figure(1,figsize=(8,6), dpi=240, facecolor='w', edgecolor='k')

# Line colour and marker per algorithm, indexed by the iteration order of `folders`.
colors = ["r","b","g","darkviolet","y","c","k"]
points = ['s','.', '+', 'D','*',"x","p"]

for i,algo in enumerate(folders):
    subf = parent_dir+"\\"+algo
    #Average over the different runs
    # Only compute the curve when `cache` has no entry for this position yet.
    if len(cache)<=i:
        returns,counts=merge_varying_graphs(subf,steps=1000,separate_runs=False)  
        mean,stdv,steps,n = mean_stdv_step_n(returns,interp_steps=1,steps=1000)
        cache.append([mean,stdv,steps,n])
    else:
        mean,stdv,steps,n=cache[i]
    
    plt.plot(steps,mean,"-",marker=points[i],color=colors[i],label=algo,ms=3,lw=0.5)
    # the band gets slightly more transparent for every further algorithm
    plt.fill_between(steps,mean-stdv,mean+stdv,color=colors[i],alpha=0.4-i/20,lw=0)
    
# x axis ends at the next multiple of 5000 above the last step (uses `steps` of the last algorithm)
plt.xlim(steps[0],np.ceil(steps[-1]/5e3)*5e3)
plt.xlabel("Step")
plt.ylabel("Return")

# dashed reference line at 0.82 and a solid zero line
plt.plot([steps[0],np.ceil(steps[-1]/5e3)*5e3],[0.82,0.82],"k--",label="Best Possible Return")
plt.plot([0,1e5],[0.0,0.0],"k-")
plt.legend(loc="lower right",fontsize=10)  

plt.ylim(-0.3,1)
# The figure is saved before the title is set, so the PDF has no title.
plt.savefig("Figures\\Distill\\DistillPerformance.pdf",bbox_inches="tight")
plt.title("WurtzDistill-v1 Average Return vs Sequential Step \n (10 envs in used in parallel, average over 10 runs)")

plt.show()

## Looking at Trajectories

In [None]:
def salt_check(frame,get_salt=True,has_salt_targ=False):
    """Select rollout rows by the "NaCl" flag stored in each row's Info dict.

    Args:
    - frame (dataframe): Rollouts with an Info column of dicts and an InState column of arrays
    - get_salt (bool): Keep the rows whose Info["NaCl"] is set (True) or the other rows (False)
    - has_salt_targ (bool): When False, additionally keep only rows whose stacked InState
            has a value below 0.9 at position [0, -1]

    Returns:
    - cframe (dataframe): The selected rows with a fresh 0..n-1 index
    """
    salty_mask = np.array([info["NaCl"] for info in frame.Info])
    row_mask = salty_mask if get_salt else ~salty_mask
    cframe = frame[row_mask]

    if not has_salt_targ:
        in_states = np.stack(cframe.InState)
        cframe = cframe[in_states[:,0,-1] < 0.9]

    return cframe.reset_index(drop=True)

# Band labels for the distillation-bench trajectory plots.
actions = ['dT', 'Pour 0->1', 'Pour 1->2', 'Wait', 'End Experiment']

In [None]:
# Line colour and marker used for each algorithm's best trajectory.
colors={"PPO":"darkgreen","PPO-XL":"darkred","A2C":"darkgoldenrod","DQN":"darkblue"}
markers={"PPO":".","PPO-XL":"*","A2C":"^","DQN":"2"}

# (fig, ax) returned by show_trajectories; None on the first call so the figure is created once.
figax=None
# not read anywhere in this cell
legend=[]

# Best trajectory key per algorithm.
max_trajectories = dict()
for alg in ["DQN","A2C","PPO","PPO-XL"]:

    #change this depending on if you want trajectories with or without salt
    cframe=salt_check(folders[alg],True)
    
    raw_trajectories=hashed_trajectories(cframe,N=4,N2=10,gamma=1.0)
    
    #getting the best trajectories
    info_key=get_stat_key(raw_trajectories,"max")
    
    max_trajectories[alg]=info_key
    #plotting returns
    # a one-entry dictionary, so only the best trajectory of this algorithm is drawn
    info_dict={info_key:raw_trajectories[info_key]}
    # legend entry: algorithm name followed by the mean return of that trajectory
    label=alg+" (%.2f)"%raw_trajectories[info_key][1]
    figax=show_trajectories(info_dict, actions, 10, figsize=(20,3), max_allowed=1, fig_ax=figax,L=24,ms=8,color=colors[alg],marker=markers[alg],label=label)

figax[1].legend(loc=(0.5,0.7),fontsize=8)
plt.title("Best Trajectory")

plt.show()

In [None]:
# dmap ignores the action index and returns (sub-action / 10, 1); it is handed to
# show_actions_mean_grouped below.
dmap = lambda x, y: (y / 10, 1)

style = "<style>svg{width: 30% !important; height: 80% !important;}</style>"

# Make sure the output folder exists before any file is written.
distill_strategy_dir = os.path.join("Figures", "Distill", "Strategies")
os.makedirs(distill_strategy_dir, exist_ok=True)

# Draw and save the best trajectory of every algorithm. trunclen is the helper defined with
# the trajectory methods earlier in the notebook; it is not redefined here.
for alg in max_trajectories:
    act = max_trajectories[alg]
    print(alg)

    # 400 units of width per group of repeated actions
    svg_header = f'<svg width="{400*trunclen(act)}" height="{500}" viewBox="-100 0 {400*trunclen(act)} 500" xmlns="http://www.w3.org/2000/svg">'
    code = show_actions_mean_grouped(act, 10, DISTILL_ACTIONS, dmap)
    display(HTML(style + svg_header + code + "</svg>"))

    with open(os.path.join(distill_strategy_dir, f"{alg}_best_salted.svg"), "w") as svg_file:
        svg_file.write(svg_header + code + "</svg>")

# Extraction Bench

In [None]:
# Folder holding the extraction-bench runs; later cells append algorithm names to it.
parent_dir = "MODELS\\GenWurtzExtract-v2"
#parent_dir="MODELS\\DiscreteWurtzExtract-v1\\PPO-XL\\13-03-2023--06-08-15"

# Alternative objective: negative distance of default_obj from 0.15 (not passed to
# load_rollouts below).
sus = lambda x: -abs(default_obj(x) - 0.15)

# Rollouts of the trained agents (last=False) and of the heuristic policy (last=True).
folders, objectives = load_rollouts(parent_dir, obj=default_obj, last=False, verbose=False)

#eheuristic = pd.read_pickle("MODELS\\WurtzExtract-v1\\Heuristic\\rollout")
Hfolders, Hobjectives = load_rollouts(
    parent_dir + "\\Heuristic", obj=default_obj, last=True, verbose=False
)

# Action labels; the two lists differ only in their last two entries.
ACTIONS_V2 = [
    "Drain EV to B1", "Mix EV", "Pour B1 into EV", "Pour B2 into EV",
    "Pour EV into B2", "Pour S1 into EV", "Pour S2 into EV", "Wait", "End Experiment",
]

ACTIONS_V1 = [
    "Drain EV to B1", "Mix EV", "Pour B1 into EV", "Pour B2 into EV",
    "Pour EV into B2", "Pour S1 into EV", "Pour S2 into EV", "End Experiment", "Wait (Implicit)",
]

## Results During Training

In [None]:
# Plot-data cache for the extraction bench: one [mean, stdv, steps, n] entry per algorithm.
cache2 = []

In [None]:
# Training curves on the extraction bench: one mean line with a +/- stdv band per algorithm.
fig = plt.figure(1,figsize=(8,6), dpi=240, facecolor='w', edgecolor='k')

# Line colour and marker per algorithm, indexed by the iteration order of `folders`.
colors = ["r","b","g","darkviolet","y","c","k"]
points = ['s','.', '+', 'D','*',"x","p"]

for i,algo in enumerate(folders):
    subf = parent_dir+"\\"+algo

    # Only compute the curve when `cache2` has no entry for this position yet.
    if len(cache2)<=i:
        returns,counts=merge_varying_graphs(subf,steps=1000,separate_runs=False)  
        mean,stdv,steps,n = mean_stdv_step_n(returns,interp_steps=1,steps=1000)
        cache2.append([mean,stdv,steps,n])
    else:
        mean,stdv,steps,n=cache2[i]
    
    plt.plot(steps,mean,"-",marker=points[i],color=colors[i],label=algo,ms=3,lw=0.5)
    # the band gets slightly more transparent for every further algorithm
    plt.fill_between(steps,mean-stdv,mean+stdv,color=colors[i],alpha=0.4-i/20,lw=0)
    
    
# dashed reference line at 9/14, spanning up to the next multiple of 5000 above the last step
plt.plot([steps[0],np.ceil(steps[-1]/5e3)*5e3],[9/14,9/14],"k--",label="Best Possible Return")

# solid zero line
plt.plot([0,1e5],[0.0,0.0],"k-",lw=1)
    
plt.xlim(steps[0],np.ceil(steps[-1]/5e3)*5e3)
plt.xlabel("Step")
plt.ylabel("Return")
plt.legend(loc="lower right",fontsize=10)

plt.ylim(-0.65,0.7)

#plt.yscale(FuncScale(plt.gca(),(lambda x: x*(x>0)+x*(x<=0)/10,lambda x: x*(x>0)+x*(x<=0)*10)))
#plt.yticks([-0.5,0,0.5])
# The figure is saved before the title is set, so the PDF has no title.
plt.savefig("Figures\\Extract\\Extract2runs.pdf",bbox_inches="tight")
plt.title("WurtzExtract-v2 Average Return vs Sequential Step \n (10 envs in used in parallel, average over 10 runs)")

plt.show()

## Looking at Trajectories

In [None]:
# Band labels for the extraction-bench trajectory plot.
actions=ACTIONS_V2

# NOTE(review): not used in this cell; it has the shape relabel_trajectory expects
# (action -> prerequisite action) - confirm whether relabelling was meant to be applied.
dependencies={"2":"0","3":"4"}
# Line colour and marker used for each algorithm's best trajectory.
colors={"PPO":"darkgreen","PPO-XL":"darkred","A2C":"darkgoldenrod","DQN":"darkblue"}
markers={"PPO":".","PPO-XL":"*","A2C":"^","DQN":"2"}
# (fig, ax) returned by show_trajectories; None on the first call so the figure is created once.
figax=None

legend=[]
# placeholder; rebound to a trajectory key inside the loop
info_key=dict()
max_trajectories=dict()
# index into CWtargs selecting which target's rollouts are shown
targ=0

for alg in ["DQN","A2C","PPO","PPO-XL"]:

    # presumably target_subset keeps only the episodes for target `targ` - TODO confirm
    raw_trajectories=hashed_trajectories(target_subset(folders[alg],len(CWtargs),targ),N=7,N2=5)
    info_key=get_stat_key(raw_trajectories,"max")
    max_trajectories[alg]=info_key
    
    # a one-entry dictionary, so only the best trajectory of this algorithm is drawn
    info_dict={info_key:raw_trajectories[info_key]}    
    # legend entry: algorithm name followed by the mean return of that trajectory
    legend.append(f"{alg} (%.2f)"%raw_trajectories[info_key][1])
    figax=show_trajectories(info_dict, actions, 5, figsize=(16,2), max_allowed=1, fig_ax=figax,L=30,ms=8,color=colors[alg],marker=markers[alg],label=legend[-1])

figax[1].legend(loc=(0.5,0.5),fontsize=8)

plt.show()

In [None]:
# Display the best trajectory key stored for each algorithm.
max_trajectories

In [None]:
# emap returns (value, 1): for action index 7 the value is 2**(y-1)/16, otherwise y/5.
emap = lambda x, y: (2**(y-1)/16 if x == 7 else y/5, 1)

style = "<style>svg{width: 30% !important; height: 80% !important;}</style>"

# Draw the best trajectory of every algorithm and store it under the folder of the current target.
for alg, act in max_trajectories.items():
    print(alg)

    svg_header=f'<svg width="{400*trunclen(act)}" height="{500}" viewBox="-100 0 {400*trunclen(act)} 500" xmlns="http://www.w3.org/2000/svg">'
    code = show_actions_mean_grouped(act, 10, EXTRACT_ACTIONS, emap, False)
    svg_document = svg_header + code + "</svg>"

    display(HTML(style + svg_document))

    # the target folder is created on demand
    os.makedirs(f"Figures\\Extract\\Strategies\\{CWtargs[targ]}",exist_ok=True)
    with open(f"Figures\\Extract\\Strategies\\{CWtargs[targ]}\\{alg}_best.svg","w") as f:
        f.write(svg_document)

In [None]:
colors = ["r","b","g","darkviolet","y","c","k"]
def p_success(scores,levels):
    """Return, for every level, how many scores are at least that level (as a float array)."""
    score_arr = np.array(scores)
    reached = [(score_arr >= level).sum()*1.0 for level in levels]
    return np.array(reached, dtype=float)

# Heuristic returns, drawn as vertical reference lines further down.
levels = np.array(Hobjectives["Heuristic"])
#levels[0]=0
Hist = dict()
# 40 bin edges (39 bins) from -0.05 to slightly above the last heuristic level
x = np.linspace(-0.05, levels[-1] + 0.05, 40)

# plt.hist is used only to obtain the bin counts; the scratch figure is closed afterwards.
for i, alg in enumerate(objectives):
    print(len(objectives[alg]))
    counts, xpos, __ = plt.hist(objectives[alg], bins=x, color=colors[i], alpha=0.5)
    Hist[alg] = counts
#plt.show()
plt.close()

names = [a for a in Hist]
dx = x[1] - x[0]
for i in range(x.shape[0] - 1):

    arr = [(Hist[alg][i], j) for j, alg in enumerate(names)]

    # Draw the tallest bar of each bin first so the shorter ones stay visible on top of it.
    for val, idx in sorted(arr, key=lambda pair: -abs(pair[0])):
        if i == 0:
            # label only once per algorithm so the legend has no duplicates
            plt.bar(x[i] + dx/2, val, color=colors[idx], label=names[idx], width=dx, alpha=1)
        else:
            plt.bar(x[i] + dx/2, val, color=colors[idx], width=dx, alpha=1)

plt.legend()

# Tallest bin over all algorithms. It does not depend on `level`, so it is computed once
# here instead of inside the loop below (it is also needed after that loop).
height = max([Hist[alg].max() for alg in Hist])
for level in levels:
    plt.plot([level, level], [0, height*1.05], "--", color=(0.5, 0.5, 0.5))

plt.yticks(np.arange(height + 1)[::2])
plt.ylim(0, height*1.05)

plt.xlabel("Return")
plt.ylabel("Number of Policies")
plt.title("Histogram of Policy Returns (30 Policies For each Algorithm)")

In [None]:
# Deliberately raises ZeroDivisionError. Presumably a stop marker so that "Run All" halts
# before the scratch cells below - TODO confirm.
1/0

In [None]:
# Fixed layout width: eight action groups. This is kept in its own constant; rebinding the
# global trunclen helper to a constant lambda would break every later cell that calls it.
n_groups = 8

svg_header = f'<svg width="{400*n_groups}" height="{500}" viewBox="-100 0 {400*n_groups} 500" xmlns="http://www.w3.org/2000/svg">'

code = show_actions_extra_grouped("5074700004040404647472040404040480", 10, EXTRACT_ACTIONS, extract_v2_action_map, False)

display(HTML(svg_header + code + "</svg>"))




In [None]:
import os
# Runs RLTrain.py in a shell with the settings spelled out in the command string
# (PPO on WurtzExtract-v1, 20 envs, seed 1, 50000 steps). os.system waits for it to finish.
os.system("python RLTrain.py algorithm=PPO environment=WurtzExtract-v1 n_envs=20 seed=1 steps=50000 dummy_vec=True")

In [None]:
import os
# NOTE(review): identical command to the previous cell, including seed=1, so running both
# repeats the same training run - confirm whether this second cell is still needed.
os.system("python RLTrain.py algorithm=PPO environment=WurtzExtract-v1 n_envs=20 seed=1 steps=50000 dummy_vec=True")

In [None]:
import os
def rollouts(folder):
    """Run RLTest.py on every run directory found below the MODELS folder for `folder`.

    A directory counts as a run when its path contains both "-v" and "2023". For each one
    the "--best" evaluation is started with os.popen and the plain evaluation with os.system.
    """
    root = "./MODELS\\"+folder
    for run_dir, _subdirs, _files in os.walk(root):
        is_run_dir = "-v" in run_dir and "2023" in run_dir
        if not is_run_dir:
            continue
        # print the path without its leading "./"
        print(run_dir[2:])
        # NOTE(review): the os.popen handle is never read or closed, so the "--best"
        # evaluation is not waited for - confirm this is intended.
        os.popen("python RLTest.py %s steps=10000 --best"%run_dir)
        os.system("python RLTest.py %s steps=10000"%run_dir)


rollouts("GenWurtzDistill-v1\\DQN")

In [None]:
import os
def rollouts(folder):
    """Scan the training logs below the MODELS folder for `folder` and find the best logged value.

    Every directory that contains an "output.log" is inspected. Lines containing "BEST" are
    parsed as "<text>:<value> ..." and the value is read as a float. The best value of each
    log is printed.

    NOTE(review): this reuses the name of the RLTest launcher defined in an earlier cell and
    replaces it once this cell has run.

    Args:
    - folder (str): Sub-folder of MODELS to search

    Returns:
    - best (float): Highest value found in any log (-1e10 if there was none)
    - dir_ (str): Directory whose log contained that value ("" if there was none)
    """
    best = -1e10
    dir_ = ""
    for run_dir, _subdirs, files in os.walk(os.path.join("./MODELS", folder)):
        if "output.log" not in files:
            continue
        print(len(files)-6, end="|")
        this_best = -1e10
        # Join with the platform separator; a literal backslash only names the file on Windows.
        with open(os.path.join(run_dir, "output.log")) as log:
            for line in log:
                if "BEST" in line:
                #if "Mean Return" in line:
                    val = float(line.split(":")[1].split()[0])
                    if val > this_best:
                        this_best = val
                    if val > best:
                        best = val
                        dir_ = run_dir
        print(this_best)
    return best, dir_


rollouts("GenWurtzExtract-v2")

In [None]:
from RLTrain import *

In [None]:
def f(count=[0]):
    """Create one GenWurtzExtract-v1 gym environment; `count` is accepted but never read."""
    return gym.make("GenWurtzExtract-v1")

# The same factory is handed to DummyVecEnv twenty times.
env = DummyVecEnv([f for _ in range(20)])
model = PPO.load("MODELS\\GenWurtzExtract-v1\\PPO-XL\\02-04-2023--06-51-29\\best_model.zip")

In [None]:
# Attach the vectorised environment to the loaded model and fix its random seed.
model.set_env(env)
model.set_random_seed(10)

# Better visualization

- Logos with lines

- Adjacency matrix with values as transition probabilities

- Amounts (action sub-value) show repeating actions

- Padding character (could instead just look at actions n-m)

- Managing these new strings with conventional methods

In [None]:
def trunclen(string):
    """Count the runs of consecutive identical actions in a trajectory key.

    Only every second character (the action, not its sub-action) is inspected.
    """
    actions = string[::2]
    # pair every action with its predecessor; the first one is compared with ""
    predecessors = [""] + list(actions[:-1])
    return sum(1 for before, now in zip(predecessors, actions) if now != before)


# Hand-written example trajectory: two characters per step (action, then sub-action).
act = "0909090929090909090940"

style = "<style>svg{width: 30% !important; height: 80% !important;}</style>"

# 400 units of width per group of repeated actions
svg_header = f'<svg width="{400*trunclen(act)}" height="{500}" viewBox="-100 0 {400*trunclen(act)} 500" xmlns="http://www.w3.org/2000/svg">'

#all_actions = [lambda x:DISTILL_0(x/2+0.5)]+DISTILL_ACTIONS[1:]

code = show_actions_mean_grouped(act, 10, DISTILL_ACTIONS, dmap)


display(HTML(style + svg_header + code + "</svg>"))

# Create the output folder first so the cell also works on a fresh checkout, and build the
# path with os.path.join instead of hard-coded backslashes.
distill_fig_dir = os.path.join("Figures", "Distill")
os.makedirs(distill_fig_dir, exist_ok=True)
with open(os.path.join(distill_fig_dir, "Strategy-3.svg"), "w") as svg_file:
    svg_file.write(svg_header + code + "</svg>")

In [None]:

# Load the rollouts stored under <parent_dir>\Heuristic\2; the second return value is discarded.
frame, _ = load_rollouts(
    parent_dir + "\\Heuristic\\2", obj=default_obj, last=True, verbose=False
)

# The "Heuristic" entry is the rollout frame that gets hashed below.
f = frame["Heuristic"]

#frame=folders[alg]
#target_subset(frame,len(CWtargs),targ)

raw_trajectories = hashed_trajectories(f, N=7, N2=5)

In [None]:
# Trajectory keys ordered by ascending count, so the last key is the most frequent one.
keys = sorted(raw_trajectories, key=lambda k: raw_trajectories[k][0])

# (count, mean_return) of the most frequent trajectory
raw_trajectories[keys[-1]]

In [None]:
# Display every trajectory key, ordered by ascending count.
keys

In [None]:
# The most frequent heuristic trajectory (last entry of the count-sorted keys).
act = keys[-1]#[:8]+"80"

style = "<style>svg{width: 30% !important; height: 80% !important;}</style>"

# 400 units of width per group of repeated actions
svg_header = f'<svg width="{400*trunclen(act)}" height="{500}" viewBox="-100 0 {400*trunclen(act)} 500" xmlns="http://www.w3.org/2000/svg">'


code = show_actions_mean_grouped(act, 10, EXTRACT_ACTIONS, emap)


display(HTML(style + svg_header + code + "</svg>"))

# Create the output folder first so the cell also works on a fresh checkout. The file handle
# is deliberately not called `f`: that name holds the heuristic rollout frame used below.
extract_fig_dir = os.path.join("Figures", "Extract")
os.makedirs(extract_fig_dir, exist_ok=True)
with open(os.path.join(extract_fig_dir, "Strategy-2.svg"), "w") as svg_file:
    svg_file.write(svg_header + code + "</svg>")

In [None]:
# Shape of the stacked Action column of `f`.
# NOTE(review): this needs `f` to still be the heuristic rollout frame; if `f` has since been
# rebound by a `with open(...) as f:` block, the attribute lookup fails.
np.stack(f.Action).shape

In [None]:
# NOTE(review): `frame` is the first return value of load_rollouts and is indexed with
# "Heuristic" above, so it may not have an Action attribute itself - confirm.
np.stack(frame.Action).shape

In [None]:
# Bar chart of the sorted returns of every algorithm, one group of overlaid bars per rank.
names = [a for a in objectives]

fig = plt.figure(1, figsize=(2,2), dpi=240, facecolor='w', edgecolor='k')

# Sort each algorithm's returns once instead of once per bar.
sorted_objectives = {alg: sorted(objectives[alg]) for alg in names}
# Show at most 20 ranks, and fewer when an algorithm has fewer runs than that.
n_runs = min(20, min((len(vals_) for vals_ in sorted_objectives.values()), default=20))

x = np.arange(n_runs) + 1
for i in range(n_runs):

    arr = [(sorted_objectives[alg][i], j) for j, alg in enumerate(names)]

    # Draw the largest magnitude first so the smaller bars stay visible on top of it.
    for val, idx in sorted(arr, key=lambda pair: -abs(pair[0])):
        if i == 0:
            # label only once per algorithm so a legend would have no duplicates
            plt.bar(x[i], val, color=colors[idx], label=names[idx])
        else:
            plt.bar(x[i], val, color=colors[idx])

#plt.legend()
plt.xticks(x)
#plt.yticks([])
plt.ylabel("Best Return")
plt.xlabel("Run (sorted)")
#plt.ylim(-0.3,1)
#plt.savefig("Legacy\\Figures\\tmpEB.pdf")