In [None]:
import numpy as np 
from matplotlib import pyplot as plt
import json

In [None]:
# Root folder for all file I/O in this notebook: the data<N>.json recipes are written here and the
# MAP_elite_generation_<N>/ folders (measurement results, saved pools) are resolved relative to it.
base_directory="./"

In [None]:
# Seed NumPy's global random generator so that the sampling, crossover and mutation draws below
# repeat when the notebook is re-run from a fresh kernel.
np.random.seed(100)

In [None]:
def obtain_random_samples(feature_num,initial_num=10):
    """
    Draw random samples whose features are non-negative and sum to one.

    Every row is built by drawing feature_num-1 uniform cut points, sorting them together
    with the end points 0 and 1, and taking the gaps between consecutive values.

    Args:
        feature_num: number of features (columns) per sample
        initial_num: number of samples (rows) to generate
    Returns:
        X: array of shape (initial_num, feature_num) holding the generated samples
    """
    cut_points = np.random.uniform(size=(initial_num, feature_num - 1))
    row_count = cut_points.shape[0]
    lower_edge = np.zeros((row_count, 1))
    upper_edge = np.ones((row_count, 1))
    # pad every row with the fixed end points, then order the values within each row
    edges = np.sort(np.hstack((lower_edge, cut_points, upper_edge)))
    # the spacing between neighbouring edges is the sample itself
    return np.diff(edges, axis=-1)

In [None]:
def obtain_constrained_random_samples(feature_num,constrain=1,initial_num=10):
    """
    Draw uniform random samples in the unit hypercube by rejection sampling, keeping only
    those whose features sum to at most `constrain`.

    Args:
        feature_num: number of features (columns) per sample
        constrain: upper bound on the sum of the features of one sample
        initial_num: number of accepted samples to return
    Returns:
        X: array of shape (initial_num, feature_num) holding the accepted samples
    """
    accepted = []
    while len(accepted) < initial_num:
        # one candidate per draw, so the random stream is consumed one row at a time
        candidate = np.random.uniform(0, 1, (1, feature_num))
        if candidate.sum() > constrain:
            continue
        accepted.append(candidate)
    # the empty leading block keeps the (0, feature_num) shape when nothing was requested
    return np.concatenate([np.empty((0, feature_num))] + accepted, axis=0)

In [None]:
def cross_over(pool_absorption,feature_num):
    """
    Build one offspring by uniform crossover of two pool members.

    Two parents are picked at random (with replacement, so both may be the same row) and
    every feature of the offspring is copied from one of the two parents chosen at random.
    The parent indices and the resulting offspring are printed.

    Args:
        pool_absorption: the pool of high absorption performance; the first feature_num
                         columns of every row are the sample features
        feature_num: the number of sample features
    Returns:
        offspring: array of shape (1, feature_num) holding the offspring after crossover
    """
    parents = np.array(pool_absorption)[:, 0:feature_num]
    parent_ids = np.random.choice(len(parents), size=2, replace=True)
    # one coin flip per feature decides which parent donates that feature
    genes = [parents[parent_ids[np.random.randint(2)]][col] for col in range(feature_num)]
    offspring = np.zeros((1, feature_num))
    offspring[0, :] = genes
    print("crossover")
    print(parent_ids)
    print(offspring)
    return offspring

In [None]:
def mutation(offspring,mutate_rate,sigma,constrain=1):
    """
    Mutate an offspring with a Gaussian step while keeping it feasible.

    A sample is feasible when every feature is non-negative and the features sum to at
    most `constrain`. When the full Gaussian step would leave that region, the step is
    shortened along the same direction so that the result stops at the first boundary hit.

    Args:
        offspring: the offspring to be mutated; it is reshaped to a single row
        mutate_rate: the probability of mutation
        sigma: the standard deviation of the Gaussian mutation step
        constrain: upper bound on the sum of the features
    Returns:
        mutated_offspring: array of shape (1, n) holding the (possibly) mutated offspring;
                           when no mutation happens it can share memory with the input
        mutation_flag: True if a mutation was applied, False otherwise
    """
    offspring = offspring.reshape(1, -1)
    # Pull an over-constraint input back onto the boundary. The rescale targets `constrain`
    # itself (not 1), so the function stays consistent for any constraint value; for the
    # default constrain=1 the multiplication is exact and the result is unchanged.
    total = offspring.sum()
    if total > constrain:
        offspring = offspring / total * constrain

    if not np.random.random() < mutate_rate:
        return offspring, False

    print("mutation_operation on")
    mutation_direction = np.random.normal(0, sigma, offspring.shape)
    offspring_direct = offspring + mutation_direction

    # Collect, for every violated boundary, the fraction of the step that just reaches it.
    solutions = []
    if offspring_direct.sum() > constrain:
        solutions.append((constrain - offspring.sum()) / mutation_direction.sum())
    rows, cols = np.where(offspring_direct < 0)
    for row, col in zip(rows, cols):
        solutions.append(0 - offspring[row, col] / mutation_direction[row, col])
    solutions = np.array(solutions)

    # Because of numerical error a fraction can come out slightly below zero;
    # it is tiny in magnitude, but report it so that it can be inspected.
    if (solutions < 0).sum() > 0:
        print("warning:mutation_direction changed in mutation!")
        print(solutions)
    # remove that numerical error by clamping such fractions to 0
    solutions[solutions < 1e-10] = 0

    if len(solutions) > 0:
        # at least one boundary is hit: stop at the nearest one
        return offspring + np.min(solutions) * mutation_direction, True
    # the full step stays inside the feasible region
    return offspring + mutation_direction, True

In [None]:
def generate_offspring(feature_num,pool_absorption,mutation_rate,sigma,batch_size):
    """
    Generate the next set of experiments from the current pool.

    The batch is filled in two stages. First, crossover children (each optionally mutated
    with probability mutation_rate) are added until at least batch_size/2 unique rows exist.
    Then randomly chosen pool members are mutated and added until batch_size unique rows
    exist. After every insertion the rows are de-duplicated with np.unique, which also
    sorts them. Diagnostic and sanity-check messages are printed throughout.

    Args:
        feature_num: the number of sample features
        pool_absorption: the pool recording the information of the maximized absorption band;
                        each row is [features, performance, index of attribute]
        mutation_rate: the probability that a crossover child is mutated
        sigma: the standard deviation of the mutation step
        batch_size: the number of offspring to generate for one generation
    Returns:
        offspring: array with one generated offspring per row and feature_num columns
    """
    # Stage 1: crossover followed by an optional mutation
    offspring=np.empty((0,feature_num))
    while len(offspring)<batch_size/2:
        # perform the crossover; the child is re-normalized below if it breaks the sum constraint
        offspring_temp_original=cross_over(pool_absorption,feature_num)
        # scale the child back onto the boundary if its features sum to more than 1
        # (the result might still differ slightly from 1)
        if offspring_temp_original.sum(axis=1)>1:
            offspring_temp_original = offspring_temp_original/offspring_temp_original.sum(axis=1)
            
            # some numerical error may remain after the division;
            # print the sum so that its size can be checked
            if offspring_temp_original.sum() != 1: 
                print("offspring_temp_original is not exactly equal to 1")
                print(offspring_temp_original.sum())

        # perform the mutation; the probability of being mutated is mutation_rate
        # near the boundary the effective mutation step can be small because the boundary limits it
        print("before mutation")
        print(offspring_temp_original)
        offspring_temp,mutation_flag=mutation(offspring_temp_original,mutation_rate,sigma)
        print(mutation_flag)
        print("after mutation")
        print(offspring_temp)
        # check point: negative values should be no larger than numerical noise
        if sum(abs(offspring_temp[offspring_temp<0])>1e-5)>0:
            print("Error! The negative value is too large!")
            print(offspring_temp)
        # correct the numerical error after mutation by clipping negative values to 0
        offspring_temp[offspring_temp<0] = 0
        
        # if a mutation was applied but the child is numerically unchanged, force another mutation
        # (rate 1) until it differs; this avoids getting trapped at the boundary
        while (mutation_flag == True) and ((abs(offspring_temp - offspring_temp_original)<1e-10).sum() == feature_num):
            offspring_temp,mutation_flag=mutation(offspring_temp_original,1,sigma)
            # check point: if a negative value is too large, there is an error
            if sum(abs(offspring_temp[offspring_temp<0])>1e-5)>0:
                print("Error! The negative value is too large!")
                print(offspring_temp)
            # correct the numerical error after mutation
            offspring_temp[offspring_temp<0] = 0
            if mutation_flag == False:
                print("Error in crossover!")
        # after everything, check that the sum constraint still holds
        # a small numerical excess is acceptable; it is reported and the row is re-normalized
        if offspring_temp.sum(axis=1)>1:
            print("In crossover, the summation is larger than 1!")
            print("Mutation flag: {}".format(mutation_flag))
            print(offspring_temp.sum())
            offspring_temp=offspring_temp/offspring_temp.sum()
            print(offspring_temp.sum())
        
        # add the child and drop exact duplicates (np.unique also sorts the rows)
        offspring=np.append(offspring,offspring_temp,axis=0)
        offspring=np.unique(offspring,axis=0)
        
    # Stage 2: pure mutation of existing pool members
    pool=np.array(pool_absorption)[:,0:feature_num]
    while len(offspring)<batch_size:
        # randomly select one pool sample for mutation
        index=np.random.choice(len(pool),size=1).item()
        offspring_temp,mutation_flag=mutation(pool[index],1,sigma)
        # check point: negative values should be no larger than numerical noise
        if sum(abs(offspring_temp[offspring_temp<0])>1e-5)>0:
            print("Error! The negative value is too large!")
            print(offspring_temp)
        offspring_temp[offspring_temp<0] = 0
        # the mutation rate is 1 here, so a missing mutation indicates an error
        if mutation_flag == False:
            print("Error in mutation!")
        # a sample on the boundary can be trapped there and come back numerically unchanged;
        # keep mutating until the result differs from the selected pool member
        while ((abs(offspring_temp - pool[index])<1e-10).sum() == feature_num):
            offspring_temp,mutation_flag = mutation(pool[index],1,sigma)
            # check point
            if sum(abs(offspring_temp[offspring_temp<0])>1e-5)>0:
                print("Error! The negative value is too large!")
            # get rid of numerical error
            offspring_temp[offspring_temp<0] = 0
            if mutation_flag == False:
                print("Error in mutation!")
        # a small numerical excess over 1 is acceptable, but the difference should not be too large;
        # it is reported and the row is re-normalized
        if offspring_temp.sum(axis=1)>1:
            print("In mutation, the summation is larger than 1!")
            print("Mutation flag: {}".format(mutation_flag))
            print(offspring_temp.sum())
            offspring_temp=offspring_temp/offspring_temp.sum()
            print(offspring_temp.sum())
            
        # add the mutant and drop exact duplicates (np.unique also sorts the rows)
        offspring=np.append(offspring,offspring_temp,axis=0)
        offspring=np.unique(offspring,axis=0)
        print("In mutation, the original pool[index] is \n {}".format(pool[index]))
        print("After mutation, it's \n {}".format(offspring_temp))
        print("{}".format(pool[index] - offspring_temp))
    return offspring

In [None]:
def update_pool(offspring,offspring_performance,offspring_index,pool_absorption):
    """
    Update the current pool with the MAP-elite rule: one elite per attribute index.

    For every attribute index present in the offspring, the offspring with the largest
    first performance column is the candidate. It is appended when the pool has no row
    for that attribute index, and it replaces the existing row when its performance is
    strictly larger. A replacement overwrites the row of the passed-in pool array in place.

    Args:
        offspring: 2-D array of offspring features, one row per sample
        offspring_performance: 2-D array with the performance of every offspring in column 0
        offspring_index: 1-D array with the attribute index of every offspring
        pool_absorption: the current pool of samples with high absorption; each row is
                         [features, performance, attribute index]
    Returns:
        pool_absorption: the updated pool of samples with high absorption
    """
    for attribute_index in np.unique(offspring_index):
        in_niche = offspring_index == attribute_index
        niche_performance = offspring_performance[in_niche]
        niche_offspring = offspring[in_niche]

        best = np.argmax(niche_performance[:, 0])
        best_features = niche_offspring[best]
        candidate = np.concatenate((best_features,
                                    niche_performance[best],
                                    [attribute_index]))
        # The performance value sits right after the features in a pool row. Its position is
        # taken from the row itself instead of a notebook-level `feature_num` global, so the
        # function does not depend on which cells have been executed.
        performance_col = len(best_features)

        occupied = pool_absorption[:, -1] == attribute_index
        if not occupied.any():
            pool_absorption = np.vstack((pool_absorption, candidate))
        elif pool_absorption[occupied][0, performance_col] < candidate[performance_col]:
            pool_absorption[occupied] = candidate.reshape(1, -1)

    return pool_absorption

In [None]:
def create_reagent_volume(X,Ranges,V_total=11.5):
    """
    Convert normalized features into reagent volumes and append the make-up volume.

    Column i of X is mapped linearly onto [Ranges[i][0], Ranges[i][1]] and rounded to two
    decimals. One extra column is appended holding V_total minus the sum of the rounded
    volumes of that row, and the whole result is rounded to two decimals again.

    Args:
        X: 2-D array of normalized features, one row per sample
        Ranges: sequence of [low, high] bounds, one per scaled column
        V_total: total volume every row is filled up to
    Returns:
        array with the columns of X plus one trailing make-up column
    """
    volumes = np.zeros((X.shape))
    for col, bounds in enumerate(Ranges):
        low, high = bounds[0], bounds[1]
        volumes[:, col] = X[:, col] * (high - low) + low
    volumes = np.around(volumes, 2)
    # whatever is left of the total volume goes into the last column
    remainder = (V_total - volumes.sum(axis=1)).reshape(-1, 1)
    return np.around(np.hstack((volumes, remainder)), 2)

In [None]:
def generate_json_file_for_Nanobot(X,Ranges,V_total,generation_num,random_sampling_size=3):
    """
    Write the reagent volumes of one generation to data<generation_num>.json.

    For every generation except generation 0, `random_sampling_size` extra constrained
    random samples are appended to X before the volumes are computed. A fixed reference
    recipe is always stored under the key "exp23"; it would overwrite a sample if X had
    more than 23 rows.

    Args:
        X: 2-D array of normalized features, one row per experiment
        Ranges: [low, high] volume bounds passed on to create_reagent_volume
        V_total: total volume passed on to create_reagent_volume
        generation_num: generation number; selects the output file name
        random_sampling_size: number of random samples appended for generation_num != 0
    Returns:
        reagents: the reagent volumes that were written (non-positive values replaced by 1e-10)
        X: the normalized features actually used, including any appended random samples
    """
    if generation_num != 0:
        # Take the feature count from X itself: np.vstack below needs matching column counts
        # anyway, and this removes the dependency on a notebook-level `feature_num` global.
        X_random = obtain_constrained_random_samples(feature_num=np.shape(X)[1],
                                                     initial_num=random_sampling_size)
        X = np.vstack((X, X_random))

    reagents = create_reagent_volume(X, Ranges, V_total)

    # replace zero or negative volumes by a tiny positive number
    # (presumably because the platform cannot handle non-positive volumes; confirm)
    reagents[reagents<=0] = 10**(-10)

    # reagent columns follow the order of Ranges: 0 gold, 1 surfactant, 2 silver, 3 reductant,
    # with the make-up volume (water) appended as column 4
    reagent_dic = {}
    for i in range(len(X)):
        reagent_dic["exp{}".format(i)] = {
            "surfactant": reagents[i][1],
            "gold": reagents[i][0],
            "silver": reagents[i][2],
            "reductant": reagents[i][3],
            "water": reagents[i][4],
            "seeds": 0.5,
        }

    # add a reference experimental condition to make sure Nanobot is working fine
    reagent_dic["exp{}".format(23)] = {
        "surfactant": 4.4,
        "gold": 2.5,
        "silver": 1.8,
        "reductant": 1.1,
        "water": 1.8,
        "seeds": 0.5,
    }

    with open(base_directory+'data%d.json'%generation_num, 'w') as outfile:
        json.dump(reagent_dic, outfile)
    return reagents,X

In [None]:
def objective_Nanobot(X,generation_num):
    """
    Read the processed measurement data of one generation and pair it with the samples in X.

    Only the first len(X) records of the data file are read, which leaves out the trailing
    reference sample. A record whose first entry equals 1 contributes one output row; any
    other non-empty record contributes two rows (the sample is duplicated, one row per
    performance value). Samples with an empty record are skipped with a message.

    Args:
        X: the original X
        generation_num: generation number; selects the MAP_elite_generation_<n> folder
    Returns:
        X_new: the samples with data, duplicated where a record yields two rows
        performance: the corresponding performance as a column vector
        index_set: the attribute index of every row
        roughness_set: the roughness of the sample UV-Vis
        prominence_set: the prominence value of every row
    """
    # NOTE: allow_pickle=True executes pickled objects while loading; only use trusted files.
    data_path = base_directory+"MAP_elite_generation_%d"%generation_num+"/data_total.npy"
    data = np.load(data_path,allow_pickle=True)

    X_new, performance, index_set = [], [], []
    roughness_set, prominence_set = [], []

    if len(X) != 23:
        print("Possible wrong X size!")

    for i in range(len(X)):
        record = data[i]
        if len(record) == 0:
            print("Sample %d has no data" %i)
            continue

        if record[0] == 1:
            # single-row record: [1, performance, index, roughness, prominence]
            X_new.append(X[i])
            performance.append(record[1])
            index_set.append(record[2])
            roughness_set.append(record[3])
            prominence_set.append(record[4])
            continue

        # two-row record: [flag, performance 1, performance 2, index pair, roughness, prominence]
        index_pair = record[3]
        X_new.extend((X[i], X[i]))
        performance.extend((record[1], record[2]))
        index_set.append(100+100*index_pair[0]+index_pair[1])
        index_set.append(10000+10000*index_pair[0]+index_pair[1])
        roughness_set.extend((record[4], record[4]))
        prominence_set.extend((record[5], record[5]))

    return (np.array(X_new),
            np.array(performance).reshape(-1,1),
            np.array(index_set),
            np.array(roughness_set),
            np.array(prominence_set))

In [None]:
feature_num=4 # degrees of freedom in this experiment
initial_num=23 # number of initial random samples: 23 random experiments + 1 reference experiment
batch_size=20 # number of mutation+crossover offspring in every later generation;
            # each of those generations runs 20 offspring + 3 random + 1 reference experiments

mutation_rate=0.4 # the probability that a mutation happens after crossover
sigma= 0.05 # the sigma of the Gaussian distribution used in the mutation step

generation_num=0

# first, draw the initial random samples under the sum constraint
# (use the feature_num setting above instead of repeating the literal 4, so the two cannot diverge)
X=obtain_constrained_random_samples(feature_num=feature_num,initial_num=initial_num)
Ranges=[[0,11.5], #Au
        [0,11.5], #CTAB
        [0,11.5], #Ag
        [0,11.5]] #Reductant
# then, generate the json file from X and Ranges, constraining the overall volume and adding the reference condition
reagents,X=generate_json_file_for_Nanobot(X,Ranges,V_total=11.5,generation_num=generation_num,random_sampling_size=3)
# sanity output: largest row sum and smallest entry of X, X itself, and the smallest reagent volume
print(X.sum(axis=1).max())
print(X.min())
print(X)
print(reagents.min())

In [None]:
# Run Nanobot and get the data

In [None]:
# Manual checkpoint: wait until the Nanobot run has finished and its results have been analyzed
# with the UV-Vis spectrum processor, because the following cell reads those results from disk.
input("Press Enter to continue after running the platform and analyzing the data ...")

In [None]:
# Read in the results from the spectrum processor for the current generation.
# NOTE: X is overwritten with the rows returned by objective_Nanobot (samples without data dropped,
# some samples duplicated), so this cell is not idempotent: re-running it passes the already
# processed X back in.
X,performance,index,roughness,prominence_set=objective_Nanobot(X,generation_num)

In [None]:
# Filter bad data: keep only rows with roughness below 0.005 and prominence above 0.2.
good_data_index=(roughness.flatten()<0.005) & (prominence_set>0.2)
X=X[good_data_index]
performance=performance[good_data_index]
# Round the attribute index (presumably so that equal indices compare equal when grouping below).
index=np.around(index[good_data_index])

In [None]:
# Create the initial pool: for every distinct attribute index keep the best-performing sample.
pool_absorption=[]
# Each pool row stores the sample features, its performance entries and the attribute index, in that order.
for attribute_index in np.unique(index):
    performance_temp=performance[index==attribute_index]
    X_temp=X[index==attribute_index]
    
    # the best sample of this group is the one with the largest value in performance column 0
    sample_1_index=np.argmax(performance_temp[:,0])
    sample_1=np.concatenate((X_temp[sample_1_index],
                             performance_temp[sample_1_index],
                             [attribute_index]))
    pool_absorption.append(sample_1)
    
pool_absorption=np.array(pool_absorption)

# Save the pool into the folder of this generation, the same folder its measurement data was read from.
np.savez(base_directory+"MAP_elite_generation_%d"%generation_num+"/pool_absorption%d"%generation_num,pool_absorption)

# Move on to the next generation.
generation_num=generation_num+1

# Number of attribute indices currently held in the pool.
print(len(pool_absorption))

# The next cell is the main loop that runs the platform generation by generation.

In [None]:
# Run 9 further generations; every pass proposes a batch, waits for the measurement and updates the pool.
for loop in range(9):
    # generate offspring according to the current pool
    offspring=generate_offspring(feature_num,pool_absorption,mutation_rate,sigma,batch_size)
    # write the recipe file; the returned X also contains the random samples appended for this generation
    reagents,X=generate_json_file_for_Nanobot(offspring,Ranges,V_total=11.5,generation_num=generation_num)
    offspring=X
    # sanity output: row sums of X, X itself, the smallest reagent volume and the total volume per experiment
    print(X.sum(axis=1))
    print(X)
    print(reagents.min())
    print(reagents.sum(axis=1))
    
    # Run Nanobot and get the data; execution blocks here until the operator confirms
    input("Press Enter to continue after running the platform and analyzing the data ...")
    
    # read in the current offspring results
    offspring,offspring_performance,offspring_index,roughness,prominence_set=objective_Nanobot(offspring,generation_num)

    # filter bad data: keep rows with roughness below 0.005 and prominence above 0.2
    good_data_index = (roughness.flatten()<0.005) & (prominence_set>0.2)

    offspring=offspring[good_data_index]
    offspring_performance=offspring_performance[good_data_index]
    offspring_index=np.around(offspring_index[good_data_index])
    # update the pool with the filtered offspring
    pool_absorption=update_pool(offspring,offspring_performance,offspring_index,pool_absorption)
    print(generation_num)
    print(pool_absorption)
    # save the updated pool into the folder of this generation
    np.savez(base_directory+"MAP_elite_generation_%d"%generation_num+"/pool_absorption%d"%generation_num,pool_absorption)

    generation_num=generation_num+1