In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as signal
from scipy.interpolate import interp1d
from scipy.signal import find_peaks, peak_widths,peak_prominences
import bisect
import pathlib

In [None]:
def low_pass_filter(wavelengths,intensities,fc=15):
    """
    Smooth a spectrum with a zero-phase 5th-order Butterworth low-pass filter.

    The number of samples in ``wavelengths`` is used as the sampling rate, so
    ``fc`` is effectively expressed in cycles over the whole record and the
    normalised cut-off handed to ``signal.butter`` is ``fc / (N / 2)``.

    Args:
        wavelengths: array whose first dimension gives the number of samples.
        intensities: the UV-Vis intensities to be filtered.
        fc: cut-off frequency of the low-pass filter.
    Returns:
        series: the intensities after forward-backward filtering.
    """
    nyquist = wavelengths.shape[0] / 2  # half of the "sampling frequency"
    cutoff = fc / nyquist  # cut-off normalised to the Nyquist frequency
    numerator, denominator = signal.butter(5, cutoff, 'low')
    return signal.filtfilt(numerator, denominator, intensities)

In [None]:
def obtain_prominence(x,a=1,b=70,c=0.4,d=100,e=1,f=0.1,threshold=0.05):
    """
    Re-scale a scalar peak prominence so that small values are emphasised.

    Below ``threshold`` the result is a shifted, scaled tanh; from
    ``threshold`` upwards it is a linear term plus a centred sigmoid, divided
    by the same expression evaluated at x = 1 (so the output is 1 at x = 1).

    Args:
        x: the peak prominence to be processed (a scalar; the branch uses ``x < threshold``).
        a,b,c,d,e,f: constants that tune the shape of the function.
        threshold: value at which the function switches from the tanh branch
            to the linear-plus-sigmoid branch.
    Returns:
        The processed peak prominence, which is further used to calculate the scores.
    """
    if x<threshold:
        return f*(e+np.tanh(d*(x-threshold)))

    def centred_sigmoid(z):
        # logistic function shifted so that it is 0 at z = 0
        return 1/(1+np.exp(-z))-0.5

    numerator = a*x+c*centred_sigmoid(b*x)
    value_at_one = a+c*centred_sigmoid(b)
    return numerator/value_at_one

In [None]:
def peak_binary(x,b=100,threshold=0.05):
    """
    Softly binarise a peak prominence with a tanh step centred on ``threshold``.

    The output runs from 0 (well below the threshold) to 1 (well above it)
    and equals 0.5 exactly at the threshold.

    Args:
        x: the peak prominence to be processed by this function.
        b: steepness of the step; larger values give a sharper transition.
        threshold: the prominence at which the output is 0.5.
    Returns:
        the processed peak prominence.
    """
    scaled_offset = (x-threshold)*b
    step = np.tanh(scaled_offset)  # in (-1, 1)
    return (step+1)/2

In [None]:
def normalizedata(series_original):
    """
    Min-max scale the input so that its minimum maps to 0 and its maximum to 1.

    Args:
        series_original: array of values to rescale.
    Returns:
        ``(series_original - min) / (max - min)``; no special handling is
        done for a constant input (max == min).
    """
    lowest = np.min(series_original)
    highest = np.max(series_original)
    return (series_original-lowest)/(highest-lowest)

In [None]:
def calcualte_smoothness(x):
    """
    Measure how irregular the point-to-point steps of a spectrum are.

    The value is the standard deviation of the first differences of ``x``
    divided by the absolute value of their mean.

    Args:
        x: the UV-Vis spectrum
    Returns:
        ``std(diff(x)) / |mean(diff(x))|``.
    """
    steps = np.diff(x)
    spread = np.std(steps)
    average_step = abs(np.mean(steps))
    return spread/average_step

In [None]:
def read_in_UV_Vis(base_sample,index,lower=400,upper=950,fc=15,color="red",plot_flag=True,normalize=False):
    """
    Load one sample's UV-Vis spectrum, trim it, smooth it and return an interpolant.

    Args:
        base_sample: path prefix of the generation directory; the spectrum is
            read from ``base_sample + "<index, 4 digits>/uv.json"``.
        index: integer index of the sample inside ``base_sample``.
        lower: only wavelengths strictly greater than this are kept.
        upper: only wavelengths strictly smaller than this are kept.
        fc: cut-off frequency forwarded to ``low_pass_filter``.
        color: colour used for the raw (unfiltered) trace when plotting.
        plot_flag: together with ``normalize`` decides whether to plot.
        normalize: only gates the plot (the traces are drawn when both
            ``plot_flag`` and ``normalize`` are true); the data are always
            min-max normalised regardless of this flag.
    Returns:
        UV_inter: cubic ``interp1d`` of the filtered, re-normalised spectrum
            over the trimmed wavelengths (extrapolating outside them).
        roughness: mean absolute difference between the filtered spectrum and
            the normalised raw spectrum.
    """
    # Sample folders are named with four digits (same convention as the
    # params.json paths), so format the index as %04d.
    with open(base_sample+"%04d/uv.json"%index) as json_data:
        d = json.load(json_data)
    wavelengths = np.array(d['wavelength'])
    series_original = np.array(d['absorbances'])

    # Trim both arrays to the open interval (lower, upper) with one mask.
    in_window = (wavelengths>lower) & (wavelengths < upper)
    wavelengths = wavelengths[in_window]
    series_original = normalizedata(series_original[in_window])

    series=low_pass_filter(wavelengths,series_original,fc=fc)
    # Roughness is measured before the filtered curve is re-normalised.
    roughness=abs(series-series_original).mean()

    series=normalizedata(series)

    if plot_flag and normalize==True:
        plt.plot(wavelengths,series,c="black")
        plt.plot(wavelengths,series_original,c=color)

    UV_inter = interp1d(wavelengths, series, kind='cubic',fill_value='extrapolate' )
    return UV_inter,roughness

In [None]:
def plot_1D(region,boundary,roughness,series,peaks,prominences,wavelengths):
    """
    Plot a spectrum with its peaks, the boundaries and the region markers.

    Args:
        region: iterable of x positions, each drawn as a solid blue vertical line.
        boundary: x position(s) drawn as dashed red vertical lines.
        roughness: unused by this function.
        series: spectrum values plotted against ``wavelengths``.
        peaks: integer indices into ``series``/``wavelengths`` marked with "x".
        prominences: unused by this function.
        wavelengths: x values of the spectrum.
    """
    plt.figure()

    plt.plot(wavelengths,series)
    plt.plot(wavelengths[peaks], series[peaks], "x")
    plt.vlines(boundary, ymin=0, ymax=1,color="r",linestyles ="dashed")
    for region_temp in region:
        # draw each marker once (previously the whole `region` was redrawn
        # on every iteration)
        plt.vlines(region_temp, ymin=0, ymax=1,color="b")

In [None]:
def obtain_scores(base_sample,index,lower=400,upper=950,num=1101,False_width=10000,plot_flag=True):
    """
    Extract the (up to) two most prominent peaks of one sample's UV-Vis spectrum.

    The spectrum is loaded with ``read_in_UV_Vis``, resampled on ``num``
    evenly spaced wavelengths in [lower, upper] and analysed with
    ``scipy.signal.find_peaks``. The diagnostic plot is always drawn on the
    current matplotlib figure and also shows the module-level
    ``series_target``, which therefore must exist before this function is
    called and must have ``num`` points.

    Args:
        base_sample: the directory of the sample
        index: the index of the sample
        lower: the lower wavelength boundary of UV-Vis
        upper: the upper wavelength boundary of UV-Vis
        num: the sampling number in wavelength [lower,upper]
        False_width: currently unused.
        plot_flag: forwarded to ``read_in_UV_Vis``; it does not disable the
            diagnostic plot drawn here.
    Returns:
        ``[]`` when the sample is rejected (no peak with prominence >= 0.02,
        roughness >= 0.005, or largest prominence <= 0.2). Otherwise
        ``[peak_intensity, peak_positions, peak_width, series]`` where the
        three lists are ordered by decreasing prominence, ``peak_width`` is
        the width at half prominence in wavelength units and ``series`` is
        the resampled spectrum.
    """
    # Forward the wavelength window so the interpolant is built on the same
    # range it is evaluated on.
    UV_sample,roughness=read_in_UV_Vis(base_sample,index,lower=lower,upper=upper,plot_flag=plot_flag)
    #define the wavelength and got the UV-Vis spectrum
    wavelengths=np.linspace(lower,upper,num)
    series=UV_sample(wavelengths)

    #find peaks in the data
    peaks, properties = find_peaks(series,prominence=0.02)
    if len(peaks) == 0 or roughness >= 0.005:
        print("None peaks are found!")
        plt.close()
        return []

    #find prominence of individual peaks
    prominences = peak_prominences(series, peaks)[0]
    if max(prominences) <=0.2:
        return []

    results_half = peak_widths(series, peaks, rel_height=0.5)
    results_full = peak_widths(series, peaks, rel_height=1)
    # indices (into `peaks`) of the two most prominent peaks, strongest first
    peak_index = prominences.argsort()[-2:][::-1]
    step = wavelengths[1]-wavelengths[0]  # converts widths from samples to wavelength units
    peak_positions = []
    peak_width = []
    peak_intensity = []

    for which in peak_index:
        sample_index = peaks[which]
        peak_positions.append(wavelengths[sample_index])
        peak_width.append(results_half[0][which].item()*step)
        peak_intensity.append(series[sample_index])

    # plot
    contour_heights = series[peaks] - prominences
    plt.plot(wavelengths,series,'black')
    plt.plot(wavelengths[peaks], series[peaks], "x")
    plt.plot(wavelengths,series_target,'blue')
    plt.scatter(wavelengths[properties['left_bases']],series[properties['left_bases']],c='black')
    plt.scatter(wavelengths[properties['right_bases']],series[properties['right_bases']],c='black')
    plt.vlines(x=wavelengths[peaks], ymin=contour_heights, ymax=series[peaks])
    plt.hlines(*(results_half[1],wavelengths[np.around(results_half[2]).astype("int")],wavelengths[np.around(results_half[3]).astype("int")]), color="C2")
    plt.hlines(*(results_full[1],wavelengths[np.around(results_full[2]).astype("int")],wavelengths[np.around(results_full[3]).astype("int")]), color="C3")

    #return the results
    return [peak_intensity,peak_positions,peak_width,series]

# Obtain the ideal UV-Vis

In [None]:
# Build the reference ("optimal") UV-Vis spectrum and extract its peak descriptors.
pathlib.Path('./Optimization_generation_0').mkdir(parents=True, exist_ok=True)
optimal_UV = normalizedata(np.loadtxt("11_by_33nm_rods.csv",delimiter = ","))
# the reference file is placed on 56 evenly spaced wavelengths in [400, 950]
wavelength_temp = np.linspace(400,950,56)
UV_sample = interp1d(wavelength_temp, optimal_UV, kind='cubic',fill_value='extrapolate')
wavelengths = np.linspace(400,950,1101)

series = UV_sample(wavelengths)
# independent float copy of the reference curve, used later as the target
series_target = np.empty(series.shape)
series_target[...] = series.ravel()

# peaks, their prominences and their widths at half / full prominence
peaks, _ = find_peaks(series,prominence=0.02)
prominences = peak_prominences(series, peaks)[0]
results_half = peak_widths(series, peaks, rel_height=0.5)
results_full = peak_widths(series, peaks, rel_height=1)

# the two most prominent peaks, strongest first
peak_index = np.argsort(prominences)[::-1][:2]
wavelength_step = wavelengths[1]-wavelengths[0]
peak_positions = [wavelengths[peaks[k]] for k in peak_index]
peak_width = [results_half[0][k].item()*wavelength_step for k in peak_index]
peak_intensity = [series[peaks[k]] for k in peak_index]

# diagnostic plot of the reference spectrum
plt.plot(wavelengths,series,'black')
plt.plot(wavelengths[peaks], series[peaks], "x")
contour_heights = series[peaks] - prominences
for base_key in ('left_bases', 'right_bases'):
    plt.scatter(wavelengths[_[base_key]],series[_[base_key]],c='black')
plt.vlines(x=wavelengths[peaks], ymin=contour_heights, ymax=series[peaks])
for width_result, line_color in ((results_half, "C2"), (results_full, "C3")):
    heights, left_ips, right_ips = width_result[1:]
    plt.hlines(heights,
               wavelengths[np.around(left_ips).astype("int")],
               wavelengths[np.around(right_ips).astype("int")],
               color=line_color)
plt.show()
optimial_data_set = np.array([peak_intensity,peak_positions,peak_width])

# Check the score with the reference UV-Vis

In [None]:
def _score_generation(base_sample, total_sample_num=23):
    """
    Score every sample of one generation directory against the target spectrum.

    For each sample the diagnostic figure drawn by ``obtain_scores`` is
    titled with the loss, shown and closed, and the sample's ``params.json``
    is printed. Relies on the module-level ``wavelengths`` and
    ``series_target``.

    Args:
        base_sample: path prefix of the generation directory.
        total_sample_num: number of samples (folders 0000, 0001, ...) to read.
    Returns:
        (inputs, losses): a list of 4-element arrays (gold, surfactant,
        silver, reductant, each divided by 11.5) and the matching list of
        losses; a rejected sample gets the loss 100000.
    """
    inputs = []
    losses = []
    for j in range(total_sample_num):
        data_temp = obtain_scores(base_sample,j,lower=400,upper=950,num=1101,False_width=10000,plot_flag=False)
        if len(data_temp)>0:
            # distance between the target maximum and the strongest peak,
            # plus a weighted point-wise difference of the two curves
            loss = abs(wavelengths[np.argmax(series_target)]-data_temp[1][0])+0.20*(abs(data_temp[3] - series_target).sum())
        else:
            loss = 100000
        losses.append(loss)
        plt.title(loss)
        plt.show()
        plt.close()
        with open(base_sample+'%04d'%j+"/params.json") as json_file:
            data_input = json.load(json_file)
        print(data_input)
        inputs.append(np.array([data_input['gold']/11.5,
                                data_input['surfactant']/11.5,
                                data_input['silver']/11.5,
                                data_input['reductant']/11.5]))
    return inputs, losses


input_space = []
output_space = []
index_total = []
# (campaign id, generations to read, directory template) for the three
# exploration campaigns of the first chemical space
exploration_campaigns = [
    (0, range(10), "../../0-Exploration/0-Chemical_Space_1/0/MAP_elite_generation_%d//"),
    (1, range(1,5), "../../0-Exploration/0-Chemical_Space_1/1//MAP_elite_generation_%d//"),
    (2, range(1,3), "../../0-Exploration/0-Chemical_Space_1/2//MAP_elite_generation_%d//"),
]
for campaign_id, generations, path_template in exploration_campaigns:
    for generation_num in generations:
        plt.figure()
        base_sample = path_template%generation_num
        generation_inputs, generation_losses = _score_generation(base_sample)
        input_space.extend(generation_inputs)
        output_space.extend(generation_losses)
        # one [generation, sample, campaign] record per sample
        index_total.extend([generation_num,j,campaign_id] for j in range(len(generation_losses)))

In [None]:
# Freeze the collected lists into arrays, then persist losses and inputs as CSV.
# index_total is converted as well but is not written to disk.
output_space, input_space, index_total = (
    np.array(collected) for collected in (output_space, input_space, index_total)
)
for csv_name, table in (('output_space.csv', output_space), ('input_space.csv', input_space)):
    np.savetxt(csv_name,table,delimiter=',')

# Test the initial data set

In [None]:
# Reload the saved losses and inputs; index_total is taken from memory
# because it is not stored on disk.
output_space, input_space = (
    np.loadtxt(csv_name,delimiter=',') for csv_name in ('output_space.csv', 'input_space.csv')
)
index_total = np.array(index_total)
# fitness is the negated loss (higher is better)
fintess_raw = np.negative(output_space)

In [None]:
# Drop the rejected samples, i.e. those carrying the sentinel loss 100000.
is_valid = fintess_raw>-100000
input_space = input_space[is_valid]
index_total_eff = index_total[is_valid]
fintess_raw = fintess_raw[is_valid]

In [None]:
# Novelty of a sample = mean Euclidean distance to its nearest_N closest
# distinct input points (the point itself, at distance 0, is among them).
nearest_N = 10
# The set of distinct inputs does not change inside the loop, so build it once.
unique_inputs = np.unique(input_space,axis=0)
fitness = []
novelty_total = []
for i in range(len(fintess_raw)):
    distances = np.sqrt(((unique_inputs - input_space[i])**2).sum(axis=1))
    novelty = (np.sort(distances)[0:nearest_N]).mean()
    # combined score: weighted novelty plus the raw fitness (negated loss)
    fitness_temp =  100*novelty + fintess_raw[i]
    fitness.append(fitness_temp)
    novelty_total.append(novelty)
fitness = np.array(fitness)
novelty_total = np.array(novelty_total)

In [None]:
# Histogram of the novelty values stored in novelty_total.
plt.hist(novelty_total)

# Process data for every generation

In [None]:
# For each optimisation generation: wait for the experiment, score every
# sample against the target spectrum and store inputs/losses next to the data.
for generation_num in range(1,6):
    input("Press Enter to continue after running the platform and acquiring the data ...")
    input_space, output_space = [], []
    base_sample="./Optimization_generation_%d//"%generation_num
    total_sample_num = 23
    for j in range(total_sample_num):
        plt.figure()
        data_temp = obtain_scores(base_sample,j,lower=400,upper=950,num=1101,False_width=10000,plot_flag=False)
        if len(data_temp)>0:
            # peak-position mismatch plus weighted point-wise curve difference
            peak_shift = abs(wavelengths[np.argmax(series_target)]-data_temp[1][0])
            curve_gap = abs(data_temp[3] - series_target).sum()
            loss = peak_shift+0.20*(curve_gap)
        else:
            # sentinel loss for rejected samples
            loss = 100000
        output_space.append(loss)
        plt.title(loss)
        plt.show()
        plt.close()
        params_path = base_sample+'%04d'%j+"/params.json"
        with open(params_path) as json_file:
            data_input = json.load(json_file)
        print(data_input)
        # each quantity is divided by 11.5 before being stored
        input_temp = np.array([data_input[reagent]/11.5
                               for reagent in ('gold', 'surfactant', 'silver', 'reductant')])
        input_space.append(input_temp)

    for csv_name, table in (('input_space.csv', input_space), ('output_space.csv', output_space)):
        np.savetxt(base_sample+csv_name,table,delimiter=',')

# Filter the optimized solution

In [None]:
# Start from the saved exploration data and append every optimisation generation.
output_space = [row for row in np.loadtxt('output_space.csv',delimiter=',')]
input_space = [row for row in np.loadtxt('input_space.csv',delimiter=',')]

for generation_num in range(1,6):
    base_sample="./Optimization_generation_%d//"%generation_num
    total_sample_num = 23
    for j in range(total_sample_num):
        plt.figure()
        data_temp = obtain_scores(base_sample,j,lower=400,upper=950,num=1101,False_width=10000,plot_flag=False)
        if len(data_temp)>0:
            # peak-position mismatch plus weighted point-wise curve difference
            peak_shift = abs(wavelengths[np.argmax(series_target)]-data_temp[1][0])
            curve_gap = abs(data_temp[3] - series_target).sum()
            loss = peak_shift+0.20*(curve_gap)
        else:
            # sentinel loss for rejected samples
            loss = 100000
        output_space.append(loss)
        plt.title(loss)
        plt.show()
        plt.close()
        params_path = base_sample+'%04d'%j+"/params.json"
        with open(params_path) as json_file:
            data_input = json.load(json_file)
        # each quantity is divided by 11.5 before being stored
        input_temp = np.array([data_input[reagent]/11.5
                               for reagent in ('gold', 'surfactant', 'silver', 'reductant')])
        input_space.append(input_temp)

In [None]:
# Convert to arrays, negate the loss into a fitness and drop rejected
# samples (those carrying the sentinel loss 100000).
input_space = np.array(input_space)
output_space = np.array(output_space)
fintess_raw = np.negative(output_space)
is_valid = fintess_raw>-100000
input_space = input_space[is_valid]
fintess_raw = fintess_raw[is_valid]

In [None]:
# Order the samples by decreasing fitness, then keep one row per distinct
# input: np.unique reports the first occurrence, which after sorting is the
# best-scoring one, and sorting those indices preserves the fitness order.
by_fitness = np.argsort(-fintess_raw)
input_space = input_space[by_fitness]
fintess_raw = fintess_raw[by_fitness]
u, indices = np.unique(input_space, axis=0,return_index=True)
first_occurrences = np.sort(indices)
input_space_final = input_space[first_occurrences]
fintess_raw_final = fintess_raw[first_occurrences]

In [None]:
# Report every point whose fitness is at least that of each of its 6
# nearest points in input space (the point itself is one of the 6).
for i in range(len(input_space_final)):
    offsets = input_space_final[i] - input_space_final
    distances = np.sqrt((offsets**2).sum(axis=1))
    nearest = np.argsort(distances)[0:6]
    not_beaten = fintess_raw_final[i]>=fintess_raw_final[nearest]
    if not_beaten.sum()==6:
        print(i)
        # multiply by 11.5 to undo the scaling applied when the inputs were read
        print(input_space_final[i]*11.5)
        print(fintess_raw_final[i])