In [1]:
import numpy as np
import pandas as pd
import quantecon as qe
from ast import literal_eval
from sklearn.ensemble import RandomForestRegressor

In [2]:
def creating_samples(samples_number, element):
    """Create the generation-0 population and save it as a tab-separated file.

    The first ``len(element)`` samples are the deterministic compositions in
    which one element is absent and the others share the composition equally
    (e.g. ``[0.5, 0.5, 0.0]`` for three elements).  Any further samples are
    drawn with ``np.random.rand``, normalised to sum to one and rounded to
    three decimals (so a rounded row may sum to 0.999 or 1.001).

    Parameters
    ----------
    samples_number : int
        Total number of samples (rows) to create.
    element : list of str
        Element symbols; its length fixes the dimension of each position.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID`` (1-based), ``Elements``, ``Generation`` (all 0) and
        ``Position`` (one numpy array per row).  The same table is written to
        ``Result/Initial Population.txt``.

    Raises
    ------
    ValueError
        If fewer than two elements are given (the deterministic samples
        divide by ``len(element) - 1``).
    """
    import os

    n_elements = len(element)
    if n_elements < 2:
        raise ValueError("creating_samples needs at least two elements")

    gen = 0
    # simplex_grid(n, 1) yields the n unit vectors; 1 - e_k spreads the
    # composition evenly over every element except the k-th.
    uniform_samples = (1 - qe.simplex_grid(n_elements, 1)) / (n_elements - 1)

    n_random = samples_number - n_elements
    if n_random > 0:
        random_samples = np.random.rand(n_random, n_elements)
        random_samples = np.around(
            random_samples / random_samples.sum(axis=1, keepdims=True),
            decimals=3,
        )
        sample = np.vstack((uniform_samples, random_samples))
    else:
        sample = uniform_samples[:samples_number]

    df = pd.DataFrame(
        {
            'ID': np.arange(1, samples_number + 1),
            # One list per row: a single length-1 list is not broadcast
            # against the other columns by every pandas version.
            'Elements': [list(element) for _ in range(samples_number)],
            'Generation': np.full(samples_number, gen),
        },
        index=np.arange(samples_number),
    )
    # Each cell holds one numpy array; to_csv writes it as "[x y z]", which is
    # the format the pso class parses back.
    positions = pd.DataFrame([[row] for row in sample], columns=['Position'])
    df = pd.concat([df, positions], axis=1)

    os.makedirs("Result", exist_ok=True)
    df.to_csv("Result/Initial Population.txt", sep='\t', index=False, mode='w')
    return df

In [3]:
# Load the measured compositions and targets, then fit the surrogate model.
df_old = pd.read_csv('Data/AuIrOsPdPtReRhRu_0.60_compositions_and_targets.csv')
# Feature order used for training; every later predict() call must pass its
# columns in this same order.
X_columns_old = ['Pt','Pd','Au','Ru','Rh','Ir','Re','Os']
x_old = df_old[X_columns_old].to_numpy()
y_old = df_old['current_over_capacitance'].to_numpy()

# Random Forest Regression
# max_features=1.0 (use all features at every split) is what 'auto' meant for
# regressors; the 'auto' spelling is deprecated and rejected by newer
# scikit-learn releases.
reg = RandomForestRegressor(n_estimators = 1024,
                           bootstrap = True,
                           max_depth = None,
                           max_features = 1.0,
                           min_samples_leaf = 1,
                           min_samples_split = 2,
                           oob_score = True)
reg = reg.fit(x_old, y_old)

In [4]:
# Determine the best position: evaluate the model on a 1 % simplex grid over
# the first three features, with every remaining feature held at zero.
n_active = 3
grid_points = qe.simplex_grid(n_active, 100) / 100
padding = np.zeros((len(grid_points), len(X_columns_old) - n_active))

# Pass the column list itself (not wrapped in another list) so the frame gets
# ordinary string column labels instead of a one-level MultiIndex.
grid = pd.DataFrame(np.hstack((grid_points, padding)), columns=X_columns_old)
grid['Activity'] = reg.predict(grid[X_columns_old].to_numpy())

# Composition (first three features) of the grid point with the lowest
# predicted activity.
best_row = int(grid['Activity'].to_numpy().argmin())
a = grid.iloc[best_row, :n_active].to_numpy()

In [5]:
#Making Class
class pso:
    """Particle-swarm bookkeeping for compositions scored by a regression model.

    The instance keeps a log (``datalog``) with one row per particle per
    generation and a working copy of the newest generation
    (``working_generation``).  Activities are predicted by ``model`` if one is
    given, otherwise by the module-level ``reg`` fitted earlier in the
    notebook.

    Attributes
    ----------
    datalog : pandas.DataFrame
        Columns ``ID``, ``Elements``, ``Generation``, ``Position``,
        ``Velocity`` and ``Activity``.  ``Position``/``Velocity`` cells hold
        numpy arrays.
    working_generation : pandas.DataFrame
        Independent copy of the newest generation, indexed ``0..n-1``.
    generation : int
        Number of the newest generation.
    position : numpy.ndarray
        Positions parsed from the input table, one row per input row.
    initial_velocity : numpy.ndarray
        Generation-0 velocities (only set when the input is generation 0).
        Stored under this name because an instance attribute called
        ``velocity`` would shadow the ``velocity`` method.
    """

    # Number of zero-valued features appended to each position before it is
    # passed to the regressor (the model was trained on more features than
    # the swarm explores).
    N_PADDING_FEATURES = 5

    def __init__ (self, data, step, target = np.array([0.333, 0.333, 0.333]), model = None):
        """Parse a population table and score it.

        Parameters
        ----------
        data : pandas.DataFrame
            Table with ``ID``, ``Elements``, ``Generation`` and ``Position``
            columns, typically read back from the tab-separated file written
            by ``creating_samples``.  It is copied, not modified.
        step : number
            Divisor applied to the distance to ``target`` when the initial
            velocities are built.
        target : numpy.ndarray
            Point the generation-0 velocities aim at.
        model : object with ``predict``, optional
            Regressor used for the ``Activity`` column; defaults to the
            module-level ``reg``.
        """
        self.model = model

        # Work on a copy so the caller's frame keeps its original columns.
        self.datalog = data.copy().reset_index(drop=True)

        # "Elements" arrives as text when read from file; turn it into lists.
        self.datalog['Elements'] = self.datalog['Elements'].apply(
            lambda x: literal_eval(x) if isinstance(x, str) else x)

        # Select latest generation
        self.generation = int(self.datalog['Generation'].max())

        # Parse every row's position (row count, not max ID, so tables holding
        # several generations are handled too).
        self.position = np.array(
            [self._parse_vector(value) for value in self.datalog['Position']])
        self.datalog['Position'] = self._object_column(self.position)

        if self.generation == 0:
            # Initial velocity: 1/step of the way from each particle to target.
            self.initial_velocity = np.around(
                -(self.position - target) / step, decimals = 3)
            self.datalog['Velocity'] = self._object_column(self.initial_velocity)
        elif 'Velocity' in self.datalog.columns:
            # A reloaded log stores velocities as text as well.
            self.datalog['Velocity'] = self._object_column(
                [self._parse_vector(value) for value in self.datalog['Velocity']])

        # Filling the "Activity" column with the regressor
        self.f_activity(self.datalog)

        # Independent, 0-based copy of the latest generation, so later edits
        # neither touch the log nor trigger chained-assignment warnings.
        newest = self.datalog['Generation'] == self.generation
        self.working_generation = self.datalog.loc[newest].copy().reset_index(drop=True)

    @staticmethod
    def _parse_vector(value):
        """Return ``value`` as a float array; accepts "[x y z]"/"[x, y, z]" text or any sequence."""
        if isinstance(value, str):
            tokens = value.strip().strip('[]').replace(',', ' ').split()
            return np.array(tokens, dtype=float)
        return np.asarray(value, dtype=float)

    @staticmethod
    def _object_column(vectors):
        """Pack a sequence of arrays into a 1-D object array (one array per cell)."""
        column = np.empty(len(vectors), dtype=object)
        for k, vector in enumerate(vectors):
            column[k] = vector
        return column

    def move(self):
        """Advance the working generation by one velocity step, score it, log it and save the log."""
        self.generation += 1
        current = self.working_generation
        current['Generation'] = current['Generation'] + 1
        current['Position'] = self._object_column(
            [np.around(position + velocity, decimals = 3)
             for position, velocity in zip(current['Position'], current['Velocity'])])

        # TODO: boundary handling is unfinished -- a new position may leave
        # the [0, 1] range; the original draft intended to reflect the step.

        # Filling the "Activity" column with the regressor
        self.f_activity(current)

        # Append the new generation to the log
        self.datalog = pd.concat([self.datalog, current], ignore_index=True)
        self.store_datalog()
        return

    def f_activity(self, dataframe):
        """Fill ``dataframe['Activity']`` in place with the predicted activity of every row.

        Each position is padded with ``N_PADDING_FEATURES`` zeros and all rows
        are predicted in a single call.
        """
        model = getattr(self, 'model', None)
        if model is None:
            model = reg  # module-level regressor fitted earlier in the notebook
        if len(dataframe) == 0:
            dataframe['Activity'] = np.array([], dtype=float)
            return
        positions = np.vstack(dataframe['Position'].to_list())
        padding = np.zeros((len(positions), self.N_PADDING_FEATURES))
        features = np.hstack((positions, padding))
        dataframe['Activity'] = np.asarray(model.predict(features), dtype=float).ravel()
        return

    def store_datalog(self):
        """Write the full log to ``Result/Initial Population_gen <generation>.txt``."""
        import os
        os.makedirs("Result", exist_ok=True)
        self.datalog.to_csv("Result/Initial Population_gen "+str(self.generation)+".txt", sep='\t', index=False, mode='w')
        return

    def normalize_velocity(self):
        """Shift each working velocity so its components sum to (about) zero, rounded to 3 decimals."""
        current = self.working_generation
        if len(current) == 0:
            return
        n_elements = len(current['Elements'].iloc[0])
        current['Velocity'] = self._object_column(
            [np.around(velocity - sum(velocity) / n_elements, decimals = 3)
             for velocity in current['Velocity']])
        return

    def normalize_position(self):
        """Rescale each working position so its components sum to (about) one, rounded to 3 decimals."""
        current = self.working_generation
        current['Position'] = self._object_column(
            [np.around(position / sum(position), decimals = 3)
             for position in current['Position']])
        return

    @staticmethod
    def _best_of(frame):
        """Return ``[position, activity]`` of the row with the lowest activity in ``frame``."""
        best = int(np.argmin(frame['Activity'].to_numpy(dtype=float)))
        return [frame['Position'].iloc[best], frame['Activity'].iloc[best]]

    def gen_best(self):
        """Return ``[position, activity]`` of the best particle in the working generation."""
        return self._best_of(self.working_generation)

    def g_best(self):
        """Return ``[position, activity]`` of the best row in the whole log."""
        return self._best_of(self.datalog)

    def individual_best(self, ID):
        """Return ``[position, activity]`` of the best logged row of particle ``ID``.

        The particle's history is also kept on ``self.i_data``.
        """
        self.i_data = self.datalog[self.datalog['ID']==ID].reset_index(drop=True)
        return self._best_of(self.i_data)

    def delta_activity(self, ID, parameter):
        """Return the working-generation activity at row label ``ID`` minus ``parameter[1]``.

        ``parameter`` is a ``[position, activity]`` pair as returned by the
        ``*_best`` methods.  Note that ``ID`` is used as a row label of
        ``working_generation`` (0-based), not as the 1-based particle ID.
        """
        return self.working_generation.at[ID, 'Activity'] - parameter[1]

    def delta_position(self, ID, parameter):
        """Return the working-generation position at row label ``ID`` minus ``parameter[0]``.

        Mirrors ``delta_activity``; the original stub carried no signature, so
        this one is modelled on its sibling.
        """
        return self.working_generation.at[ID, 'Position'] - parameter[0]

    def velocity(self, dataframe, p, q, r):
        """Velocity update -- not implemented yet.

        The signature follows the draft kept in a later notebook cell; that
        draft still depends on an undefined ``damping`` factor, so no update
        rule is applied here.
        """
        raise NotImplementedError("pso.velocity: the velocity update rule is not finalised")
    

In [None]:
 # Draft of the velocity update for every row of `dataframe`: the new velocity
 # is the negated sum of a damped old velocity and three weighted terms built
 # from g_best(), gen_best() and individual_best().
 # NOTE(review): this def is indented but sits in its own cell, outside the
 # `pso` class body, so it is not attached to the class as written.
 # NOTE(review): `damping` is not a parameter and is not assigned in the
 # visible cells; `self.g_best()` must exist on the instance -- confirm both.
 # NOTE(review): index [1] of each *_best() result is an activity, yet it is
 # subtracted from a Position here; pso.delta_activity suggests an Activity
 # difference was intended -- confirm.
 def velocity(self, dataframe, p, q, r):
        for i in range(len(dataframe)):
            # individual_best takes the 1-based particle ID, hence i+1
            dataframe.at[i, 'Velocity'] = -(damping*dataframe.at[i, 'Velocity'] 
                                           + p * abs(dataframe.at[i, 'Position'] - self.g_best()[1]) * (dataframe.at[i, 'Position'] - self.g_best()[0]) 
                                           + q * abs(dataframe.at[i, 'Position'] - self.gen_best()[1]) * (dataframe.at[i, 'Position'] - self.gen_best()[0])
                                           + r * abs(dataframe.at[i, 'Position'] - self.individual_best(i+1)[1]) * (dataframe.at[i, 'Position'] - self.individual_best(i+1)[0]))

In [6]:
# Build the generation-0 population (6 samples over Pt/Pd/Au); creating_samples
# also writes it to "Result/Initial Population.txt" and the returned frame is
# displayed as the cell output.
samples_number = 6
element = ['Pt', 'Pd', 'Au']
creating_samples(samples_number, element)

Unnamed: 0,ID,Elements,Generation,Position
0,1,"[Pt, Pd, Au]",0,"[0.5, 0.5, 0.0]"
1,2,"[Pt, Pd, Au]",0,"[0.5, 0.0, 0.5]"
2,3,"[Pt, Pd, Au]",0,"[0.0, 0.5, 0.5]"
3,4,"[Pt, Pd, Au]",0,"[0.559, 0.276, 0.165]"
4,5,"[Pt, Pd, Au]",0,"[0.119, 0.067, 0.814]"
5,6,"[Pt, Pd, Au]",0,"[0.782, 0.039, 0.179]"


In [7]:
# Reload the saved initial population and wrap it in a pso instance.
# `step` divides the distance to `target` when pso.__init__ builds the
# generation-0 velocities.
step = 10
target = np.array([0.333, 0.333, 0.333])
population = pso(pd.read_csv('Result/Initial Population.txt', sep='\t'), step, target)

In [8]:
# Inspect the log right after construction (no move() has been called yet).
population.datalog

Unnamed: 0,ID,Elements,Generation,Position,Velocity,Activity
0,1,"[Pt, Pd, Au]",0,"[0.5, 0.5, 0.0]","[-0.017, -0.017, 0.033]",-0.931286
1,2,"[Pt, Pd, Au]",0,"[0.5, 0.0, 0.5]","[-0.017, 0.033, -0.017]",-0.903849
2,3,"[Pt, Pd, Au]",0,"[0.0, 0.5, 0.5]","[0.033, -0.017, -0.017]",-0.26713
3,4,"[Pt, Pd, Au]",0,"[0.559, 0.276, 0.165]","[-0.023, 0.006, 0.017]",-0.949176
4,5,"[Pt, Pd, Au]",0,"[0.119, 0.067, 0.814]","[0.021, 0.027, -0.048]",-0.693977
5,6,"[Pt, Pd, Au]",0,"[0.782, 0.039, 0.179]","[-0.045, 0.029, 0.015]",-0.884401


In [9]:
# Advance every particle by one velocity step; move() also appends the new
# generation to the log and writes the log to the Result/ folder.
population.move()

In [None]:
# Inspect the rows of the newest generation only.
population.working_generation

In [10]:
# Inspect the full log after the move: generation 0 followed by generation 1.
population.datalog

Unnamed: 0,ID,Elements,Generation,Position,Velocity,Activity
0,1,"[Pt, Pd, Au]",0,"[0.5, 0.5, 0.0]","[-0.017, -0.017, 0.033]",-0.931286
1,2,"[Pt, Pd, Au]",0,"[0.5, 0.0, 0.5]","[-0.017, 0.033, -0.017]",-0.903849
2,3,"[Pt, Pd, Au]",0,"[0.0, 0.5, 0.5]","[0.033, -0.017, -0.017]",-0.26713
3,4,"[Pt, Pd, Au]",0,"[0.559, 0.276, 0.165]","[-0.023, 0.006, 0.017]",-0.949176
4,5,"[Pt, Pd, Au]",0,"[0.119, 0.067, 0.814]","[0.021, 0.027, -0.048]",-0.693977
5,6,"[Pt, Pd, Au]",0,"[0.782, 0.039, 0.179]","[-0.045, 0.029, 0.015]",-0.884401
6,1,"[Pt, Pd, Au]",1,"[0.483, 0.483, 0.033]","[-0.017, -0.017, 0.033]",-0.934144
7,2,"[Pt, Pd, Au]",1,"[0.483, 0.033, 0.483]","[-0.017, 0.033, -0.017]",-0.904486
8,3,"[Pt, Pd, Au]",1,"[0.033, 0.483, 0.483]","[0.033, -0.017, -0.017]",-0.281521
9,4,"[Pt, Pd, Au]",1,"[0.536, 0.282, 0.182]","[-0.023, 0.006, 0.017]",-0.956526


In [None]:
# Euclidean distance between the grid optimum `a` and the position returned
# by population.g_best().
# NOTE(review): requires `pso` to expose g_best() returning
# [position, activity] -- confirm it is defined before running this cell.
np.sqrt(sum((a - population.g_best()[0])**2))

In [None]:
# Display `a`: the first three coordinates of the grid point with the lowest
# predicted Activity, computed in the grid-search cell.
a

In [None]:
# NOTE(review): no module-level `i_data` is assigned in the visible cells;
# pso.individual_best() stores a particle's history on the instance as
# `population.i_data` -- confirm which of the two was intended here.
i_data