In [1]:
import numpy as np
import pandas as pd
import quantecon as qe
from ast import literal_eval
from sklearn.ensemble import RandomForestRegressor

In [None]:
def creating_samples(samples_number, element):
    """Build the generation-0 population and write it to disk.

    The first ``len(element)`` rows are deterministic compositions derived
    from ``qe.simplex_grid(len(element), 1)``: each grid row ``g`` becomes
    ``(1 - g) / (len(element) - 1)``.  (Presumably the grid rows are the unit
    vectors of the simplex, so each deterministic row has one zero component
    and equal shares elsewhere -- confirm against the quantecon docs.)
    Any further rows are drawn from ``np.random.rand``, divided by their row
    sum and rounded to three decimals, so a rounded row may sum to slightly
    more or less than 1.

    Parameters
    ----------
    samples_number : int
        Number of rows (particles) to create; must be non-negative.
    element : sequence of str
        Element labels; its length is the dimension of every position and
        must be at least 2.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID`` (1..samples_number), ``Elements`` (``element`` on
        every row), ``Generation`` (all 0) and ``Position`` (one NumPy array
        per row).  The same frame is written tab-separated to
        ``Result/Initial Population.txt``.

    Raises
    ------
    ValueError
        If ``element`` has fewer than two entries or ``samples_number`` is
        negative.
    """
    import os

    n_elements = len(element)
    if n_elements < 2:
        # (1 - grid) / (n - 1) would be 0/0 for a single element.
        raise ValueError("element must contain at least two entries")
    if samples_number < 0:
        raise ValueError("samples_number must be non-negative")

    uniform_samples = (1 - qe.simplex_grid(n_elements, 1)) / (n_elements - 1)

    n_random = samples_number - n_elements
    if n_random > 0:
        random_samples = np.random.rand(n_random, n_elements)
        # Normalise every row to unit sum in one vectorised step.
        random_samples = np.around(
            random_samples / random_samples.sum(axis=1, keepdims=True),
            decimals=3,
        )
        sample = np.vstack((uniform_samples, random_samples))
    else:
        sample = uniform_samples[:samples_number]

    # Every column is given one value per row explicitly; a length-1
    # 'Elements' list would depend on pandas broadcasting it to the index,
    # which is not reliable across pandas versions.
    df = pd.DataFrame({
        'ID': np.arange(1, samples_number + 1),
        'Elements': [element] * samples_number,
        'Generation': [0] * samples_number,
    })
    # Wrapping each row in a one-item list keeps the whole composition in a
    # single 'Position' cell instead of spreading it over several columns.
    positions = pd.DataFrame([[row] for row in sample], columns=['Position'])
    df = pd.concat([df, positions], axis=1)

    # to_csv does not create missing directories.
    os.makedirs("Result", exist_ok=True)
    df.to_csv("Result/Initial Population.txt", sep='\t', index=False, mode='w')
    return df

In [None]:
#Loading data and fitting
df_old = pd.read_csv('Data/AuIrOsPdPtReRhRu_0.60_compositions_and_targets.csv')
# Feature columns, in the order the regressor is trained on.
X_columns_old = ['Pt','Pd','Au','Ru','Rh','Ir','Re','Os']
x_old = df_old[X_columns_old].to_numpy()
y_old = df_old['current_over_capacitance'].to_numpy()

#Random Forest Regression
# max_features = 1.0 (use every feature at each split) is what 'auto' meant for
# regressors; 'auto' itself is no longer accepted by recent scikit-learn.
# random_state is fixed so that re-running the notebook refits the same model.
reg = RandomForestRegressor(n_estimators = 1024,
                           bootstrap = True,
                           max_depth = None,
                           max_features = 1.0,
                           min_samples_leaf = 1,
                           min_samples_split = 2,
                           oob_score = True,
                           random_state = 0)
reg = reg.fit(x_old, y_old)

In [None]:
#Particle swarm optimisation (PSO) population class
class pso:
    """Bookkeeping for a particle-swarm style search over compositions.

    Attributes
    ----------
    datalog : pandas.DataFrame
        One row per particle per generation with columns ``ID``,
        ``Elements``, ``Generation``, ``Position``, ``Velocity`` and
        ``Activity``.  ``Position`` and ``Velocity`` cells hold NumPy arrays.
    working_generation : pandas.DataFrame
        Independent copy of the rows of the latest generation, indexed 0..n-1.
    generation : int
        Number of the latest generation.
    position : numpy.ndarray
        Positions parsed from the input frame (one row per input row).
    velocity : numpy.ndarray
        Velocities created for generation 0, or parsed from an existing
        ``Velocity`` column.

    The activity of a position is predicted by the module-level regressor
    ``reg``, which must exist before an instance is created.
    """

    # Number of zero-valued features appended to every position before it is
    # passed to ``reg`` (the original code hard-coded five zeros).
    # NOTE(review): presumably ``reg`` was fitted on 8 element fractions and
    # only the first 3 are optimised here -- confirm against the fitting cell.
    N_PADDING_FEATURES = 5

    def __init__ (self, data, step, target = np.array([0.333, 0.333, 0.333])):
        """Parse a population table and evaluate every row.

        Parameters
        ----------
        data : pandas.DataFrame
            Needs the columns ``ID``, ``Elements``, ``Generation`` and
            ``Position``.  ``Elements``/``Position`` (and ``Velocity`` if
            present) may be the strings produced by a CSV round trip or
            already-parsed objects.  The frame is not modified.
        step : number
            Divisor of the initial velocity ``(target - position) / step``.
        target : array-like
            Point the initial velocities point towards; must have the same
            length as a position.
        """
        # Private copy with a clean 0..n-1 index: the caller's frame stays
        # untouched and positional/label access agree below.
        datalog = data.copy().reset_index(drop=True)

        #Reading "Elements" columns from string to list
        datalog['Elements'] = datalog['Elements'].apply(
            lambda cell: literal_eval(cell) if isinstance(cell, str) else cell
        )

        #Select latest generation
        self.generation = int(datalog['Generation'].max())

        #Creating np.array of "Position" column and replacing the string column
        # Every row is parsed (not just the first ID.max() rows) so logs that
        # already contain several generations are handled too.
        parsed_positions = [self._parse_vector(cell) for cell in datalog['Position']]
        self.position = np.array(parsed_positions)
        datalog = datalog.drop(columns=['Position'])
        datalog['Position'] = self._as_object_column(parsed_positions)

        if 'Velocity' in datalog.columns:
            # A previously stored log already carries velocities.
            parsed_velocities = [self._parse_vector(cell) for cell in datalog['Velocity']]
            self.velocity = np.array(parsed_velocities)
            datalog['Velocity'] = self._as_object_column(parsed_velocities)
        elif self.generation == 0:
            #Creating "Velocity" column for the "0" generation
            self.velocity = np.around((-(self.position - target)/step), decimals = 3)
            datalog['Velocity'] = self._as_object_column(self.velocity)

        #Creating "Activity" column and filling it with RFR
        datalog['Activity'] = np.nan
        self.datalog = datalog
        self.f_activity(self.datalog)

        #Creating dataframe of the latest generation
        # .copy() so later writes never touch a slice of datalog;
        # reset_index so the rows are always labelled 0..n-1.
        self.working_generation = (
            self.datalog.loc[self.datalog['Generation'] == self.generation]
            .copy()
            .reset_index(drop=True)
        )

    @staticmethod
    def _parse_vector(cell):
        """Return ``cell`` as a 1-D float array.

        Strings such as ``"[0.5 0.5 0. ]"`` (the text written by ``to_csv``
        for an array cell) are split on whitespace/commas; anything else is
        passed through ``np.asarray``.
        """
        if isinstance(cell, str):
            tokens = cell.strip().strip('[]').replace(',', ' ').split()
            return np.array(tokens, dtype=float)
        return np.asarray(cell, dtype=float)

    @staticmethod
    def _as_object_column(vectors):
        """Pack one vector per row into a 1-D object array.

        Filling the array element by element guarantees that each cell holds a
        whole vector instead of the rows being interpreted as a 2-D table.
        """
        column = np.empty(len(vectors), dtype=object)
        for row_number, vector in enumerate(vectors):
            column[row_number] = vector
        return column

    def move(self, size=1):
        """Advance every particle of the working generation by one move.

        New position = position + velocity * ``size``, rounded to three
        decimals.  The moved generation is re-evaluated, appended to
        ``datalog`` and the log is written to disk.
        """
        self.generation += 1
        swarm = self.working_generation
        swarm['Generation'] = swarm['Generation'] + 1

        moved = [
            np.around(position + velocity * size, decimals = 3)
            for position, velocity in zip(swarm['Position'], swarm['Velocity'])
        ]
        # TODO: boundary handling is unfinished -- a moved position may have
        # components outside [0, 1].
        swarm['Position'] = self._as_object_column(moved)

        #Filling the "Activity" column with RFR
        self.f_activity(swarm)

        #Concating the tables
        self.datalog = pd.concat([self.datalog, swarm], ignore_index=True)
        self.store_datalog()
        return

    def f_activity(self, dataframe):
        """Fill ``dataframe['Activity']`` in place with predictions of ``reg``.

        Each position is padded with ``N_PADDING_FEATURES`` zeros and all rows
        are predicted in a single call.  The column ends up with float dtype.
        """
        if len(dataframe) == 0:
            return
        positions = np.vstack(
            [np.asarray(position, dtype=float) for position in dataframe['Position']]
        )
        padding = np.zeros((positions.shape[0], self.N_PADDING_FEATURES))
        features = np.hstack((positions, padding))
        # ``reg`` is the module-level regressor fitted in an earlier cell.
        dataframe['Activity'] = np.asarray(reg.predict(features), dtype=float).reshape(-1)
        return

    def store_datalog(self):
        """Write ``datalog`` tab-separated to ``Result/Initial Population_gen <generation>.txt``."""
        import os

        # to_csv does not create missing directories.
        os.makedirs("Result", exist_ok=True)
        self.datalog.to_csv("Result/Initial Population_gen "+str(self.generation)+".txt", sep='\t', index=False, mode='w')
        return

    def normalize_velocity(self, dataframe):
        """Shift every velocity in place so that its components sum to ~0.

        ``sum(velocity) / n`` is subtracted from each component, where ``n`` is
        the number of entries in the first row's ``Elements``; the result is
        rounded to three decimals.
        """
        if len(dataframe) == 0:
            return
        n_elements = len(dataframe['Elements'].iloc[0])
        shifted = [
            np.around(velocity - np.sum(velocity) / n_elements, decimals = 3)
            for velocity in dataframe['Velocity']
        ]
        dataframe['Velocity'] = self._as_object_column(shifted)
        return

    def normalize_position(self, dataframe):
        """Rescale every position in place to unit sum (rounded to three decimals)."""
        if len(dataframe) == 0:
            return
        rescaled = [
            np.around(position / np.sum(position), decimals = 3)
            for position in dataframe['Position']
        ]
        dataframe['Position'] = self._as_object_column(rescaled)
        return

    def g_best(self):
        """Return the position with the lowest activity in the whole log.

        The result is also stored as ``self.g_best_position``; it is not
        stored under the method's own name, so the method stays callable.
        """
        activity = self.datalog['Activity'].to_numpy(dtype=float)
        self.g_best_position = self.datalog['Position'].iloc[int(np.nanargmin(activity))]
        return self.g_best_position

    def gen_best(self):
        """Return the position with the lowest activity in the working generation.

        The result is also stored as ``self.gen_best_position``.
        """
        activity = self.working_generation['Activity'].to_numpy(dtype=float)
        self.gen_best_position = self.working_generation['Position'].iloc[int(np.nanargmin(activity))]
        return self.gen_best_position

    def i_best(self):
        """Return, per particle, the logged position with its lowest activity.

        Particles are the IDs ``1..len(working_generation)``; the result is a
        2-D array with one row per particle, also stored as
        ``self.i_best_positions``.
        """
        ids = self.datalog['ID'].to_numpy()
        activity = self.datalog['Activity'].to_numpy(dtype=float)
        best = []
        for particle_id in range(1, len(self.working_generation) + 1):
            rows = np.flatnonzero(ids == particle_id)
            best_row = rows[int(np.nanargmin(activity[rows]))]
            best.append(list(self.datalog['Position'].iloc[best_row]))
        self.i_best_positions = np.array(best)
        return self.i_best_positions
    
    
    
    
    
    
    
    

In [None]:
# Seed NumPy's global RNG: creating_samples draws its extra rows from
# np.random.rand, so without a seed every run yields a different population.
np.random.seed(0)
samples_number = 6
element = ['Pt', 'Pd', 'Au']
# Builds the generation-0 table, writes it to 'Result/Initial Population.txt'
# and displays the returned frame as the cell output.
creating_samples(samples_number, element)

In [None]:
# Divisor applied to the initial velocity (target - position) / step.
step = 5
# Composition the generation-0 velocities point towards.
target = np.array([0.9, 0.1, 0])
# Reload the population written by creating_samples and wrap it in a swarm.
population = pso(
    data=pd.read_csv('Result/Initial Population.txt', sep='\t'),
    step=step,
    target=target,
)

In [None]:
# Show the full log as built by the constructor (before any move() call).
population.datalog

In [None]:
# Advance the swarm by one generation with the default size of 1; move() also
# appends the new rows to the log and writes the log to the Result folder.
population.move()

In [None]:
# Show only the rows of the generation produced by the move() call above.
population.working_generation

In [None]:
# Show the full log again; it now also contains the rows appended by move().
population.datalog