In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
os.chdir('..')

In [2]:
import pygad
import torch
import pickle
import json
import pandas as pd
import numpy as np
from modules.functions import data_generator_vec, check_cuda
from sklearn.neighbors import KernelDensity


In [3]:
model_path = 'saved_models'
if os.path.exists(os.path.join(model_path, 'GAN_generator.pt')):
    generator = torch.jit.load(os.path.join(model_path, 'GAN_generator.pt'), map_location='cpu')
else:
    print('train model first!')

el_list_loc = 'misc/element_order_uts.pkl'
with open(el_list_loc,'rb') as fid:
    el_list = pickle.load(fid)

with open('misc/scaler_y.pkl','rb') as fid:
    uts_scaler = pickle.load(fid)

with open('misc/starting_comp.json','r') as fid:
    json_dict = json.load(fid)

In [4]:
dataset = pd.read_csv('dataset/synthetic_dataset.csv', index_col = 0)
to_train_df = dataset[dataset['uts1200C']>0].copy()
comp_dset = data_generator_vec(to_train_df['Composition'], el_list=el_list)
vec_comps = comp_dset.real_data
y = to_train_df['uts1200C'].values.reshape(-1,1).astype('float32')
y_scaled = uts_scaler.transform(y)
kde = KernelDensity(kernel='gaussian',bandwidth=0.5)
v = kde.fit(y_scaled)

def prop_sampler(n_samples):
    return kde.sample(n_samples).astype('float32')

def noise_sampler(N, z_dim):
    return np.random.normal(size=[N, z_dim]).astype('float32')

latent_dim = 4
cuda = check_cuda()

In [98]:
desired_uts = np.array(json_dict['start_uts']).reshape(-1,1).astype('float32')
def ga_inputs(N, z_dim = latent_dim):
    prop = torch.from_numpy(desired_uts)*torch.ones((N,1))
    noise = torch.from_numpy(noise_sampler(N,z_dim))
    return(torch.cat([noise,prop], dim=-1))

functional_inputs = ga_inputs(1)
desired_output = np.array(json_dict['start_comp'])


In [184]:
functional_input = functional_inputs
output = generator(functional_inputs[:,:-1].reshape(-1,latent_dim),functional_inputs[:,-1].reshape(-1,1))
output = output.to('cpu').detach().numpy()
idx = np.argwhere(np.array(json_dict['start_comp'])==0)
output[:,idx] = 0
weights = np.abs(output - desired_output)
distance = ((output.flatten() - desired_output.flatten())**2).reshape(1,-1)
distance*=weights
fitness = 1/np.sum(distance) - (1-np.sum(output))
fitness

7.392408508777949

In [213]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
def fitness_func(solution, solution_idx):
    functional_input = solution
    output = generator(functional_inputs[:,:-1].reshape(-1,latent_dim),functional_inputs[:,-1].reshape(-1,1))
    output = output.to('cpu').detach().numpy()
    idx = np.argwhere(np.array(json_dict['start_comp'])==0)
    output[:,idx] = 0
    #weights = np.abs(output - desired_output)
    distance = mean_absolute_error(output.flatten(),desired_output.flatten())
    fitness = 1/distance - (1-np.sum(output))
    return fitness

In [214]:
fitness_function = fitness_func

num_generations = 5000
num_parents_mating = 2

sol_per_pop = 20
num_genes = len(functional_inputs)

initial_population = ga_inputs(1000)

init_range_low = -2
init_range_high = 5

parent_selection_type = "sss"
keep_parents = 1

crossover_type = "single_point"

mutation_type = "adaptive"
mutation_probability = (0.3,0.05)
mutation_percent_genes = (20,5)

In [215]:
ga_instance = pygad.GA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       fitness_func=fitness_function,
                       sol_per_pop=sol_per_pop,
                       num_genes=num_genes,
                       init_range_low=init_range_low,
                       init_range_high=init_range_high,
                       parent_selection_type=parent_selection_type,
                       keep_parents=keep_parents,
                       crossover_type=crossover_type,
                       mutation_type=mutation_type,
                       mutation_percent_genes=mutation_percent_genes,
                       mutation_probability=mutation_probability,
                       initial_population=initial_population)
ga_instance.run()

If you do not want to mutate any gene, please set mutation_type=None.


In [216]:
solution, solution_fitness, solution_idx = ga_instance.best_solution()

In [217]:
out = generator(torch.from_numpy(solution[:-1].reshape(-1,latent_dim).astype('float32')),torch.from_numpy(solution[-1].reshape(-1,1).astype('float32')))
out

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 4.1394e-31, 0.0000e+00, 0.0000e+00,
         3.9356e-19, 0.0000e+00, 9.7133e-01, 5.1848e-43, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 2.2907e-17, 0.0000e+00, 0.0000e+00, 2.8671e-02, 5.7876e-40]],
       grad_fn=<DifferentiableGraphBackward>)

In [149]:
np.array(json_dict['start_comp'])

array([0.        , 0.        , 0.        , 0.20112187, 0.        ,
       0.        , 0.18028086, 0.        , 0.05807044, 0.06428983,
       0.        , 0.        , 0.        , 0.        , 0.06899341,
       0.        , 0.06850126, 0.35874233])