In [1]:
import random
import numpy as np

from deap import base
from deap import creator
from deap import tools

import matplotlib.pyplot as plt
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and later
# releases removed the old names; use whichever name this installation provides.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

%matplotlib inline

In [2]:
# Single-objective fitness; the negative weight makes DEAP treat lower values as better (minimisation).
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
# An individual is a plain list of genes carrying a FitnessMin instance as its `fitness` attribute.
creator.create("Individual", list, fitness=creator.FitnessMin)

# Set Up the System
- The genome now has 3 parameters: distance O-H1, distance O-H2, and angle H1-O-H2.
- The molecule always lies in the xy plane.


In [3]:
# Toolbox that collects the GA operators registered in the following cells.
toolbox = base.Toolbox()

def init_gene():
    """Draw a single gene from a normal distribution with mean 1 and standard deviation 1."""
    mean, std_dev = 1, 1
    return random.gauss(mean, std_dev)

toolbox.register("init_gene", init_gene)
# An individual is built from 3 independently drawn genes.
toolbox.register("init_individual", tools.initRepeat, 
                 creator.Individual, toolbox.init_gene, 3)

# A population is a plain list of individuals; its size `n` is supplied at call time.
toolbox.register("init_population", tools.initRepeat, 
                 list, toolbox.init_individual)

In [4]:
from pyscf.gto import Mole
from pyscf.scf import RHF

def build_molecule_from_genome(genome, basis="sto-3g"):
    """Build a PySCF molecule for the water geometry encoded by ``genome``.

    The oxygen atom is placed at the origin, the first hydrogen on the x axis
    and the second hydrogen in the xy plane, so every atom has z = 0.

    Args:
        genome: sequence whose first three entries are the O-H1 distance, the
            O-H2 distance and the H1-O-H2 angle. The angle is passed straight
            to np.cos / np.sin, i.e. it is interpreted in radians.
        basis: basis set name assigned to the molecule. Defaults to "sto-3g",
            the value that used to be hard-coded; a larger set such as
            "6-31++g**" can be passed for more accurate energies.

    Returns:
        The built ``Mole`` instance.
    """
    d_oh1, d_oh2, angle = genome[0], genome[1], genome[2]

    atoms = [
        ("O", 0, 0, 0),
        ("H", d_oh1, 0, 0),
        ("H", d_oh2 * np.cos(angle), d_oh2 * np.sin(angle), 0),
    ]

    mol = Mole()
    mol.atom = atoms
    mol.basis = basis
    mol.build()
    return mol
    
def evaluateFitness(individual):
    """Evaluate an individual by running a restricted Hartree-Fock calculation.

    The genome is converted into a molecule with build_molecule_from_genome
    and the SCF is run with its output silenced (verbose = 0).

    Returns:
        A one-element tuple holding the value returned by the SCF run.
    """
    solver = RHF(build_molecule_from_genome(individual))
    solver.verbose = 0
    energy = solver.scf()

    # DEAP expects fitness values as a sequence, hence the 1-tuple.
    return (energy,)

# Make the fitness function available to the GA as toolbox.evaluate.
toolbox.register("evaluate", evaluateFitness)

  from ._conv import register_converters as _register_converters


In [5]:
# Two-point crossover between the gene lists of two individuals.
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.05) # add Gaussian noise (mu=0, sigma=1) to each gene independently with probability 0.05, i.e. 5 %
# Tournament selection with 3 participants per tournament.
toolbox.register("select", tools.selTournament, tournsize=3)

# Do the Optimization

In [6]:
# Probability that a pair of offspring is recombined / that an offspring is mutated.
PROBABILITY_CROSSING = 0.5
PROBABILITY_MUTATION = 0.2

# Hard cap on the number of generations.
MAX_ITERATIONS = 100

# Largest change of the mean population energy between two generations that
# still counts as converged. The energies here have a magnitude of about 75,
# where neighbouring float64 values are ~1e-14 apart, so the former value of
# 1e-20 could only ever trigger on a bitwise identical mean.
CONVERGENCE_THRESHOLD = 1e-6

# Mean energy of the previous generation; starts far away from any real
# energy so that the first generation can never count as converged.
E_old = 1e10

In [12]:
# Number of individuals per generation.
SIZE_POPULATION = 20

# Random initial population of SIZE_POPULATION genomes.
population = toolbox.init_population(n=SIZE_POPULATION)

In [13]:
# Display the freshly initialised genomes (one 3-gene list per individual).
population

[[0.7572941488891316, -0.7599535077456965, 2.0285371775938827],
 [0.5564458597451798, 0.5346089999202958, 2.176703062353366],
 [2.039354869028638, 0.8952890530012845, -0.10516452920092001],
 [-0.28106557095373264, 1.0762027460521044, 1.5375374154155725],
 [1.1043740471634347, 0.760897841266471, 0.89921726032508],
 [-1.2339768068870431, 2.01931082623239, 0.13555290922882846],
 [1.3948771794064896, -0.41024312648824157, 0.408713358754234],
 [0.893673971505689, 0.699020451117589, 2.379035111755665],
 [1.4635154233522598, -0.5117161098369907, 1.6020062070616055],
 [-0.11709000747312404, 1.163386105099015, 1.9228149192696729],
 [1.9652294778949724, 1.4730335397476593, 1.8627627213381295],
 [-0.3151979962950604, 1.5389596270440964, 2.2265682198683807],
 [-0.14827400998460782, 2.217186567659356, 0.8149172109395884],
 [2.4638443562264185, 2.516593355680627, 1.1294954037603735],
 [0.6109337966239587, 1.2690305490662899, 1.1548357256054282],
 [0.5538683704284447, 0.9374951499333799, 1.8075471251

In [8]:
# Score every member of the initial population, then store each score on its individual.
fitnesses = [toolbox.evaluate(candidate) for candidate in population]

for candidate, score in zip(population, fitnesses):
    candidate.fitness.values = score

In [9]:
# Best individual of the initial population according to its fitness,
# printed together with its fitness tuple before any evolution has run.
best_ind = tools.selBest(population, 1)[0]
print("Pre:\n{0} {1}".format(best_ind, best_ind.fitness.values))

Pre:
[1.2465210690315611, 0.9230124195722169, 1.3380394711474881] (-74.88988729943918,)


In [10]:
# create a list of fitness values
fitness_values = [ind.fitness.values[0] for ind in population]

# Begin the evolution
for i in range(MAX_ITERATIONS):

    # Select next generation; the selected individuals are cloned so that
    # crossover and mutation work on copies
    offspring = toolbox.select(population, len(population))
    offspring = list(map(toolbox.clone, offspring))

    # do cross over on neighbouring pairs (0, 1), (2, 3), ...
    for child1, child2 in zip(offspring[::2], offspring[1::2]):

        if random.random() < PROBABILITY_CROSSING:
            toolbox.mate(child1, child2)

            # genes changed, so the stored fitness no longer applies
            del child1.fitness.values
            del child2.fitness.values

    # do mutation
    for mutant in offspring:
        if random.random() < PROBABILITY_MUTATION:
            toolbox.mutate(mutant)
            del mutant.fitness.values

    # recalculate fitness values of mates and mutants only
    invalid_individuals = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = map(toolbox.evaluate, invalid_individuals)
    for ind, fit in zip(invalid_individuals, fitnesses):
        ind.fitness.values = fit

    population[:] = offspring

    # update list of fitness value
    fitness_values = [ind.fitness.values[0] for ind in population]

    # Mean energy over the individuals actually present. Dividing by the real
    # length keeps the mean correct even if SIZE_POPULATION was edited without
    # re-creating the population.
    E = sum(fitness_values) / len(fitness_values)
    delta_E = np.abs(E_old - E)
    print("Generation: " + str(i+1))
    print(E, delta_E)

    # stop once the mean energy no longer changes between generations
    if delta_E < CONVERGENCE_THRESHOLD:
        print("\n\nCONVERGED!\n")
        break

    E_old = E
        
    

Generation: 1
-73.96186633035069 10000000073.961866
Generation: 2
-74.7113546362583 0.7494883059076187
Generation: 3
-74.78149783572361 0.07014319946530634
Generation: 4
-74.85550375806051 0.07400592233689451
Generation: 5
-74.9042557634867 0.048752005426194955
Generation: 6
-74.92222860123107 0.017972837744366643
Generation: 7
-74.94857922609394 0.02635062486287154
Generation: 8
-74.32482400882732 0.6237552172666199
Generation: 9
-74.94944891727486 0.6246249084475437
Generation: 10
-74.94139873801625 0.008050179258617618
Generation: 11
-74.94301789921875 0.001619161202498276
Generation: 12
-74.79527925757316 0.1477386416455886
Generation: 13
-74.95227558412066 0.15699632654749962
Generation: 14
-74.83702111216262 0.11525447195803906
Generation: 15
-74.95968823499123 0.12266712282861647
Generation: 16
-74.91601233040052 0.0436759045907138
Generation: 17
-74.9582718303819 0.0422594999813839
Generation: 18
-74.94285885126794 0.015412979113961
Generation: 19
-74.94730730163622 0.004448450

KeyboardInterrupt: 

# Checking the Result 

In [None]:
best_ind = tools.selBest(population, 1)[0]

# Bond vectors O->H1 and O->H2 in the xy plane, using the same placement as
# build_molecule_from_genome (O at the origin, H1 on the x axis).
oh1 = np.array([best_ind[0], 0.0])
oh2 = best_ind[1] * np.array([np.cos(best_ind[2]), np.sin(best_ind[2])])

# The genes are unconstrained: a "distance" gene can be negative and the angle
# gene can lie outside [0, pi]. Printing the raw genes would then report a
# geometry that differs from the one actually evaluated, so the bond lengths
# and the H-O-H angle are derived from the bond vectors instead.
distance_oh1 = np.linalg.norm(oh1)
distance_oh2 = np.linalg.norm(oh2)
# A zero-length bond makes the angle undefined; numpy then yields nan.
cos_hoh = np.dot(oh1, oh2) / (distance_oh1 * distance_oh2)
# clip guards against rounding pushing the cosine marginally outside [-1, 1]
angle_hoh = np.degrees(np.arccos(np.clip(cos_hoh, -1.0, 1.0)))

print("Distances: \n - {0}\n - {1}".format(distance_oh1, distance_oh2))
print("Angle: {0}".format(angle_hoh))
print("Energy: {0}".format(best_ind.fitness.values))

In [None]:
# Rebuild the molecule for the best genome and display its atom list.
mol = build_molecule_from_genome(best_ind)
mol.atom

In [None]:
import matplotlib.pylab as plt

# Cartesian coordinates of all atoms; every entry of mol.atom is (symbol, x, y, z)
# and the first entry is the oxygen atom.
x = [t[1] for t in mol.atom]
y = [t[2] for t in mol.atom]
z = [t[3] for t in mol.atom]

# common axis range of all panels
lower, upper = -1.5, 1.5

coordinates = {"x": x, "y": y, "z": z}

# One panel per projection plane (xy, xz, yz) in a 2x2 grid.
for panel, (h_name, v_name) in enumerate([("x", "y"), ("x", "z"), ("y", "z")], start=1):
    horizontal, vertical = coordinates[h_name], coordinates[v_name]

    plt.subplot(2, 2, panel)
    # the central atom is oxygen (it was mislabelled "C" before)
    plt.scatter(horizontal[0], vertical[0], label="O")
    plt.scatter(horizontal[1:], vertical[1:], marker="x", label="H")
    plt.xlabel(h_name)
    plt.ylabel(v_name)
    plt.xlim(lower, upper)
    plt.ylim(lower, upper)
    # without a legend the labels set above are never shown
    plt.legend()

# keep axis labels of neighbouring panels from overlapping
plt.tight_layout()
plt.show()