In [3]:
import numpy as np
from pyscf import gto




In [2]:
molecule_str = \
"""
C
H   1 1.089000
H   1 1.089000  2  109.4710
H   1 1.089000  2  109.4710  3  120.0000
H   1 1.089000  2  109.4710  3 -120.0000
"""

In [9]:
mol = gto.Mole()

mol.atom = molecule_str

mol.build()
mol.atom_coords()

array([[ 0.        ,  0.        ,  0.        ],
       [ 2.05791175,  0.        ,  0.        ],
       [-0.68596311,  0.        ,  1.94022045],
       [-0.68596311,  1.6802802 , -0.97011022],
       [-0.68596311, -1.6802802 , -0.97011022]])

In [53]:
class Genome(object):
    def __init__(self, first=None, second=None, third=None, species=[], genome=[]):
        self.first_atom = first
        self.second_atom = second
        self.third_atom = third
        
        self.species = species
        self.genome = genome

        
def calculate_genome(matrix_str):
    genome = Genome()
    
    lines =[line for line in matrix_str.split("\n") if line]

    if len(lines) < 4:
        raise ValueError("Molecule must have 4 atoms at least!")
    
    # first line
    genome.first_atom = lines[0].split()[0]
    
    # second line
    splits = lines[1].split()
    genome.second_atom = splits[0]
    genome.genome.append(float(splits[2]))
    
    # third line
    splits = lines[2].split()
    genome.third_atom = splits[0]
    genome.genome.append(float(splits[2]))
    genome.genome.append(float(splits[4]))


    for line in lines[3:]:
        split = line.split()

        genome.species.append((split[0], [split[1], split[3], split[5]]))
        genome.genome += [float(split[2]), float(split[4]), float(split[6])]

    return genome

GENOME = calculate_genome(molecule_str)
genome.species, genome.genome

([('H', ['1', '2', '3']), ('H', ['1', '2', '3'])],
 [1.089, 1.089, 109.471, 1.089, 109.471, 120.0, 1.089, 109.471, -120.0])

In [54]:

# TODO add to genome class
def create_z_matrix(genome):
    matrix_str = genome.first_atom + "\n"
    matrix_str += genome.second_atom + " 1 " + str(genome.genome[0]) + "\n"
    matrix_str += genome.third_atom + " 1 " + str(genome.genome[1]) + " 2 " + str(genome.genome[2]) + "\n"
    
    i = 3
    for (species, reference) in genome.species:
        
        matrix_str += " ".join(
            [
                species, 
                reference[0], 
                str(genome.genome[i]), 
                reference[1], 
                str(genome.genome[i + 1]), 
                reference[2], 
                str(genome.genome[i + 2])
            ]
        ) + "\n"
        
        i += 3
        
    return matrix_str

def build_molecule_from_genome(genome):
    
    mol = gto.Mole()
    mol.atm = create_z_matrix(Genome(
        GENOME.first_atom,
        GENOME.second_atom,
        GENOME.third_atom,
        GENOME.species,
        genome
    ))
    
    mol.build()
    
    return mol
    
    

create_molecule(genome)

[(('H', ['1', '2', '3']), 1.089), (('H', ['1', '2', '3']), 109.471)]


'C\nH 1 1.089\nH 1 1.089 2 109.471\nH 1 1.089 2 109.471 3 120.0\nH 1 1.089 2 109.471 3 -120.0\n'

In [48]:
molecule_str.split("\n")

['',
 'C',
 'H   1 1.089000',
 'H   1 1.089000  2  109.4710',
 'H   1 1.089000  2  109.4710  3  120.0000',
 'H   1 1.089000  2  109.4710  3 -120.0000',
 '']

# Apply to the Library 

In [46]:
from deap import base
from deap import creator
from deap import tools

import random

In [45]:
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

## Initializing 

In [49]:
toolbox = base.Toolbox()



def init_individual():
    
    return [x + random.gauss(0, 1) for x in genome.genome ]


toolbox.register("init_individual", tools.initIterate, 
                 creator.Individual, init_individual)

toolbox.register("init_population", tools.initRepeat, 
                 list, toolbox.init_individual)

In [52]:
population = toolbox.init_population(n=3)
population

[[0.9933648175652079,
  0.32808625939828917,
  107.98311187331588,
  1.2821075987633797,
  108.77333068325345,
  119.05270253210277,
  0.8574873395772349,
  109.94196130759418,
  -120.54011878355298],
 [1.866183576846456,
  0.2784914528227326,
  109.4374245038215,
  2.6961161787917955,
  109.89249110156872,
  120.14652240726917,
  -0.3456593346240906,
  109.56885283678673,
  -119.24213460887758],
 [2.3290603945878274,
  0.30701196381785545,
  108.27601961810777,
  0.4432478591013812,
  110.45251012205816,
  119.30368691840854,
  0.8269974088941737,
  109.87513625155908,
  -119.29433843300654]]

## Energy 

In [55]:
def evaluateFitness(individual):
    
    mol = build_molecule_from_genome(individual)
    
    mf = RHF(mol)
    mf.verbose = 0
    E = mf.scf()
    
    # this shit has to be a tuple!!
    return E,

toolbox.register("evaluate", evaluateFitness)