In [3]:
import numpy as np
import pandas as pd
from ase.build import fcc111
from ase.constraints import FixAtoms
from ase.ga.data import PrepareDB, DataConnection
from ase.ga import get_raw_score
from ase.ga.startgenerator import StartGenerator
from ga_bulk_relax import relax
from ase.ga.utilities import closest_distances_generator, get_all_atom_types
from ase.ga.offspring_creator import OperationSelector
from ase.ga.ofp_comparator import OFPComparator
from ase.ga.population import Population
from ase.ga.soft_mutation import SoftMutation
from ase.ga.standardmutations import StrainMutation
from ase.ga.utilities import CellBounds, closest_distances_generator
from ase.io import write
from ase.ga.cutandsplicepairing import CutAndSplicePairing
from ase import Atoms
from ase.data import atomic_numbers

from chgnet.model import StructOptimizer

from AlphaCrystal.cryspnet.cryspnet.utils import FeatureGenerator, load_input, dump_output, group_outputs, topkacc
from AlphaCrystal.cryspnet.cryspnet.models import load_Bravais_models, load_Lattice_models, load_SpaceGroup_models
from AlphaCrystal.cryspnet.cryspnet.config import *

import warnings
import os
warnings.filterwarnings("ignore", module="pymatgen")
warnings.filterwarnings("ignore", module="ase")
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
from pymatgen.core.composition import Composition, Element
from tqdm import tqdm

In [14]:
which = "oxide"
batch_size = 256
use_cpu = True
formulas = ["TiV2CrO10"]
topn_bravais = 1
topn_spacegroup=1
N = 20
default_bounds={'phi': [20, 160], 'chi': [20, 160],'psi': [20, 160], 
                'a': [2, 60], 'b': [2, 60], 'c': [2, 60]}
default_v = 300
db_file_name = "gadb2.db"

In [15]:
BE = load_Bravais_models(
    n_ensembler = 5,
    which = which,
    batch_size = batch_size,
    cpu=use_cpu
)
LPB = load_Lattice_models(batch_size = batch_size, cpu=use_cpu)
SGB = load_SpaceGroup_models(batch_size = batch_size, cpu=use_cpu)
featurizer = FeatureGenerator()

In [16]:
data = pd.DataFrame({"formula": formulas})
ext_magpie = featurizer.generate(data)

StrToComposition: 100%|██████████| 1/1 [00:00<00:00, 27.83it/s]
MultipleFeaturizer: 100%|██████████| 1/1 [00:00<00:00, 33.75it/s]


In [17]:
bravais_probs, bravais = BE.predicts(ext_magpie, topn_bravais=topn_bravais)
lattices = []
spacegroups = []
spacegroups_probs = []

for i in range(topn_bravais):
    ext_magpie["Bravais"] = bravais[:, i]
    lattices.append(LPB.predicts(ext_magpie))
    sg_prob, sg = SGB.predicts(ext_magpie, topn_spacegroup=topn_spacegroup)
    spacegroups.append(sg)
    spacegroups_probs.append(sg_prob)

out = group_outputs(bravais, bravais_probs, spacegroups, spacegroups_probs, lattices, data)

In [18]:
out

Unnamed: 0_level_0,formula,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais,Top-1 Bravais
Unnamed: 0_level_1,-,Bravais,Bravais prob,a,b,c,alpha,beta,gamma,v,Top-1 SpaceGroup,Top-1 SpaceGroup prob
0,TiV2CrO10,cubic (F),0.341207,31.440802,31.440802,31.440802,90.0,90.0,90.0,31079.987801,216,0.738137


In [19]:
def formula_to_blocks(formula):
    comp = Composition(formula).as_dict()
    block = []
    for elem in comp:
        block += [elem] * int(comp[elem])
    return block

In [20]:
blocks = formula_to_blocks(formulas[0])

In [21]:
def lattice_to_sg(lattice, blocks, pm=10):
    if lattice['Bravais prob'].values[0] >= 0.5:
        a = [lattice.a.values[0]-pm, lattice.a.values[0]+pm]
        b = [lattice.b.values[0]-pm, lattice.b.values[0]+pm]
        c = [lattice.c.values[0]-pm, lattice.c.values[0]+pm]
        phi = [lattice.alpha.values[0]-pm, lattice.alpha.values[0]+pm]
        chi = [lattice.beta.values[0]-pm, lattice.beta.values[0]+pm]
        psi = [lattice.gamma.values[0]-pm, lattice.gamma.values[0]+pm]
        v = lattice.v.values[0]
        cellbounds = CellBounds(bounds={'phi': phi, 'chi': chi, 'psi': psi, 
                                'a': a, 'b': b, 'c': c})
    else:
        cellbounds = CellBounds(bounds=default_bounds)
        v = default_v
    slab = Atoms('', pbc=True)
    blmin = closest_distances_generator(atom_numbers=[atomic_numbers[block] for block in blocks],ratio_of_covalent_radii=0.5)
    sg = StartGenerator(slab, blocks, blmin, box_volume=v,
                    number_of_variable_cell_vectors=3,
                    cellbounds=cellbounds)
    return sg, cellbounds, blmin

In [22]:
assert not os.path.exists(db_file_name), db_file_name + " already exists"
d = PrepareDB(db_file_name=db_file_name,
               stoichiometry=blocks)
sg, cellbounds, blmin = lattice_to_sg(out['Top-1 Bravais'], blocks)
for i in tqdm(range(N)):
    a = sg.get_new_candidate()
    d.add_unrelaxed_candidate(a)

  0%|          | 0/20 [00:00<?, ?it/s]

 50%|█████     | 10/20 [00:30<00:30,  3.00s/it]


KeyboardInterrupt: 

In [23]:
da = DataConnection('gadb2.db')
slab = da.get_slab()
atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
n_top = len(atom_numbers_to_optimize)

In [24]:
atom_numbers_to_optimize, n_top

(['Ti', 'V', 'V', 'Cr', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], 14)

In [25]:
comp = OFPComparator(n_top=n_top, dE=1.0,
                     cos_dist_max=1e-3, rcut=10., binwidth=0.05,
                     pbc=[True, True, True], sigma=0.05, nsigma=4,
                     recalculate=False)

In [26]:
pairing = CutAndSplicePairing(slab, n_top, blmin, p1=1., p2=0., minfrac=0.15,
                              number_of_variable_cell_vectors=3,
                              cellbounds=cellbounds, use_tags=False)

In [27]:
strainmut = StrainMutation(blmin, stddev=0.7, cellbounds=cellbounds,
                           number_of_variable_cell_vectors=3,
                           use_tags=False)

In [28]:
blmin_soft = closest_distances_generator([atomic_numbers[block] for block in blocks], 0.1)
softmut = SoftMutation(blmin_soft, bounds=[2., 5.], use_tags=False)

In [29]:
operators = OperationSelector([4., 3., 3.],
                              [pairing, softmut, strainmut])

In [30]:
relaxer = StructOptimizer()

CHGNet v0.3.0 initialized with 412,525 parameters
CHGNet will run on mps


In [22]:
i = 0
while da.get_number_of_unrelaxed_candidates() > 0:
    print("relaxing candidate no." + str(i))
    a = da.get_an_unrelaxed_candidate()

    relax(a, relaxer, cellbounds=cellbounds, verbose=True)
    da.add_relaxed_step(a)

    cell = a.get_cell()
    if not cellbounds.is_within_bounds(cell):
        print("Killed" + a.info['confid'])
        da.kill_candidate(a.info['confid'])
    
    i += 1

relaxing candidate no.0
      Step     Time          Energy          fmax
FIRE:    0 13:54:15      -42.507668       81.273228
FIRE:    1 13:54:20      -57.830315       26.115394
FIRE:    2 13:54:23      -66.705063       20.285579
FIRE:    3 13:54:26      -73.461281       15.004684
FIRE:    4 13:54:28      -78.613358       12.787092
FIRE:    5 13:54:33      -82.561264        9.751483
FIRE:    6 13:54:36      -85.673279        9.337994
FIRE:    7 13:54:38      -87.984222        7.102632
FIRE:    8 13:54:38      -89.357506        6.930305
FIRE:    9 13:54:40      -90.324402        6.541401
FIRE:   10 13:54:42      -91.338387        4.977786
FIRE:   11 13:54:45      -92.090843        3.387459
FIRE:   12 13:54:48      -92.149261       10.051159
FIRE:   13 13:54:49      -93.023415        2.933808
FIRE:   14 13:54:49      -93.336258        4.756044
FIRE:   15 13:54:53      -93.880074        5.402597
FIRE:   16 13:54:55      -95.047226        4.852294
FIRE:   17 13:54:57      -96.617577       

In [23]:
# Initialize the population
population_size = 20
population = Population(data_connection=da,
                        population_size=population_size,
                        comparator=comp,
                        logfile='log.txt',
                        use_extinct=True)

# Update the scaling volume used in some operators
# based on a number of the best candidates
current_pop = population.get_current_population()
strainmut.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)

# Test n_to_test new candidates; in this example we need
# only few GA iterations as the global minimum (FCC Ag)
# is very easily found (typically already after relaxation
# of the initial random structures).
n_to_test = 50

In [24]:
for step in range(n_to_test):
    print(f'Now starting configuration number {step}')

    # Create a new candidate
    a3 = None
    while a3 is None:
        a1, a2 = population.get_two_candidates()
        a3, desc = operators.get_new_individual([a1, a2])

    # Save the unrelaxed candidate
    da.add_unrelaxed_candidate(a3, description=desc)

    # Relax the new candidate and save it
    print("relaxing")
    relax(a3, relaxer, cellbounds=cellbounds, verbose=True)
    da.add_relaxed_step(a3)

    # If the relaxation has changed the cell parameters
    # beyond the bounds we disregard it in the population
    cell = a3.get_cell()
    if not cellbounds.is_within_bounds(cell):
        da.kill_candidate(a3.info['confid'])

    # Update the population
    population.update()

    if step % 10 == 0:
        # Update the scaling volumes of the strain mutation
        # and the pairing operator based on the current
        # best structures contained in the population
        current_pop = population.get_current_population()
        strainmut.update_scaling_volume(current_pop, w_adapt=0.5,
                                        n_adapt=4)
        pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
        write('current_population.traj', current_pop)

print('GA finished after step %d' % step)
hiscore = get_raw_score(current_pop[0])
print('Highest raw score = %8.4f eV' % hiscore)

all_candidates = da.get_all_relaxed_candidates()
write('all_candidates.traj', all_candidates)

current_pop = population.get_current_population()
write('current_population.traj', current_pop)

Now starting configuration number 0
Now starting configuration number 1
Now starting configuration number 2
Now starting configuration number 3
Now starting configuration number 4
Now starting configuration number 5
Now starting configuration number 6
Now starting configuration number 7
Now starting configuration number 8
Now starting configuration number 9
Now starting configuration number 10
Now starting configuration number 11
Now starting configuration number 12


In [50]:
from pymatgen.io.ase import AseAtomsAdaptor
from ase.io import read

In [41]:
struct = AseAtomsAdaptor.get_structure(read('test/current_population.traj'))

In [47]:
da = DataConnection('TiV2CrO10_ga.db')
population_size = 20
population = Population(data_connection=da,
                        population_size=population_size,
                        comparator=comp,
                        logfile='log.txt',
                        use_extinct=True)

In [51]:
AseAtomsAdaptor.get_structure(population.get_current_population()[0]).to("relaxed_final.cif")

"# generated using pymatgen\ndata_TiV2CrO10\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   6.27672980\n_cell_length_b   6.59314268\n_cell_length_c   7.25976307\n_cell_angle_alpha   79.66871539\n_cell_angle_beta   89.72888039\n_cell_angle_gamma   97.77778347\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   TiV2CrO10\n_chemical_formula_sum   'Ti1 V2 Cr1 O10'\n_cell_volume   292.71393315\n_cell_formula_units_Z   1\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  Ti  Ti0  1  0.43866151  0.18507570  0.18728426  1\n  V  V1  1  0.31640008  0.63400123  0.32709196  1\n  V  V2  1  0.93800528  0.25770714  0.40526123  1\n  Cr  Cr3  1  0.60418780  0.08819269  0.76988327  1\n  O  O4  1  0.86152854  0.19018516  0.64979772  1\n  O  O5  1  0.42807530  0.07832088  0.612337