# Practice notebook

Index

[CaMgO dataset](#camgo)


In [7]:
import os
import copy
import json
import itertools
import shutil as sh
from pathlib import Path
import numpy as np
import pandas as pd
import time

from CRYSTALpytools.crystal_io import Crystal_output, Crystal_input, Crystal_density, Crystal_gui
from CRYSTALpytools.convert import cry_gui2pmg, cry_out2pmg
from CRYSTALpytools.utils import view_pmg

from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.io.cif import CifWriter
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer, PointGroupAnalyzer

from ase.visualize import view

from dscribe.descriptors import CoulombMatrix

from sklearn.neighbors import NearestNeighbors, KNeighborsRegressor
from sklearn.metrics import mean_squared_error 
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (15,15)

# <a id='camgo'>CaMgO dataset</a>

### Structures

- structures is a list of all the structures as pymatgen objects

In [2]:
# Element written onto every site selected for substitution.
new_atom = 'Mg'

# Read the configuration-count output; the atom_type1 / atom_type2
# attributes used below are presumably filled in by get_config_analysis().
cry_output = Crystal_output().read_cry_output('data/crystal/cao_confcount.out')
cry_output.get_config_analysis()

# Parent structure: CRYSTAL .gui file converted through cry_gui2pmg.
original_structure_gui = Crystal_gui().read_cry_gui('data/crystal/cao_confcount.gui')
original_structure = cry_gui2pmg(original_structure_gui)

structures_mco = []
ca_atoms = []
mg_atoms = []
for config_idx in range(len(cry_output.atom_type1)):
    site_labels = cry_output.atom_type1[config_idx]

    # Start every configuration from an untouched copy of the parent.
    new_structure = original_structure.copy()
    for label in site_labels:
        # Labels are shifted by one before being used as site indices.
        new_structure.replace(label - 1, new_atom)
    structures_mco.append(new_structure)

    # NOTE(review): the atom_type1 sites are the ones replaced by `new_atom`
    # above, yet the same indices are stored in `ca_atoms` -- confirm which
    # of atom_type1 / atom_type2 really lists the substituted sites.
    ca_atoms.append((np.asarray(site_labels) - 1).tolist())
    mg_atoms.append((np.asarray(cry_output.atom_type2[config_idx]) - 1).tolist())

# Independent working copy; structures_mco keeps the generated originals.
structures = copy.deepcopy(structures_mco)

### Single point energies & band gap

- energies_sp is a list of single-point energies (non-optimised geometry)
- gap_sp is a list of band gap values for the non-optimised geometry

In [3]:
# Collect the single-point energy and band gap of every configuration whose
# output file reports a final energy.
#
# NOTE: outputs without a final energy are skipped, so energies_sp / gap_sp
# can be shorter than `structures`, and position k in these lists is then no
# longer guaranteed to correspond to structures[k].
energies_sp = []
gap_sp = []
for i in range(len(structures)):
    crystal_output = Crystal_output().read_cry_output('./data/crystal/sp/CaMgO_sp_%s.out'%str(i))
    # Query the energy once and reuse it instead of calling the getter twice.
    final_energy = crystal_output.get_final_energy()
    # Identity test against the None singleton (PEP 8) rather than `!= None`.
    if final_energy is not None:
        energies_sp.append(final_energy)
        gap_sp.append(crystal_output.get_band_gap())

## Descriptors

In [5]:
# Pass every entry of `structures` through AseAtomsAdaptor.get_atoms so the
# dscribe descriptor in the next cell can consume it.
# NOTE(review): despite its name, NotAse_struct holds the *output* of the
# pymatgen -> ASE adaptor, i.e. presumably ASE Atoms objects.
NotAse_struct = [AseAtomsAdaptor().get_atoms(structure) for structure in structures]

In [8]:
# Coulomb-matrix descriptor computed with dscribe, one structure per call.
cm_dscribe_list = []
cm_ds = CoulombMatrix(n_atoms_max=56, permutation="eigenspectrum")
start = time.time()
for atoms in NotAse_struct:
    # create() is given a one-element list; only the real part is kept.
    dscribe_matrix = np.real(cm_ds.create([atoms]))
    cm_dscribe_list.append(dscribe_matrix)

print('Number of matrices read: ', len(cm_dscribe_list))
print("--- %s minutes ---" % ((time.time() - start)/60))

Number of matrices read:  4023
--- 0.009516549110412598 minutes ---


In [9]:
# Coulomb-matrix descriptor computed with matminer (flattened form).
#
# NOTE(review): `sf` is not bound by the import cell of this notebook, so this
# cell raises NameError on a fresh kernel. It presumably stands for
# `matminer.featurizers.structure` -- confirm and add that import to the
# import cell.
cm_matminer_list = []
cm_mm = sf.CoulombMatrix(flatten=True)
start = time.time()
if structures:
    # Fit once on the whole dataset instead of re-fitting on a one-element
    # list inside the loop: the fit does not depend on the loop variable, and
    # fitting on all structures lets the featurizer see the full dataset, so
    # every flattened vector is padded against the same reference.
    matminer_matrix = cm_mm.fit(structures)
    for structure in structures:
        featurized_structure = matminer_matrix.featurize(structure)
        cm_matminer_list.append(featurized_structure)

print('Number of matrices read: ', len(cm_matminer_list))
print("--- %s minutes ---" % ((time.time() - start)/60))

  zeros[: len(eigs)] = eigs


Number of matrices read:  4023
--- 6.185516965389252 minutes ---
