### Seminar #2. Python for atomistic modeling and analysis

#### Goals
- Learn basics of ASE and Pymatgen


#### Agenda
- Structure file formats
- Intro to ASE
- Intro to Pymatgen
- Nearest neighbors list


In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

### ASE: Intro
documentation: https://wiki.fysik.dtu.dk/ase/index.html

read .cif file

In [2]:
from ase.io import read

atoms = read('data/LiI.cif')
atoms # Atoms's object

Atoms(symbols='Li4I4', pbc=True, cell=[5.96835483, 5.96835483, 5.96835483], spacegroup_kinds=...)

#### Crystallographic Information File (.cif)
- used to share the crystal structure representations
- minimum information:
    - space group
    - unit cell parameters
    - fractional coordinates of atoms
- additional information:
    - symmetry operations
    - oxidation states
    - anything you need
    - anything you want

In [3]:
"""
# generated using pymatgen
data_LiBr
_symmetry_space_group_name_H-M   Fm-3m
_cell_length_a   5.44538116
_cell_length_b   5.44538116
_cell_length_c   5.44538116
_cell_angle_alpha   90.00000000
_cell_angle_beta   90.00000000
_cell_angle_gamma   90.00000000
_symmetry_Int_Tables_number   225
_chemical_formula_structural   LiBr
_chemical_formula_sum   'Li4 Br4'
_cell_volume   161.46740039
_cell_formula_units_Z   4
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
  2  '-x, -y, -z'
  3  '-y, x, z'
  4  'y, -x, -z'
  5  '-x, -y, z'
  6  'x, y, -z'
  7  'y, -x, z'
  8  '-y, x, -z'
  9  'x, -y, -z'
  10  '-x, y, z'
  11  '-y, -x, -z'
  12  'y, x, z'
  13  '-x, y, -z'
  14  'x, -y, z'
  15  'y, x, -z'
  16  '-y, -x, z'
  17  'z, x, y'
  18  '-z, -x, -y'
  19  'z, -y, x'
  20  '-z, y, -x'
  21  'z, -x, -y'
  22  '-z, x, y'
  23  'z, y, -x'
  24  '-z, -y, x'
  25  '-z, x, -y'
  26  'z, -x, y'
  27  '-z, -y, -x'
  28  'z, y, x'
  29  '-z, -x, y'
  30  'z, x, -y'
  31  '-z, y, x'
  32  'z, -y, -x'
  33  'y, z, x'
  34  '-y, -z, -x'
  35  'x, z, -y'
  36  '-x, -z, y'
  37  '-y, z, -x'
  38  'y, -z, x'
  39  '-x, z, y'
  40  'x, -z, -y'
  41  '-y, -z, x'
  42  'y, z, -x'
  43  '-x, -z, -y'
  44  'x, z, y'
  45  'y, -z, -x'
  46  '-y, z, x'
  47  'x, -z, y'
  48  '-x, z, -y'
  49  'x+1/2, y+1/2, z'
  50  '-x+1/2, -y+1/2, -z'
  51  '-y+1/2, x+1/2, z'
  52  'y+1/2, -x+1/2, -z'
  53  '-x+1/2, -y+1/2, z'
  54  'x+1/2, y+1/2, -z'
  55  'y+1/2, -x+1/2, z'
  56  '-y+1/2, x+1/2, -z'
  57  'x+1/2, -y+1/2, -z'
  58  '-x+1/2, y+1/2, z'
  59  '-y+1/2, -x+1/2, -z'
  60  'y+1/2, x+1/2, z'
  61  '-x+1/2, y+1/2, -z'
  62  'x+1/2, -y+1/2, z'
  63  'y+1/2, x+1/2, -z'
  64  '-y+1/2, -x+1/2, z'
  65  'z+1/2, x+1/2, y'
  66  '-z+1/2, -x+1/2, -y'
  67  'z+1/2, -y+1/2, x'
  68  '-z+1/2, y+1/2, -x'
  69  'z+1/2, -x+1/2, -y'
  70  '-z+1/2, x+1/2, y'
  71  'z+1/2, y+1/2, -x'
  72  '-z+1/2, -y+1/2, x'
  73  '-z+1/2, x+1/2, -y'
  74  'z+1/2, -x+1/2, y'
  75  '-z+1/2, -y+1/2, -x'
  76  'z+1/2, y+1/2, x'
  77  '-z+1/2, -x+1/2, y'
  78  'z+1/2, x+1/2, -y'
  79  '-z+1/2, y+1/2, x'
  80  'z+1/2, -y+1/2, -x'
  81  'y+1/2, z+1/2, x'
  82  '-y+1/2, -z+1/2, -x'
  83  'x+1/2, z+1/2, -y'
  84  '-x+1/2, -z+1/2, y'
  85  '-y+1/2, z+1/2, -x'
  86  'y+1/2, -z+1/2, x'
  87  '-x+1/2, z+1/2, y'
  88  'x+1/2, -z+1/2, -y'
  89  '-y+1/2, -z+1/2, x'
  90  'y+1/2, z+1/2, -x'
  91  '-x+1/2, -z+1/2, -y'
  92  'x+1/2, z+1/2, y'
  93  'y+1/2, -z+1/2, -x'
  94  '-y+1/2, z+1/2, x'
  95  'x+1/2, -z+1/2, y'
  96  '-x+1/2, z+1/2, -y'
  97  'x+1/2, y, z+1/2'
  98  '-x+1/2, -y, -z+1/2'
  99  '-y+1/2, x, z+1/2'
  100  'y+1/2, -x, -z+1/2'
  101  '-x+1/2, -y, z+1/2'
  102  'x+1/2, y, -z+1/2'
  103  'y+1/2, -x, z+1/2'
  104  '-y+1/2, x, -z+1/2'
  105  'x+1/2, -y, -z+1/2'
  106  '-x+1/2, y, z+1/2'
  107  '-y+1/2, -x, -z+1/2'
  108  'y+1/2, x, z+1/2'
  109  '-x+1/2, y, -z+1/2'
  110  'x+1/2, -y, z+1/2'
  111  'y+1/2, x, -z+1/2'
  112  '-y+1/2, -x, z+1/2'
  113  'z+1/2, x, y+1/2'
  114  '-z+1/2, -x, -y+1/2'
  115  'z+1/2, -y, x+1/2'
  116  '-z+1/2, y, -x+1/2'
  117  'z+1/2, -x, -y+1/2'
  118  '-z+1/2, x, y+1/2'
  119  'z+1/2, y, -x+1/2'
  120  '-z+1/2, -y, x+1/2'
  121  '-z+1/2, x, -y+1/2'
  122  'z+1/2, -x, y+1/2'
  123  '-z+1/2, -y, -x+1/2'
  124  'z+1/2, y, x+1/2'
  125  '-z+1/2, -x, y+1/2'
  126  'z+1/2, x, -y+1/2'
  127  '-z+1/2, y, x+1/2'
  128  'z+1/2, -y, -x+1/2'
  129  'y+1/2, z, x+1/2'
  130  '-y+1/2, -z, -x+1/2'
  131  'x+1/2, z, -y+1/2'
  132  '-x+1/2, -z, y+1/2'
  133  '-y+1/2, z, -x+1/2'
  134  'y+1/2, -z, x+1/2'
  135  '-x+1/2, z, y+1/2'
  136  'x+1/2, -z, -y+1/2'
  137  '-y+1/2, -z, x+1/2'
  138  'y+1/2, z, -x+1/2'
  139  '-x+1/2, -z, -y+1/2'
  140  'x+1/2, z, y+1/2'
  141  'y+1/2, -z, -x+1/2'
  142  '-y+1/2, z, x+1/2'
  143  'x+1/2, -z, y+1/2'
  144  '-x+1/2, z, -y+1/2'
  145  'x, y+1/2, z+1/2'
  146  '-x, -y+1/2, -z+1/2'
  147  '-y, x+1/2, z+1/2'
  148  'y, -x+1/2, -z+1/2'
  149  '-x, -y+1/2, z+1/2'
  150  'x, y+1/2, -z+1/2'
  151  'y, -x+1/2, z+1/2'
  152  '-y, x+1/2, -z+1/2'
  153  'x, -y+1/2, -z+1/2'
  154  '-x, y+1/2, z+1/2'
  155  '-y, -x+1/2, -z+1/2'
  156  'y, x+1/2, z+1/2'
  157  '-x, y+1/2, -z+1/2'
  158  'x, -y+1/2, z+1/2'
  159  'y, x+1/2, -z+1/2'
  160  '-y, -x+1/2, z+1/2'
  161  'z, x+1/2, y+1/2'
  162  '-z, -x+1/2, -y+1/2'
  163  'z, -y+1/2, x+1/2'
  164  '-z, y+1/2, -x+1/2'
  165  'z, -x+1/2, -y+1/2'
  166  '-z, x+1/2, y+1/2'
  167  'z, y+1/2, -x+1/2'
  168  '-z, -y+1/2, x+1/2'
  169  '-z, x+1/2, -y+1/2'
  170  'z, -x+1/2, y+1/2'
  171  '-z, -y+1/2, -x+1/2'
  172  'z, y+1/2, x+1/2'
  173  '-z, -x+1/2, y+1/2'
  174  'z, x+1/2, -y+1/2'
  175  '-z, y+1/2, x+1/2'
  176  'z, -y+1/2, -x+1/2'
  177  'y, z+1/2, x+1/2'
  178  '-y, -z+1/2, -x+1/2'
  179  'x, z+1/2, -y+1/2'
  180  '-x, -z+1/2, y+1/2'
  181  '-y, z+1/2, -x+1/2'
  182  'y, -z+1/2, x+1/2'
  183  '-x, z+1/2, y+1/2'
  184  'x, -z+1/2, -y+1/2'
  185  '-y, -z+1/2, x+1/2'
  186  'y, z+1/2, -x+1/2'
  187  '-x, -z+1/2, -y+1/2'
  188  'x, z+1/2, y+1/2'
  189  'y, -z+1/2, -x+1/2'
  190  '-y, z+1/2, x+1/2'
  191  'x, -z+1/2, y+1/2'
  192  '-x, z+1/2, -y+1/2'
loop_
 _atom_type_symbol
 _atom_type_oxidation_number
  Li+  1.0
  Br-  -1.0
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Li+  Li0  4  0.00000000  0.00000000  0.00000000  1
  Br-  Br1  4  0.00000000  0.00000000  0.50000000  1
"""

"\n# generated using pymatgen\ndata_LiBr\n_symmetry_space_group_name_H-M   Fm-3m\n_cell_length_a   5.44538116\n_cell_length_b   5.44538116\n_cell_length_c   5.44538116\n_cell_angle_alpha   90.00000000\n_cell_angle_beta   90.00000000\n_cell_angle_gamma   90.00000000\n_symmetry_Int_Tables_number   225\n_chemical_formula_structural   LiBr\n_chemical_formula_sum   'Li4 Br4'\n_cell_volume   161.46740039\n_cell_formula_units_Z   4\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\n  2  '-x, -y, -z'\n  3  '-y, x, z'\n  4  'y, -x, -z'\n  5  '-x, -y, z'\n  6  'x, y, -z'\n  7  'y, -x, z'\n  8  '-y, x, -z'\n  9  'x, -y, -z'\n  10  '-x, y, z'\n  11  '-y, -x, -z'\n  12  'y, x, z'\n  13  '-x, y, -z'\n  14  'x, -y, z'\n  15  'y, x, -z'\n  16  '-y, -x, z'\n  17  'z, x, y'\n  18  '-z, -x, -y'\n  19  'z, -y, x'\n  20  '-z, y, -x'\n  21  'z, -x, -y'\n  22  '-z, x, y'\n  23  'z, y, -x'\n  24  '-z, -y, x'\n  25  '-z, x, -y'\n  26  'z, -x, y'\n  27  '-z, -y, -x'\n  28  'z, y,

Input and output data formats supported by ASE

https://wiki.fysik.dtu.dk/ase/ase/io/io.html


Atoms object overview

In [None]:
from ase.io import read

atoms = read('data/LiI.cif')
atoms # Atoms's object

In [None]:
atoms.positions # cartesian positions

In [None]:
atoms.numbers # atomic numbers

In [None]:
atoms.symbols # symbols

In [None]:
atoms.cell # basis vectors

In [None]:
atoms.cell + 0.0

In [79]:
a, b, c, alpha, beta, gamma = atoms.cell.cellpar()

In [None]:
atoms.pbc # periodic boundary conditions

In [None]:
atoms.arrays

In [None]:
atoms.info

you can store additional properties/attributes

In [None]:
atoms.info.update({'chemsys': 'Li-I'})
atoms.info

you can delete unimportant properties

In [13]:
# Remove the 'occupancy' entry if it is present. Only a missing key is
# tolerated (e.g. when the cell is re-run); any other error is surfaced
# instead of being silently swallowed by a bare `except`.
try:
    del atoms.info['occupancy']
except KeyError:
    pass
atoms.info

In [2]:
#atoms.set_array

you can append or remove an atom

In [304]:
atoms.append('Cs')

In [None]:
atoms.positions[-1]

In [306]:
atoms.positions[-1] += [3, 2, 1]

In [None]:
atoms.positions

In [None]:
atoms.symbols

In [309]:
del atoms[-1]

In [None]:
atoms

write .cif file

In [117]:
from ase.io import write

write('data/some_name.cif', atoms)

read .xyz

In [None]:
"""
12
Benzene, Source: https://github.com/nutjunkie/IQmol/blob/master/share/fragments/Molecules/Aromatics/Benzene.xyz
  H      1.2194     -0.1652      2.1600
  C      0.6825     -0.0924      1.2087
  C     -0.7075     -0.0352      1.1973
  H     -1.2644     -0.0630      2.1393
  C     -1.3898      0.0572     -0.0114
  H     -2.4836      0.1021     -0.0204
  C     -0.6824      0.0925     -1.2088
  H     -1.2194      0.1652     -2.1599
  C      0.7075      0.0352     -1.1973
  H      1.2641      0.0628     -2.1395
  C      1.3899     -0.0572      0.0114
  H      2.4836     -0.1022      0.0205
"""


atoms = read('data/benzene.xyz')
atoms

.xyz format allows saving a list of molecular structures

In [120]:
# Build a short "trajectory": five copies of the molecule, each translated
# further than the previous one.
images = []

for i in range(5):
    image = atoms.copy()  # copy so the original molecule is left untouched
    # the scalar i * 10 is broadcast, i.e. the same shift is applied to x, y and z
    image.positions += i * 10
    images.append(image)
images  # bare expression; it is not the last statement of the cell
# passing a list of Atoms objects writes all of them into a single file
write('data/moving_benzene.xyz', images) # use jmol to see the changes

you cannot save a list of crystal structures in .cif format, but you can do it in .extxyz format

write Atoms object in a file

In [121]:
from ase.io import write
atoms = read('data/LiI.cif')
filename = 'data/LiI.extxyz' # extxyz allows to store crystal structures in .xyz-like format
write(filename, atoms)

In [None]:
atoms_saved = read(filename)
atoms_saved.info

#### Manipulating Atoms object

get sublattice

In [26]:
sublattice = atoms[atoms.numbers == 3].copy() # use copy to decouple replicas

modify coordinates

In [123]:
atoms.positions[-1] += [0.1, 0.05, 0.2]

In [None]:
atoms.positions

turn off pbc 

In [None]:
atoms.pbc = [True, True, False]
atoms.pbc

replace species

In [129]:
atoms.numbers = np.where(atoms.numbers == 3, 1, atoms.numbers)

In [None]:
atoms

visualize Atoms

In [None]:
from ase.visualize import view
view(atoms, viewer='x3d')

create supercell

In [None]:
from ase.build import make_supercell
P = [
        [3, 0, 0],
        [0, 2, 0],
        [0, 0, 6]
    ]

supercell = make_supercell(atoms, P)
view(supercell, viewer='x3d')

### Task 1: Unit cell volume

Write a function that takes an Atoms object (crystal structure) as input and returns the volume of its unit cell

Hint: Scalar triple product

In [None]:
atoms = read('data/LiF.cif')

def unit_cell_volume(atoms):

    """
    This function calculates the unit cell volume of the given Atoms object.

    Params
    ------

    atoms: ASE's Atoms object
        a crystal structure whose unit cell volume should be calculated

    Returns
    -------

    volume of the unit cell in Angstrom ^ 3
    """
    # Exercise stub: implement using the scalar triple product of the
    # three cell vectors (rows of atoms.cell).
    pass

# The function expects the Atoms object itself (not atoms.cell);
# the second value is ASE's own result, shown for comparison.
unit_cell_volume(atoms),  atoms.cell.volume


### Task 2: Substitution
Write a function that takes an Atoms object (crystal structure) as input and returns the structure in which the species of interest is replaced by the given chemical element.


In [None]:
atoms = read('data/LiF.cif')

def replace_atoms(atoms, specie, substitute):

    """
    This function returns atoms with the given species replaced by the substitute

    Params
    ------

    atoms: ASE's Atoms object
        a crystal structure in which the species should be replaced

    specie: int
        atomic number of the species to be replaced

    substitute: int
        atomic number of the element to put in its place

    Returns
    -------

    modified Atoms object
    """
    # Exercise stub: the body is intentionally left for the reader.
    pass


Building Atoms object

In [134]:
from ase import Atoms
from ase.cell import Cell

In [135]:
a, b, c, alpha, beta, gamma = 3.359, 3.359, 3.359, 90, 90, 90 # unit cell parameters, cubic lattice
positions = np.array([0.5, 0.5, 0.5]).reshape(-1, 3) # positions N x 3
numbers = np.array([84]) # N
cell = Cell.fromcellpar([a, b, c, alpha, beta, gamma]) # cell matrix can be created from the unit cell parameters
pbc = [True, True, True]
atoms = Atoms(positions = positions, numbers = numbers, pbc = pbc, cell = cell)

get space group of the structure

In [None]:
from ase.spacegroup import get_spacegroup
sg = get_spacegroup(atoms) # uses spglib library under the hood
sg

In [137]:
# rotation and translation symmetry operations

for (rot, tr) in sg.get_symop():
    pass

symmetry operation

In [None]:
np.dot(atoms.positions[0], rot) + np.dot(tr, atoms.cell) # is it a right way?

### Connectivity matrix for molecules

In [231]:
from ase.io import read
import numpy as np
import matplotlib.pyplot as plt
from ase.neighborlist import NeighborList

![image](https://media.geeksforgeeks.org/wp-content/uploads/20240424142538/Adjacency-Matrix.webp)

source: https://www.geeksforgeeks.org/adjacency-matrix/

In [None]:
atoms = read('data/benzene.xyz')
atoms.pbc

#### Approximate solution

In [233]:
def _get_connectivity_matrix(atoms, cutoff):

    """
    Build a connectivity (adjacency) matrix for a molecule by brute force.

    Every ordered pair of atoms (including an atom paired with itself) is
    visited; periodic boundary conditions are not taken into account.

    Params
    ------

    atoms: ASE's Atoms object
        molecule; only len(atoms) and atoms.positions are used

    cutoff: float
        two different atoms are marked as bonded unless their distance
        exceeds this value

    Returns
    -------
    The connectivity matrix (N x N, entries 0/1, zero diagonal),
    array of source indices, array of target indices, array of distances.
    The three flat arrays contain all N * N ordered pairs, self-pairs included.

    """

    n_atoms = len(atoms)
    pair_sources, pair_targets, pair_distances = [], [], []
    rows = []

    for i in range(n_atoms):
        row = []
        for j in range(n_atoms):
            separation = np.linalg.norm(atoms.positions[i] - atoms.positions[j])
            pair_sources.append(i)
            pair_targets.append(j)
            pair_distances.append(separation)
            # an atom is never its own neighbor, whatever the cutoff is
            bonded = i != j and not separation > cutoff
            row.append(1 if bonded else 0)
        rows.append(row)

    return (
        np.array(rows),
        np.array(pair_sources),
        np.array(pair_targets),
        np.array(pair_distances),
    )

cm, _, _, _ = _get_connectivity_matrix(atoms, 2.5)

### ASE's implementation

In [234]:
cutoff = 2.5
# NeighborList takes one radius per atom; two atoms are neighbors when their
# spheres overlap, hence cutoff / 2 for each atom. The `skin` value is added
# to every radius (default 0.3), which would silently widen the effective
# cutoff, so it is set to zero to make the result comparable with the
# hand-written connectivity matrix.
neighbor_list = NeighborList([cutoff / 2] * len(atoms), skin=0.0, self_interaction=False, bothways=True)
neighbor_list.update(atoms)
matrix = neighbor_list.get_connectivity_matrix(sparse=False)

### Nearest neighbor list considering pbc
Approach:
- add buffer around the unit cell

#### Approximate solution (we collect neighbors in a cube, cutoff is not used here)

In [235]:
import itertools

In [253]:
atoms = read('data/LiF.cif')

def _get_nn_list(atoms):


    """
    This function finds an approximate neighbors list for a given structure.

    The periodic boundary conditions are considered by pairing every atom of
    the unit cell with every atom in the 3x3x3 block of cells around it
    (lattice offsets -1, 0, 1 along each cell vector). No cutoff is applied:
    all such pairs are returned, so the list is only complete up to distances
    covered by the neighboring cells.

    Params
    ------

    atoms: ASE's Atoms object
        crystal structure; atoms.positions and atoms.cell are used

    Returns
    -------
    Array of source indices, array of target indices, array of distances
    (one entry per source / target / offset combination, excluding the pair
    formed by an atom and itself in the same cell)

    """
    offsets = tuple(itertools.product([-1, 0, 1], repeat=3))
    # Cartesian translation for each lattice offset, computed once
    translations = [np.dot(offset, atoms.cell) for offset in offsets]

    source_list, target_list, distance_list = [], [], []

    for source in range(len(atoms)):
        p_source = atoms.positions[source]
        for target in range(len(atoms)):
            for offset, shift in zip(offsets, translations):
                # skip only the atom itself; its periodic images are valid neighbors
                if source == target and offset == (0, 0, 0):
                    continue

                p_target = atoms.positions[target] + shift
                distance = np.linalg.norm(p_source - p_target)

                source_list.append(source)
                target_list.append(target)
                distance_list.append(distance)

    source_list = np.array(source_list)
    target_list = np.array(target_list)
    distance_list = np.array(distance_list)

    return source_list, target_list, distance_list

source_list, target_list, distance_list = _get_nn_list(atoms)

In [None]:
print(len(distance_list[distance_list < 5.95]))
_ = plt.hist(distance_list[distance_list < 5.95], bins = 100)

#### ASE's implementation

In [None]:
from ase.neighborlist import neighbor_list

ii, jj, dd = neighbor_list('ijd', atoms, 5.95, self_interaction=False)

_ = plt.hist(dd, bins = 100)

len(dd)

### Task 3: Nearest neighbor

Write a function that finds, for each site in a given structure, the minimum distance to a neighbor and returns that distance together with the chemical symbol of the nearest neighbor

In [264]:

def _get_minimum_distances(atoms, cutoff):

    """
    This function finds the nearest neighbor for each site in a given structure

    Params
    ------

    atoms: Atoms
        crystal structure
    cutoff: float
        cutoff radius

    Returns
    -------

    list with the source atom chemical symbols,
    list with the nearest neighbor chemical symbols, and
    list with the distances between the source and the nearest neighbor

    """

    # Exercise stub: the body is intentionally left for the reader.
    pass

_get_minimum_distances(atoms, 5.0)

#### The killer feature of the ASE package is the calculators and optimizers. We will discuss these in Seminar #9.

- https://wiki.fysik.dtu.dk/ase/ase/calculators/calculators.html

- https://wiki.fysik.dtu.dk/ase/ase/optimize.html

### Pymatgen: Intro
documentation: https://pymatgen.org/

In [4]:
from pymatgen.core import Structure
from ase.io import read
import numpy as np
import matplotlib.pyplot as plt

st = Structure.from_file('data/LiF.cif')
st

atoms = read('data/LiF.cif')

while the Atoms object is a structure of arrays, the Structure object is an array of structures

In [None]:
atoms[atoms.numbers == 3] 

In [None]:
st[np.array(st.atomic_numbers) == 3] # we cannot handle the Structure in a similar way

i.e. it's not as handy, but it has a lot more functionality (see the documentation)

In [None]:
for site in st.sites:
    print(site.frac_coords, site.specie)

In [None]:
st.lattice

In [None]:
st.lattice.matrix - atoms.cell

In [None]:
st.volume

### Neighbor list using pymatgen and Voronoi partitioning

Motivation:
- crystal structures are very different; there is no universal cutoff radius value that suits every system

- i.e. the cutoff radius is a parameter that we want to get rid of

- we can do it with the Voronoi partitioning scheme

![image](https://upload.wikimedia.org/wikipedia/commons/thumb/5/54/Euclidean_Voronoi_diagram.svg/768px-Euclidean_Voronoi_diagram.svg.png)



#### Approximate solution
source: https://github.com/mcs-cice/IonExplorer2/blob/main/geometry.py

In [12]:
from scipy.spatial import Voronoi
import itertools 

def find_vneighbors(points, central_points, key=1, min_dist=-float("inf"), max_dist=float("inf")):


    """
    Find Voronoi neighbors of the selected points.

    Two points are considered neighbors when their Voronoi regions share at
    least `key` vertices and the distance between the points lies strictly
    between `min_dist` and `max_dist`.

    Parameter key can take values 1, 2, or 3 that correspond to the
    search for domains adjacent by vertices, edges or faces.

    Returns a dict mapping each index from `central_points` to the list of
    indices of its neighbors. Raises ValueError when the Voronoi region of a
    central point is not closed.
    """

    result = dict.fromkeys(central_points)
    diagram = Voronoi(points)
    for center in central_points:
        origin = points[center]
        center_region = diagram.regions[diagram.point_region[center]]
        # -1 marks a vertex at infinity, i.e. an unbounded region
        if -1 in center_region:
            raise ValueError("The domain for \"" + str(center) + "\" point is not closed!")
        adjacent = []
        for other, other_point in enumerate(points):
            other_region = diagram.regions[diagram.point_region[other]]
            shared_vertices = len(np.intersect1d(center_region, other_region))
            if other == center or shared_vertices < key:
                continue
            if min_dist < np.linalg.norm(origin - other_point) < max_dist:
                adjacent.append(other)
        result[center] = adjacent
    return result




# Build the point cloud for the Voronoi partitioning: the unit cell plus its
# 26 periodic images, so that the atoms of the central cell are surrounded
# by neighbors on every side.
atoms = read('data/LiF.cif')
points = []
# all lattice offsets (-1, 0, 1) along each of the three cell vectors
offsets = tuple(list(itertools.product(
                                        [-1, 0, 1],
                                        [-1, 0, 1],
                                        [-1, 0, 1],
                                        )
                    )
                )  


central_points_ids = []
for i, offset in enumerate(offsets):
    # Cartesian positions of the whole cell translated by this lattice offset
    points.append(atoms.positions + np.dot(offset, atoms.cell))
    if offset == (0, 0, 0):
        # indices that the atoms of the untranslated cell get after stacking
        central_points_ids = np.arange(len(atoms) * i, len(atoms) * (i + 1))

points = np.vstack(points)

# key=3: require at least three shared Voronoi vertices between two regions
nn = find_vneighbors(points, central_points_ids, key=3, min_dist=-float("inf"), max_dist=float("inf"))


In [13]:
# Flatten the {source: [targets]} mapping into parallel lists and attach
# the distance of every source-target pair.
source_list, target_list, distance_list = [], [], []
for source, targets in nn.items():
    for target in targets:
        d = np.linalg.norm(points[source] - points[target])
        source_list.append(source)
        target_list.append(target)
        distance_list.append(d)

In [None]:
print(min(distance_list), max(distance_list))
print(len(distance_list))

_ = plt.hist(distance_list)

#### Pymatgen's solution

In [15]:
from pymatgen.analysis.local_env import VoronoiNN

In [16]:
calc = VoronoiNN()
voro_data = calc.get_all_voronoi_polyhedra(st)

In [None]:
type(voro_data)

In [None]:
voro_data[0][1187]

In [19]:
# Collect (source, target, distance) triplets from the Voronoi polyhedra.
ii, jj, dd = [], [], []
for source, data in enumerate(voro_data):
    for target in data.keys():
        ii.append(source)
        # record the neighbor key as well, otherwise jj stays empty
        # NOTE(review): the keys look like indices into pymatgen's padded
        # point set rather than unit-cell site indices - confirm before use
        jj.append(target)
        # 'face_dist' is the distance from the site to the shared face, so the
        # neighbor itself is taken to be twice as far away
        dd.append(2 * data[target]['face_dist'])

In [None]:
print(min(dd), max(dd))
print(len(dd))

#### We will explore more Pymatgen functionality at the next seminar (seminar #3).

### Task 4

Analyse the correlation between the Eg and X in LiX

Steps:
- Read the structures

- Collect Eg and Li-X bond lengths (using VoronoiNN)
    -  Eg vs. X data is stored in data/eg_vs_x.csv
- Plot Eg vs. Li-X
- Fit the line 
- Report correlation coefficient


In [63]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from pymatgen.core import Structure
from pymatgen.analysis.local_env import VoronoiNN
from scipy.stats import linregress

### Solutions to the tasks



In [None]:
# task 1
def unit_cell_volume(atoms):
    """
    Calculate the unit cell volume of the given structure.

    Params
    ------

    atoms: ASE's Atoms object
        crystal structure; only atoms.cell (3 x 3, one cell vector per row) is used

    Returns
    -------

    volume of the unit cell, in the cubed length unit of the cell vectors
    """
    v1, v2, v3 = np.asarray(atoms.cell)
    # The scalar triple product is signed (negative for a left-handed set of
    # cell vectors), while a volume must be non-negative.
    return abs(np.dot(v1, np.cross(v2, v3)))

# task: nearest neighbor (minimum distance for each site)
def _get_minimum_distances(atoms, cutoff):
    """
    Find the nearest neighbor of every site that has a neighbor within the cutoff.

    Params
    ------

    atoms: Atoms
        crystal structure
    cutoff: float
        cutoff radius passed to ASE's neighbor_list

    Returns
    -------

    list with the source site indices,
    list with the distances to the nearest neighbor, and
    list with the nearest neighbor chemical symbols

    Sites without any neighbor inside the cutoff do not appear in the lists.
    """

    ii, jj, dd = neighbor_list('ijd', atoms, cutoff, self_interaction=False)

    source_ids = []
    nn_symbols = []
    d_min_list = []
    for i in np.unique(ii):
        mask = ii == i
        # Position of the shortest distance among the neighbors of site i.
        # It has to be the per-site minimum: comparing against the global
        # dd.min() fails for every site whose nearest neighbor is farther
        # away than the shortest distance in the whole structure.
        nearest = np.argmin(dd[mask])

        d_min_list.append(dd[mask][nearest])
        source_ids.append(i)
        nn_symbols.append(atoms.symbols[jj[mask][nearest]])
    return source_ids, d_min_list, nn_symbols


# task: Eg vs. Li-X bond length
# read structure and Eg data
data = pd.read_csv('data/eg_vs_x.csv')

structures = []
for X in data.x:
    st = Structure.from_file(f'data/Li{X}.cif')
    structures.append(st)
data['structure'] = structures

# collect distances
calc = VoronoiNN()  # one calculator is enough; it is reused for every structure
li_x_mean_distances = []
for st in data.structure:
    li_x_distances = []
    for i, site in enumerate(st.sites):
        if str(site.specie.element) == 'Li':
            poly_data = calc.get_voronoi_polyhedra(st, i)
            # NOTE(review): every Voronoi neighbor of Li is counted here, the
            # neighbor species is not checked - confirm that only X atoms
            # share a face with Li in these structures
            for nn in poly_data:
                # the neighbor is taken to be twice as far as the shared face
                li_x_distances.append(2 * poly_data[nn]['face_dist'])
    li_x_mean_distances.append(np.mean(li_x_distances))
data['li_x_distance'] = li_x_mean_distances

# plot data
fig, ax = plt.subplots(dpi = 150, figsize = (3, 3))
ax.scatter(data.eg, data.li_x_distance, label = 'data points')

res = linregress(data.eg, data.li_x_distance)
ax.plot(data.eg, data.eg * res.slope + res.intercept, label = 'linear fit')

ax.set_xlabel('Eg, eV')
# raw string: '\A' is not a valid escape sequence in a normal string literal
ax.set_ylabel(r'Li-X bond length, $\AA$')

print(res.rvalue)


### Example from the slides

In [None]:
import pandas as pd
from ase.io import read, write
from mendeleev.fetch import fetch_table
import matplotlib.pyplot as plt
from mendeleev.fetch import fetch_ionic_radii
from ions.data import ionic_radii
import numpy as np
from ase.neighborlist import neighbor_list
from ase.visualize import view

# table of element properties (provides the electronegativity columns used later)
table = fetch_table('elements')

df = pd.read_csv('data/eg_vs_x.csv')
df['symbol'] = df['x']  # same key name as in `table`, needed for the later merge

radii = []
for x in df.x: 
    radii.append(ionic_radii[x][-1])

# The radii belong to the dataframe built in this cell (`df`); writing them to
# `data` would touch a dataframe from another cell (or fail if it is undefined).
df['r_i'] = radii

X = df['x'].values
structures = [] 

# one LiX structure per anion listed in the table
for element in X:
    atoms = read(f'data/Li{element}.cif')
    structures.append(atoms)

from ase.build import make_supercell

# 3x3x3 supercell of the last structure that was read, for visual inspection
sc = make_supercell(atoms, [[3, 0, 0 ], [0, 3, 0], [0, 0, 3]])
view(sc, viewer='x3d')


def get_li_x_bond_length(atoms, r_cut = 3.0):
    """
    Return the shortest neighbor distance for every Li site of the structure.

    Params
    ------

    atoms: ASE's Atoms object
        crystal structure
    r_cut: float
        cutoff radius passed to ASE's neighbor_list

    Returns
    -------

    list with one minimum distance per Li site (in the order of the Li indices)
    """

    lithium_sites = np.where(atoms.symbols == 'Li')[0].ravel()

    first_idx, _, lengths = neighbor_list('ijd', atoms,  r_cut, self_interaction=False)

    # for each Li site, keep the shortest of the distances that start at it
    return [lengths[first_idx == site].min() for site in lithium_sites]

# per-site minimum distances for the structure currently bound to `atoms`
min_distance = get_li_x_bond_length(atoms)

# one number per structure: the mean of the per-Li minimum distances
li_x_distance = []
for atoms in structures:
    li_x_distance.append(np.mean(get_li_x_bond_length(atoms)))


df['li_x_bond_length'] = li_x_distance

# attach the electronegativity columns; no `on` is given, so pandas joins on
# the column names the two frames have in common
df = table[['symbol', 'en_pauling', 'en_allen']].merge(df)


# Global matplotlib font settings for the figures below.
plt.rcParams['font.sans-serif'] = "Arial"
# Then, "ALWAYS use sans-serif fonts"
plt.rcParams['font.family'] = "sans-serif"
plt.rcParams['font.size'] = 16

#plt.rcParams.update({'font.family': 'Sans-Serif'})
fig, ax = plt.subplots(figsize =(8, 3), dpi = 300)
# white vertical grid lines (x axis only)
ax.grid(zorder = -1, alpha = 1.0, axis = 'x', color = 'w')
# Horizontal Bar Plot
# one bar per anion X, bar length = Eg
ax.barh(df.x, df.eg)
ax.set_xlabel('Eg, eV')
plt.tight_layout()
#fig.savefig('/Users/artemdembitskiy/Desktop/projects/intro-to-materials-informatics/src/lectures/lecture2/figures/eg_trend.png')


from scipy.stats import linregress

# Two panels sharing the Eg axis: Eg vs. Li-X bond length (left) and
# Eg vs. Pauling electronegativity of X (right), each with a linear fit.
plt.rcParams['font.size'] = 11
fig, (ax, ax2) = plt.subplots(dpi = 600, figsize = (5, 3), ncols = 2, sharey = True)
ax.scatter(df.li_x_bond_length, df.eg, label = 'measured', color = 'darkred', alpha = 0.5)


res = linregress(df.li_x_bond_length, df.eg)
ax.plot(df.li_x_bond_length, df.li_x_bond_length * res.slope + res.intercept, label = 'fit', zorder = -1)

# raw string: '\A' is not a valid escape sequence in a normal string literal
ax.set_xlabel(r'Li-X bond length, $\AA$')
ax.set_ylabel('Eg, eV')

# `res` is reused for the second fit
res = linregress(df.en_pauling, df.eg)
ax2.scatter(df.en_pauling, df.eg, color = 'darkcyan', alpha = 0.5)
ax2.plot(df.en_pauling, df.en_pauling * res.slope + res.intercept, label = 'fit', zorder = -1)
ax2.set_xlabel('Electronegativity (Pauling)')
ax.legend(frameon = False)

plt.tight_layout()

#fig.savefig('/Users/artemdembitskiy/Desktop/projects/intro-to-materials-informatics/src/lectures/lecture2/figures/eg_correlation.png')



#### Are Li-X bond length and X electronegativity correlated?