In [6]:
from ase.io import read, write
from ase.build import make_supercell
import numpy as np
import os
import random


In [25]:
def generate_supercells(cif_files, output_dir, N_MAX=4):
    # Convert Each CIF File (lots of gross text processing, beware!)
    for f in cif_files:
        f_name = f.split('.cif')[0]
        # Alter some formatting to make the file load better into ASE, copy into output folder
        occupation = dict()
        with open(input_dir+f, "r") as file:
            contents = file.read().split('\n')
            fh  = open(output_dir+f, 'w') # Re-saves a better formatted file
            format_start = False
            num_loops = 0  # number of times '_loop' is seen
            site_idx = 0 # Used for when no sites are labeled
            data_lookup = dict()
            for line in contents:
                # Format
                if len(line)<1:
                    format_start = False
                if format_start and "'" not in line:
                    spl = line.split()
                    fh.write(spl[0]+" '"+' '.join(spl[1:])+"' \n")
                else:
                    fh.write(line+'\n')
                if '_symmetry_equiv_pos_as_xyz' in line:
                    format_start = True
                if 'loop_' in line:
                    num_loops += 1
                # Record Occupation
                if num_loops>=2 and len(line)>1:  # Data stored in second _loop
                   if line[1] == '_':  # Add to data labels 
                        data_lookup[line] = len(data_lookup)  # Index for that data label
                   elif len(line.split()) > 4:  # Data probably listed
                       spl = line.split()
                       if ' _atom_site_Wyckoff_symbol' in data_lookup.keys():
                           site_label = ' _atom_site_Wyckoff_symbol'
                       elif ' _pauling_file_site_symmetry' in data_lookup.keys():
                           site_label = ' _pauling_file_site_symmetry'
                       else: raise Exception("NO SITE DESCRIPTORS FOUND")
                        
                       if spl[data_lookup[site_label]] not in occupation.keys():
                            occupation[spl[data_lookup[site_label]]] = {}
                       #  Add element occupancy to
                       if ' _atom_site_occupancy' in data_lookup.keys():
                            occupation[spl[data_lookup[site_label]]][spl[data_lookup[' _atom_site_type_symbol']]] = float(spl[data_lookup[' _atom_site_occupancy']])
                       else:  # No element occupancy data found
                           occupation[spl[data_lookup[site_label]]][spl[data_lookup[' _atom_site_type_symbol']]] = 1

            fh.close()    
            file.close()
        
        # Throw error if file is empty
        if len(occupation) < 1:
            raise Exception("No occupancy data found!")
        
        print("Name: ", f_name)   
        print("Occupation: ", occupation)
        # Load the original CIF file
        original_structure = read(output_dir+f) # Open formatted file copied into output dir
        
        # Define the supercell size adaptively
        # Find minimum element fraction for any given element
        x_min = 1
        for k1 in occupation.values():
            for k2 in k1.values():
                x_min = min(k2, x_min)
        n = np.ceil((5/x_min)**(1/3))  # Adaptive cell size to be <10% element representation error
        if x_min == 1:  # Use base cell if no mixed occupation lattice site
            supercell_dim = 1
        else:
            supercell_dim = min(n, N_MAX)  # Use a big enough supercell to limit representative error, max size N_MAX
        supercell_size = supercell_dim*np.eye(3) 
        print("Supercell dim: ", supercell_dim)
    
        # Create the supercell
        superlattice = make_supercell(original_structure, supercell_size)
        
        # Keep track of where each atom is in the superlattice
        site_list = list() # which site is at each atomic location in superlattice
        site_count = dict() # how many site of each exist
        for k in occupation.keys():
            site_count[k] = 0
        for atom in range(len(superlattice)):
            element = superlattice[atom].symbol
            print(superlattice[atom])
            for site in occupation.keys():
                if element in occupation[site].keys():
                    site_list.append(site)
                    site_count[site] = site_count[site] + 1
                    break
        if len(site_list) != len(superlattice):
            print(site_list)
            print(superlattice)
            raise Exception("Not every atom in super-lattice accounted for in occupation dictionary!")
        
        # Create list of atoms to draw from for each site
        site_sampler = dict()
        for site in occupation.keys():
            site_sampler[site] = list()
            # Make representative list of atoms to draw from
            for atom in occupation[site].keys():
                n_atoms = round( occupation[site][atom]*site_count[site] )
                site_sampler[site] = site_sampler[site] + [atom]*n_atoms
            # List too long, pop random element
            while len(site_sampler[site]) > site_count[site]: 
                random.shuffle(site_sampler[site])
                site_sampler[site].pop()
            # List too short, fill with vacancies
            while len(site_sampler[site]) < site_count[site]:
                site_sampler[site].append('')
    
        # Replace atoms based on occupation fraction
        vacancies = list()
        for site in occupation.keys():
            random.shuffle(site_sampler[site])
        for atom in range(len(superlattice)):
            atom_site = site_list[atom]
            element_draw = site_sampler[atom_site][0]
            del site_sampler[atom_site][0]
            if element_draw == "":
                vacancies.append(atom)
            else:
                superlattice[atom].symbol = element_draw
        # Create oxygen vacancies in lattice by deletion
        for i in range(len(vacancies)):
            del superlattice[vacancies[i]-i]
            
        # Save the superlattice structure in a new CIF file
        write(output_dir+f_name+'_super.cif', superlattice)
    
    print("Finished: processed "+ str(len(cif_files)) +" files")

In [26]:

## Script Parameters
input_dir = "data_linear/"  # CIF files source directory
output_dir = "supercells_data/"  # CIF supercell output file directory

# Load all CIF files in
file_type = ".cif"
files = os.listdir(input_dir)
cif_files = [file for file in files if file.endswith(file_type)]

# Supercell max dimension
N_MAX = 4  # maximum size allowed for supercell (NxNxN unit cells)

# Make Output Directory if needed
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

generate_supercells(cif_files, output_dir, N_MAX=N_MAX)

Name:  10004
Occupation:  {'8d': {'O': 1.0}, '4c': {'Sm': 1.0, 'O': 1.0}, '4a': {'Mn': 1.0}}
Supercell dim:  1
Atom('O', [1.7589312, 7.103410800000002, 4.3311554999999995], index=0)
Atom('O', [1.7589312, 4.1195892, 4.3311554999999995], index=1)
Atom('O', [4.6749312, 7.103410800000002, 3.7463444999999997], index=2)
Atom('O', [4.6749312, 4.1195892, 3.7463444999999997], index=3)
Atom('O', [4.0730688, 3.3624108, 1.0538444999999999], index=4)
Atom('O', [4.0730688, 0.3785892, 1.0538444999999999], index=5)
Atom('O', [1.1570688, 3.3624108, 1.6386555], index=6)
Atom('O', [1.1570688, 0.3785892, 1.6386555], index=7)
Atom('Sm', [0.3662496000000001, 5.6115, 2.7765059999999995], index=8)
Atom('Sm', [3.2822495999999997, 5.6115, 5.300994000000001], index=9)
Atom('Sm', [5.4657504, 1.8705, 2.608494], index=10)
Atom('Sm', [2.5497503999999998, 1.8705, 0.084006], index=11)
Atom('O', [5.6407104, 5.6115, 0.5411925000000002], index=12)
Atom('O', [2.7247103999999998, 5.6115, 2.1513074999999997], index=13)
Atom



Exception: No occupancy data found!