In [1]:
from ase.io import read, write
from ase.build import make_supercell
import numpy as np
import os
import random

## Script Parameters
supercell_dim = 4 # how many unit cells to replicate in each direction
input_dir = "raw_oxide_data/"
output_dir = "supercell_data/"

# Load all CIF files in
file_type = ".cif"
files = os.listdir(input_dir)
cif_files = [file for file in files if file.endswith(file_type)]

# Make Output Directory if needed
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)



# Convert Each CIF File
for f in cif_files:
    f_name = f.split('.cif')[0]
    # Alter some formatting to make the file load better into ASE, copy into output folder
    occupation = dict()
    with open(input_dir+f, "r") as file:
        contents = file.read().split('\n')
        fh  = open(output_dir+f, 'w')
        format_start = False
        occupation_start = False
        site_idx = 0 # Used for when no sites are labeled
        for line in contents:
            # Format
            if len(line)<1:
                format_start = False
                occupation_start = False
            if format_start and "'" not in line:
                spl = line.split()
                fh.write(spl[0]+" '"+' '.join(spl[1:])+"' \n")
            else:
                fh.write(line+'\n')
            if '_symmetry_equiv_pos_as_xyz' in line:
                format_start = True
            # Record Occupation
            if '_A' in line:
                occupation_start = True
            if occupation_start:
                spl = line.split()
                if len(spl)<10:
                    spl.append(chr(97+site_idx))
                    site_idx = site_idx + 1
                if spl[9] not in occupation.keys():
                    occupation[spl[9]] = dict()
                occupation[spl[9]][spl[0].split("_")[0]] = float(spl[8])
        fh.close()    
        file.close()
    
    print(f_name)   
    print(occupation)
    # Load the original CIF file
    original_structure = read(output_dir+f) # Open formatted file copied into output dir
    
    # Define the supercell size
    supercell_size = supercell_dim*np.eye(3) 
    
    # Create the supercell
    superlattice = make_supercell(original_structure, supercell_size)
    
    # Keep track of where each atom is in the superlattice
    site_list = list() # which site is at each atomic location in superlattice
    site_count = dict() # how many site of each exist
    for k in occupation.keys():
        site_count[k] = 0
    for atom in range(len(superlattice)):
        element = superlattice[atom].symbol
        for site in occupation.keys():
            if element in occupation[site].keys():
                site_list.append(site)
                site_count[site] = site_count[site] + 1
                break
    if len(site_list) != len(superlattice):
        raise "Not every atom in superlattice accounted for in occupation dictionary!"
    print(site_count)
    
    # Create list of atoms to draw from for each site
    site_sampler = dict()
    for site in occupation.keys():
        site_sampler[site] = list()
        # Make representative list of atoms to draw from
        for atom in occupation[site].keys():
            n_atoms = round( occupation[site][atom]*site_count[site] )
            site_sampler[site] = site_sampler[site] + [atom]*n_atoms
        # List too long, pop random element
        while len(site_sampler[site]) > site_count[site]: 
            random.shuffle(site_sampler[site])
            site_sampler[site].pop()
        # List too short, fill with vacancies
        while len(site_sampler[site]) < site_count[site]:
            site_sampler[site].append('')

    # Replace atoms based on occupation fraction
    vacancies = list()
    for site in occupation.keys():
        random.shuffle(site_sampler[site])
    for atom in range(len(superlattice)):
        atom_site = site_list[atom]
        element_draw = site_sampler[atom_site][0]
        del site_sampler[atom_site][0]
        if element_draw == "":
            vacancies.append(atom)
        else:
            superlattice[atom].symbol = element_draw
    # Create vacancies
    for i in range(len(vacancies)):
        del superlattice[vacancies[i]-i]
        
    # Save the superlattice structure in a new CIF file
    write(output_dir+f_name+'_super.cif', superlattice)

print("Finished: processed "+ str(len(files)) +" files")

Ag2O_MPDS_S542119
{'3d': {'Ag': 1.0}, '1a': {'O': 1.0}}
{'3d': 256, '1a': 128}
Ba0.45Sr0.45Nd0.1Fe0.2Co0.8O3_MPDS_S1822169
{'3d': {'O': 1.0}, '1b': {'Ba': 0.45, 'Sr': 0.45, 'Nd': 0.1}, '1a': {'Co': 0.8, 'Fe': 0.2}}
{'3d': 192, '1b': 64, '1a': 64}
Ba0.4Sr0.4Ca0.2Fe0.2Co0.8O3_MPDS_S1826417
{'3d': {'O': 1.0}, '1b': {'Ba': 0.4, 'Sr': 0.4, 'Ca': 0.2}, '1a': {'Co': 0.8, 'Fe': 0.2}}
{'3d': 192, '1b': 64, '1a': 64}
Ba0.4Sr0.4La0.2Fe0.2Co0.8O3_MPDS_S1826419
{'3d': {'O': 1.0}, '1b': {'Ba': 0.4, 'Sr': 0.4, 'La': 0.2}, '1a': {'Co': 0.8, 'Fe': 0.2}}
{'3d': 192, '1b': 64, '1a': 64}
Ba0.5Cu0.5La0.5Fe0.5O2.51_MPDS_S1927382
{'3d': {'O': 0.837}, '1b': {'Ba': 0.5, 'La': 0.5}, '1a': {'Cu': 0.5, 'Fe': 0.5}}




{'3d': 192, '1b': 64, '1a': 64}
Ba0.5Cu0.5La0.5Fe0.5O2.735_MPDS_S1927380
{'3d': {'O': 0.912}, '1b': {'Ba': 0.5, 'La': 0.5}, '1a': {'Cu': 0.5, 'Fe': 0.5}}
{'3d': 192, '1b': 64, '1a': 64}
Ba0.5Cu0.5La0.5Fe0.5O2.7_MPDS_S1819807
{'3d': {'O': 0.9}, '1b': {'Ba': 0.5, 'La': 0.5}, '1a': {'Cu': 0.5, 'Fe': 0.5}}
{'3d': 192, '1b': 64, '1a': 64}
BaPb0.5Bi0.5O3_MPDS_S309957
{'3d': {'O': 1.0}, '1b': {'Ba': 1.0}, '1a': {'Bi': 0.5, 'Pb': 0.5}}
{'3d': 192, '1b': 64, '1a': 64}
Ce0.85Sm0.075Dy0.075O1.925_MPDS_S1245701
{'8c': {'O': 0.963}, '4a': {'Ce': 0.85, 'Dy': 0.075, 'Sm': 0.075}}




{'8c': 512, '4a': 256}
Ce0.8Gd0.1Co0.1O1.90_MPDS_S379306
{'8c': {'O': 0.95}, '4a': {'Ce': 0.8, 'Co': 0.1, 'Gd': 0.1}}
{'8c': 512, '4a': 256}
Ce0.8Sm0.06Dy0.14O1.9_MPDS_S1933347
{'8c': {'O': 0.95}, '4a': {'Ce': 0.8, 'Dy': 0.14, 'Sm': 0.06}}
{'8c': 512, '4a': 256}
Ce0.8Sm0.2O1.9_MPDS_S1933346
{'8c': {'O': 0.95}, '4a': {'Ce': 0.8, 'Sm': 0.2}}
{'8c': 512, '4a': 256}
La0.05Ce0.8Gd0.15O1.9_MPDS_S1931925
{'8c': {'O': 0.95}, '4a': {'Ce': 0.8, 'Gd': 0.15, 'La': 0.05}}
{'8c': 512, '4a': 256}
La0.1Y0.1Ce0.8O1.9_MPDS_S1227763
{'8c': {'O': 0.95}, '4a': {'Ce': 0.8, 'La': 0.1, 'Y': 0.1}}
{'8c': 512, '4a': 256}
La0.2Ce0.8O1.9_MPDS_S1227762
{'8c': {'O': 0.95}, '4a': {'Ce': 0.8, 'La': 0.2}}
{'8c': 512, '4a': 256}
La2Zr2O7_MPDS_S1227951
{'a': {'La': 1.0}, 'b': {'Zr': 1.0}, 'c': {'O': 1.0}, 'd': {'O': 1.0}}
{'a': 1024, 'b': 1024, 'c': 3584, 'd': 0}




Sr0.7La0.1Ce0.2FeO2.75_MPDS_S1819860
{'3d': {'O': 0.917}, '1b': {'Sr': 0.7, 'Ce': 0.2, 'La': 0.1}, '1a': {'Fe': 1.0}}
{'3d': 192, '1b': 64, '1a': 64}
SrCu0.3Fe0.3Co0.4O3_MPDS_S379509
{'3d': {'O': 1.0}, '1b': {'Sr': 1.0}, '1a': {'Co': 0.4, 'Cu': 0.3, 'Fe': 0.3}}
{'3d': 192, '1b': 64, '1a': 64}
SrTiO3_MPDS_S305037
{'3d': {'O': 1.0}, '1b': {'Sr': 1.0}, '1a': {'Ti': 1.0}}
{'3d': 192, '1b': 64, '1a': 64}
Finished: processed 38 files
