In [None]:
import pandas as pd
import os
import shutil
import re
import sys
# Extend the module search path so the import from `data_featurization` below can
# resolve (presumably that module lives in ../scripts -- TODO confirm).
sys.path.append(r'../scripts')

# Helper functions taken from the `data_featurization` module.
from data_featurization import load_data, load_cif_structure, order_structure, remove_li_from_mixed_sites, generate_ordered_struc_list

from pymatgen.transformations.standard_transformations import OrderDisorderedStructureTransformation
from func_timeout import func_timeout, FunctionTimedOut

import warnings
# NOTE(review): this suppresses every warning for the rest of the session, which
# can also hide warnings that point at real problems -- consider narrowing it.
warnings.filterwarnings("ignore")

from tqdm import tqdm
# Registers tqdm's `progress_apply` on pandas objects; it is used when the
# structure columns are built later in this notebook.
tqdm.pandas()

### Define paths for relevant directories

Please note that ICSD CIFs are not supplied in this repository.

In [None]:
# Locations are relative to the notebook's working directory. Forward slashes are
# used because Python accepts them on Windows, Linux and macOS alike, whereas
# backslash-separated paths only resolve on Windows.
icsd_li_cifs_path = "../data/icsd_li_cifs"  # input: directory of ICSD CIFs (not shipped with the repository)
labelled_conductivity_database_path = "../data/ionic_conductivity_database_11022023.csv"  # input: labelled database CSV
output_ordered_cifs_dir = "../data/ordered_icsd_wo_li_cifs"  # output: directory the ordered CIFs are written to

### Load labelled database and associated Pymatgen Structures

In [None]:
# Read the labelled conductivity table, then attach two derived columns:
#   'structure'        -- result of `load_cif_structure` for each ICSD collection code
#   'structure_wo_Li'  -- result of `remove_li_from_mixed_sites` for each loaded structure
labelled_conductivity_database = load_data(labelled_conductivity_database_path)

collection_codes = labelled_conductivity_database['icsd_collectioncode']
labelled_conductivity_database['structure'] = collection_codes.progress_apply(
    load_cif_structure,
    cif_dir=icsd_li_cifs_path,
)

loaded_structures = labelled_conductivity_database['structure']
labelled_conductivity_database['structure_wo_Li'] = loaded_structures.progress_apply(
    remove_li_from_mixed_sites
)

### Order structures using Pymatgen OrderDisorderedStructureTransformation

Please note that ordering can take an exceedingly long time for some structures, so it was run in parallel on an HPC with appropriate timeouts.

In [None]:
# Produce one "<icsd_code>_ordered.cif" per database row in the output directory.
os.makedirs(output_ordered_cifs_dir, exist_ok=True)

for index, row in tqdm(labelled_conductivity_database.iterrows(), total=labelled_conductivity_database.shape[0]):

    icsd_code = row['icsd_collectioncode']
    structure = row['structure_wo_Li']
    print(f'Attempting to order {icsd_code}')

    # Every branch writes to the same destination, so build it once.
    output_cif_path = os.path.join(output_ordered_cifs_dir, f"{icsd_code}_ordered.cif")

    if structure.is_ordered:
        if row['structure'].is_ordered:
            # The source structure is itself ordered, so the source CIF can be
            # reused as-is under the new filename.
            input_cif_path = os.path.join(icsd_li_cifs_path, f"{icsd_code}.cif")
            if os.path.exists(input_cif_path):
                shutil.copy(input_cif_path, output_cif_path)
                print(f"Copied ordered structure for ICSD {icsd_code}")
            else:
                print(f"CIF file for ICSD {icsd_code} not found.")
        else:
            # Only the processed structure is ordered; the source CIF still
            # describes a disordered structure, so copying it would store a
            # disordered file under an "_ordered" name. Write the processed
            # structure instead.
            # NOTE(review): assumes the processed ('structure_wo_Li') structure is
            # what downstream steps expect in this directory -- confirm.
            structure.to(filename=output_cif_path, fmt="cif")
            print(f"Saved ordered structure for ICSD {icsd_code}")
    else:
        ordered_list = generate_ordered_struc_list(structure)
        if ordered_list is not None and len(ordered_list) > 0:
            # Take the first entry of the returned list and save it as a new CIF.
            ordered_structure = ordered_list[0]['structure']
            # Keyword arguments: the positional order of `filename` and `fmt` in
            # `Structure.to` differs between pymatgen releases.
            ordered_structure.to(filename=output_cif_path, fmt="cif")
            print(f"Saved ordered structure for ICSD {icsd_code}")
        else:
            print(f"Failed to order structure for ICSD {icsd_code}")