In [4]:
#import string
from pathlib import Path
import numpy as np
import pandas as pd
import sys, os
import timeit
#from typing import List
from pymatgen.io.cif import CifParser
# Old paper features
#from matminer.featurizers.base import BaseFeaturizer
from matminer.featurizers.site import GaussianSymmFunc, SiteElementalProperty,AGNIFingerprints
#from matminer.utils.data import MagpieData
from pymatgen.analysis.local_env import VoronoiNN
import os

BOND_MAX_DIST = 2.6  # Max distance for a bond in angstroms
# New Features
#from matminer.featurizers.structure.bonding import BagofBonds, BondFractions, GlobalInstabilityIndex, StructuralHeterogeneity, MinimumRelativeDistances
#from matminer.featurizers.structure.matrix import CoulombMatrix, SineCoulombMatrix, OrbitalFieldMatrix
#from pymatgen.analysis import local_env


In [13]:
def _featurize_sites(data, featurizer, site_features, verbos=False):
    """Run one matminer site featurizer on every site and merge the results into ``site_features``.

    :param data: DataFrame indexed by structure name with a "structure" column
    :param featurizer: site featurizer exposing ``featurize(structure, idx=...)``
    :param site_features: dict keyed by "<structure index>_<atom index>"; updated in place
    :param verbos: print each structure index while processing
    """
    colnames = featurizer._generate_column_labels(multiindex=False, return_errors=False)
    for index, row in data.iterrows():
        structure = row["structure"]
        if verbos:
            print(index)
        for atomidx in range(structure.num_sites):
            feat = featurizer.featurize(structure, idx=atomidx)
            site_features["%s_%i" % (index, atomidx)].update(dict(zip(colnames, feat)))


def _collect_bonds(structure, max_dist):
    """Find unique bonded pairs within ``max_dist`` and their (coordination number, length).

    :param structure: object exposing ``get_neighbor_list(r)``
    :param max_dist: cutoff distance passed to ``get_neighbor_list``
    :returns: ``(pairs, properties)`` where ``pairs[i]`` is ``(center_idx, neighbor_idx)``
        with ``center_idx < neighbor_idx`` and ``properties[i]`` is
        ``(coordination number of the center atom, bond length)``
    """
    # (center_indices, points_indices, offset_vectors, distances)
    neighbors = structure.get_neighbor_list(max_dist)
    centers, points, distances = neighbors[0], neighbors[1], neighbors[3]

    # Count neighbors per center once instead of re-scanning the list for every bond.
    unique_centers, counts = np.unique(centers, return_counts=True)
    coordination = dict(zip(unique_centers.tolist(), counts.tolist()))

    pairs, properties = [], []
    for center, point, dist in zip(centers, points, distances):
        if center < point:  # Don't double count bonds
            pairs.append((center, point))
            properties.append((coordination[int(center)], dist))
    return pairs, properties


def featurize_dataset(cifs: list, verbos=False, saveto: str = "features.csv") -> pd.DataFrame:
    """Featurize crystal structures using elemental, geometric, and chemical descriptors for local environments.

    Every atom pair closer than ``BOND_MAX_DIST`` becomes one row. Site features of both atoms
    are stored with ``_atom1`` / ``_atom2`` suffixes, where atom1 is the site with the strictly
    larger "site AtomicWeight" (on a tie the higher-index site becomes atom1). Properties listed
    in ``delta_properties`` additionally get an ``_diff`` column (atom1 - atom2).

    :param cifs: list of paths to crystal structures in cif format
    :param verbos: print the index of each structure as it is processed
    :param saveto: CSV file to write; rows are appended if the file already has content
    :returns: DataFrame with one row per bond, indexed by "<structure>_Atom<i>_Bond<j>"
    """

    ## Process Input Files
    print("Parsing CIFs")
    features = {}
    for cif in cifs:
        cif_path = Path(cif)
        features[cif_path.name] = {
            "structure": CifParser(cif).get_structures()[0],
            "structure_name": cif_path.name,
            "structure_path": str(cif_path.parent),
        }
    data = pd.DataFrame.from_dict(features).T

    ### SITE PROPERTIES ###
    # These will be paired as features
    print("Assembling site property dictionary")
    site_features = {}
    for index, row in data.iterrows():
        for atomidx in range(row["structure"].num_sites):
            site_features["%s_%i" % (index, atomidx)] = {
                "structure_name": row["structure_name"],
                "structure_path": row["structure_path"],
            }

    ## 1. Site Elemental Property
    print("site elemental properties")
    property_list = ("Number", "AtomicWeight", "Row", "Column", "Electronegativity", "CovalentRadius")
    _featurize_sites(data, SiteElementalProperty(properties=property_list), site_features, verbos)

    ## 2. AGNI
    print("AGNI")
    _featurize_sites(data, AGNIFingerprints(cutoff=5, directions=[None]), site_features, verbos)

    ## 3. Gaussian Symmetry Functions
    print("GSF")
    _featurize_sites(data, GaussianSymmFunc(cutoff=5), site_features, verbos)

    ### BOND PAIRS AND BOND PROPERTIES ###
    # Bonds are defined by a plain distance cutoff; an earlier VoronoiNN-based
    # variant of this step was disabled and has been dropped from this function.
    print("Generating bond library")
    structures_bonds = {}  # Store bond pairs
    bond_properties = {}  # Store bond properties
    for index, row in data.iterrows():
        if verbos:
            print(index)
        structures_bonds[index], bond_properties[index] = _collect_bonds(row["structure"], BOND_MAX_DIST)

    # Build Dataframe by bonds
    print("Copying over data to final dataframe")
    delta_properties = ["site Electronegativity", "site AtomicWeight"]  # For these properties, take the difference as a feature
    meta_keys = ("structure_path", "structure_name")
    bond_features = {}  # Final dictionary: bond_features['material_bond#']["feature_name"] = data
    for index, row in data.iterrows():
        if verbos:
            print(index)
        bond_len_sum = 0
        structure_entries = []  # (entry, bond_len) for the second pass below
        bonds_with_props = zip(structures_bonds[index], bond_properties[index])
        for bond_idx, (bond, (coord_num, bond_len)) in enumerate(bonds_with_props):
            bond_name = "%s_Atom%i_Bond%i" % (index, bond[0], bond_idx)
            site1 = site_features["%s_%i" % (index, bond[0])]
            site2 = site_features["%s_%i" % (index, bond[1])]

            # Order putting heavier element first
            if site1["site AtomicWeight"] > site2["site AtomicWeight"]:
                atom1, atom2 = site1, site2
            else:
                atom1, atom2 = site2, site1

            entry = {
                "structure_name": site1["structure_name"],
                "structure_path": site1["structure_path"],
            }
            for k in site1:
                if k in meta_keys:
                    continue
                if k in delta_properties:
                    entry[k + "_diff"] = atom1[k] - atom2[k]
                entry[k + "_atom1"] = atom1[k]
                entry[k + "_atom2"] = atom2[k]

            # Insert bond properties
            entry["coordination_number"] = coord_num
            entry["bond_length"] = bond_len
            bond_len_sum += bond_len

            bond_features[bond_name] = entry
            structure_entries.append((entry, bond_len))

        # "volume_fraction" is each bond's share of the summed bond lengths in this
        # structure (it is computed from lengths only, not from the lattice volume).
        for entry, bond_len in structure_entries:
            entry["volume_fraction"] = bond_len / bond_len_sum

    ### SAVE FILE
    print("Saving to File!")
    df_features = pd.DataFrame.from_dict(bond_features).T
    if not df_features.empty:  # An empty frame would create a header-less file that later appends cannot repair
        has_content = os.path.isfile(saveto) and os.path.getsize(saveto) > 0
        # Always write the index so appended rows line up with the header written by the
        # first call (which includes the index column).
        df_features.to_csv(saveto, mode="a" if has_content else "w", header=not has_content)

    return df_features


In [14]:
# Smoke test on a single supercell CIF; the returned per-bond DataFrame is displayed as the cell output.
# Note: featurize_dataset appends to `saveto` when that file already exists, so re-running
# this cell adds the same rows to test_feat.csv again.
featurize_dataset(['supercells_data/15284_super.cif'], saveto='test_feat.csv')

Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!


Unnamed: 0,structure_name,structure_path,site Number_atom1,site Number_atom2,site AtomicWeight_diff,site AtomicWeight_atom1,site AtomicWeight_atom2,site Row_atom1,site Row_atom2,site Column_atom1,...,G4_0.005_1.0_1.0_atom2,G4_0.005_1.0_-1.0_atom1,G4_0.005_1.0_-1.0_atom2,G4_0.005_4.0_1.0_atom1,G4_0.005_4.0_1.0_atom2,G4_0.005_4.0_-1.0_atom1,G4_0.005_4.0_-1.0_atom2,coordination_number,bond_length,volume_fraction
15284_super.cif_Atom1_Bond0,15284_super.cif,supercells_data,26.0,8.0,39.8456,55.845,15.9994,4.0,2.0,8.0,...,7.58149,3.59042,2.622812,3.157582,3.895691,0.623596,0.327758,6,1.9495,0.166667
15284_super.cif_Atom1_Bond1,15284_super.cif,supercells_data,26.0,8.0,39.8456,55.845,15.9994,4.0,2.0,8.0,...,7.58149,3.59042,2.622812,3.157582,3.895691,0.623596,0.327758,6,1.9495,0.166667
15284_super.cif_Atom1_Bond2,15284_super.cif,supercells_data,26.0,8.0,39.8456,55.845,15.9994,4.0,2.0,8.0,...,7.58149,3.59042,2.622812,3.157582,3.895691,0.623596,0.327758,6,1.9495,0.166667
15284_super.cif_Atom1_Bond3,15284_super.cif,supercells_data,26.0,8.0,39.8456,55.845,15.9994,4.0,2.0,8.0,...,7.58149,3.59042,2.622812,3.157582,3.895691,0.623596,0.327758,6,1.9495,0.166667
15284_super.cif_Atom1_Bond4,15284_super.cif,supercells_data,26.0,8.0,39.8456,55.845,15.9994,4.0,2.0,8.0,...,7.58149,3.59042,2.622812,3.157582,3.895691,0.623596,0.327758,6,1.9495,0.166667
15284_super.cif_Atom1_Bond5,15284_super.cif,supercells_data,26.0,8.0,39.8456,55.845,15.9994,4.0,2.0,8.0,...,7.58149,3.59042,2.622812,3.157582,3.895691,0.623596,0.327758,6,1.9495,0.166667


In [None]:
# Batching files to reduce memory use
BATCH_SIZE = 5

# Load all CIF files in directory
file_type = "_super.cif"  # Use files with this ending in input_dir
input_dir = "supercells_data/"  # Input data directory
output_dir = "features/"  # Output directory
filename = "features.csv"  # Output filename for features
output_path = os.path.join(output_dir, filename)  # Where featurize_dataset actually writes

os.makedirs(output_dir, exist_ok=True)

# Sort so the batch composition (and row order in the CSV) is reproducible;
# os.listdir returns entries in arbitrary order.
cif_files = sorted(
    os.path.join(input_dir, file) for file in os.listdir(input_dir) if file.endswith(file_type)
)

# Featurize all structures
n_batches = int(np.ceil(len(cif_files)/BATCH_SIZE))
# Clean up any previous run. This must target the real output path: featurize_dataset
# appends to an existing file, so a stale file would get this run's rows added to it.
if os.path.isfile(output_path):
    os.remove(output_path)

for b in range(n_batches):
    print("Starting batch ", b)
    # Define which files belong to this batch
    idx_start = int(b*BATCH_SIZE)
    idx_end = int(min((b+1)*BATCH_SIZE, len(cif_files)))
    start = timeit.default_timer()
    data_frame = featurize_dataset(cif_files[idx_start:idx_end], saveto=output_path)
    print("Time elapsed: ", timeit.default_timer() - start)

print("Files processed: ", len(cif_files))

Starting batch  0
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  15.442842999938875
Starting batch  1
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  31.007436299696565
Starting batch  2
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  49.9479780998081
Starting batch  3
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  47.11997690005228
Starting batch  4
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  32.37097760010511
Starting batch  5
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  4.87524739978835
Starting batch  6
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  102.89528529997915
Starting batch  7
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  27.152877000160515
Starting batch  8
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  23.650380699895322
Starting batch  9
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI




GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  1.6389628001488745
Starting batch  10
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  140.2957760002464
Starting batch  11
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  45.16629830002785
Starting batch  12
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  7.207724799867719
Starting batch  13
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  15.936213300097734
Starting batch  14
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  16.289230400230736
Starting batch  15
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  5.394885099958628
Starting batch  16
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  19.407546299975365
Starting batch  17
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  26.56329660024494
Starting batch  18
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  5.364182699937373
Starting batch  19
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  47.72055110009387
Starting batch  20
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  55.15368110034615
Starting batch  21
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  123.79684560000896
Starting batch  22
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  4.735278400126845
Starting batch  23
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  97.06251950003207
Starting batch  24
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  34.117110799998045
Starting batch  25
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF




Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  0.3593379999510944
Starting batch  26
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI




GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  1.4443439999595284
Starting batch  27
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  6.009114700369537
Starting batch  28
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  40.69459229987115
Starting batch  29
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  0.32250559981912374
Starting batch  30
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  5.304924699943513
Starting batch  31
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  1.8563999002799392
Starting batch  32
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  4.282810499891639
Starting batch  33
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  0.44840400014072657
Starting batch  34
Parsing CIFs
Assembling site property dictionary
site elemental properties




AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  2.132695899810642
Starting batch  35
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  0.528914200142026
Starting batch  36
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  75.1154147000052
Starting batch  37
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  6.215857999864966
Starting batch  38
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  4.365175699815154
Starting batch  39
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  5.029151199851185
Starting batch  40
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  2.9736298001371324
Starting batch  41
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  8.0625428003259
Starting batch  42
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  2.7649218002334237
Starting batch  43
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  11.296341300010681
Starting batch  44
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  18.46152909984812
Starting batch  45
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  2.8527250001206994
Starting batch  46
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  8.543749999720603
Starting batch  47
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  2.256372299976647
Starting batch  48
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  0.5945496000349522
Starting batch  49
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  9.369598200079054
Starting batch  50
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  5.411290899850428
Starting batch  51
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  13.266949099954218
Starting batch  52
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  90.94156059995294
Starting batch  53
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  38.70517749991268
Starting batch  54
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  13.896633100230247
Starting batch  55
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  41.8086150996387
Starting batch  56
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  7.077391400001943
Starting batch  57
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  12.96621309965849
Starting batch  58
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  41.71295880014077
Starting batch  59
Parsing CIFs
Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  18.908360099885613
Starting batch  60
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  33.1684309002012
Starting batch  61
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  8.696342699695379
Starting batch  62
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  22.686417799908668
Starting batch  63
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  12.355670799966902
Starting batch  64
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  925.7152229999192
Starting batch  65
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  10.24521159986034
Starting batch  66
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  12.236424499657005
Starting batch  67
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF
Generating bond library
Copying over data to final dataframe
Saving to File!
Time elapsed:  21.228093899786472
Starting batch  68
Parsing CIFs




Assembling site property dictionary
site elemental properties
AGNI
GSF


In [3]:
   ## 1. Bag of Bonds
# NOTE(review): everything below in this cell is disabled code wrapped in bare
# string literals, so running the cell computes nothing. The snippets use names
# (`data`, `verbos`, `site_features`, `property_list`) that exist only as locals
# inside featurize_dataset, and classes (BagofBonds, BondFractions,
# LocalPropertyStatsNew) that no active import line visible in this notebook
# provides -- presumably they were meant to be pasted into that function.
"""
print("bag of bonds")
BB = BagofBonds()
for index, row in data.iterrows():
    structure = row["structure"]
    if verbos:
        print(index)
    BB.fit([structure])
    feat = BB.bag(structure)
    print(feat)
    site = list(feat.keys())
    print(site[0])
    print(structure[site])

print("bond fraction")
BF = BondFractions()
for index, row in data.iterrows():
    structure = row["structure"]
    if verbos:
        print(index)
    feat = BF.fit_transform([structure])
    #print(feat)
    #print(BF.feature_labels())
"""
## 5. site difference stats 
"""
print("LPD")
LPD = LocalPropertyStatsNew(properties=property_list)
colnames = LPD._generate_column_labels(multiindex=False, return_errors=False)
for index, row in data.iterrows():
    structure = row["structure"]
    if verbos:
        print(index)
    for atomidx in range(structure.num_sites):
        feat = LPD.featurize(structure, idx=atomidx)
        site_name = "%s_%i" % (index, atomidx)
        site_features[site_name].update(dict(zip(colnames, feat)))
"""
    