In [1]:
import random
import itertools
import glob
import numpy as np
import sparse
from tqdm import tqdm
from PIL import Image
import torch

import matplotlib.pyplot as plt

In [2]:
def create_voxel_grid(cif_file,voxel_count, max_dims, atoms_used=['Ce','O']):
    '''
    Voxelize a single .cif file. Automatically detects which of the supported
    .cif layouts is used (by counting the double-space separated fields on
    fixed lines of the file).

    Inputs:
    cif_file - str, path to .cif file
    voxel_count - sequence [x,y,z] (list, tuple or numpy array) or a scalar if all
                  sides are the same; number of voxels per side. Values are
                  converted with int().
    max_dims - list [x,y,z] of max cell lengths across structures. Coordinates are
               rescaled by cell_length/max_dims so every structure shares one grid.
    atoms_used - list, indicates which atoms to include; one grid channel per
                 entry, in the given order. The default list is never mutated.

    Outputs:
    grid - sparse.COO of shape
           [voxel_count_x,voxel_count_y,voxel_count_z,len(atoms_used)]
           where values indicate the number of atoms of that type in the voxel
    coor - numpy array [n_atoms,3] of atom coordinates AFTER rescaling by
           cell_length/max_dims
    atom_type - numpy array [n_atoms] of atom type

    Raises:
    ValueError - if the atom table layout is not recognised, or a voxel count < 1.
    IndexError - if the file is shorter than expected, or an atom lies beyond
                 the upper edge of the grid.

    NOTE: coor and atom_type include all atoms, not just those in atoms_used.
    '''
    # Read the .cif file; the context manager guarantees the handle is closed
    with open(cif_file) as f:
        lines = f.readlines()

    # Box dimensions
    # Cell lengths are the last double-space separated token on three
    # consecutive lines. They normally start on line index 2; in the newer
    # format that line is not numeric, so everything is shifted down by one.
    increm = 0
    try:
        x_dim = float(lines[2].split('  ')[-1])
    except ValueError:
        increm = 1
        x_dim = float(lines[2+increm].split('  ')[-1])
    y_dim = float(lines[3+increm].split('  ')[-1])
    z_dim = float(lines[4+increm].split('  ')[-1])

    # Extract the atom locations
    n_fields = len(lines[15].split("  "))
    if n_fields < 4:

        # for .cif files from ASU_April_21
        if len(lines[23].split("  ")) < 4:
            rows = np.array([ln.split("  ") for ln in lines[26:]])
            coor = np.array([[float(v) for v in row] for row in rows[:,4:7]])
            atom_type = np.array([label.split(" ")[0] for label in rows[:,1]])

        # for .cif files from models_wedge_cif and models_wedge_cif_2
        else:
            rows = np.array([ln.split("  ") for ln in lines[23:-1]])
            coor = np.array([[float(v) for v in row] for row in rows[:,2:5]])
            atom_type = np.array([label.split(" ")[0] for label in rows[:,1]])

    # for .cif files from models_cif
    elif n_fields == 4:
        rows = np.array([ln.split("  ") for ln in lines[15:]])
        coor = np.array([[float(v) for v in row] for row in rows[:,1:]])
        atom_type = rows[:,0]

    else:
        # Previously this fell through and failed later with an unbound `coor`
        raise ValueError("Unrecognised .cif atom table layout in {}".format(cif_file))

    # Voxelize
    # Accept a scalar or any 3-element sequence for the per-axis voxel counts
    if np.ndim(voxel_count) == 0:
        voxel_count_x = voxel_count_y = voxel_count_z = int(voxel_count)
    else:
        voxel_count_x,voxel_count_y,voxel_count_z = (int(c) for c in voxel_count)
    counts = (voxel_count_x,voxel_count_y,voxel_count_z)
    if min(counts) < 1:
        raise ValueError("voxel_count must be at least 1 per axis, got {}".format(voxel_count))

    # grid has number of channels equal to number of atom types in lattice
    num_atoms = len(atoms_used)
    atom_mapping = {at:i for i,at in enumerate(atoms_used)}
    grid = np.zeros([voxel_count_x,voxel_count_y,voxel_count_z,num_atoms])

    # Normalize coor for axis lengths
    coor = coor*np.array([x_dim,y_dim,z_dim])/np.array(max_dims)

    for i,atom in enumerate(coor):
        if atom_type[i] in atoms_used:
            idx = []
            for axis in range(3):
                v = int(np.floor(atom[axis]*counts[axis]))
                # An atom exactly on the upper edge (scaled coordinate 1.0)
                # belongs to the last voxel, as in np.histogram's last bin.
                # Anything further out still raises IndexError below.
                # NOTE(review): negative indices are left untouched, so NumPy
                # wraps them from the end as before - confirm whether negative
                # coordinates can occur in the input files.
                if v == counts[axis]:
                    v -= 1
                idx.append(v)
            grid[idx[0],idx[1],idx[2],atom_mapping[atom_type[i]]] += 1
    grid = sparse.COO.from_numpy(grid)
    return grid, coor, atom_type

# Determine the maximum cell length along each axis (x, y and z)
def max_z_value(dir_header, dir_list_cif):
    '''
    Find the largest cell length along each axis over all matching .cif files.

    Despite the name, the maximum is computed for x, y and z, not only z.

    Inputs:
    dir_header - str, path prefix prepended to every directory name
    dir_list_cif - list of directory names (relative to dir_header) that are
                   searched for files matching 'Ce*.cif'

    Outputs:
    [max_x, max_y, max_z] - list of the largest cell length found per axis

    Raises:
    ValueError - if no .cif file matches, or a cell length cannot be parsed.
    IndexError - if a file has fewer header lines than expected.
    '''
    cif_files = []

    for dirr in dir_list_cif:
        cif_files.extend(glob.glob(dir_header+dirr+'/Ce*.cif'))

    if not cif_files:
        # np.max on an empty list also raises ValueError, but with an opaque message
        raise ValueError("No 'Ce*.cif' files found under {!r} in {}".format(dir_header, dir_list_cif))

    x_list,y_list,z_list = [],[],[]

    for cf in cif_files:
        # Only line indices 2..5 are ever consulted, so read just the first six
        # lines; the context manager closes each file as soon as it is parsed.
        with open(cf) as f:
            header = list(itertools.islice(f, 6))
        # increm accounts for new format .cif files, where the cell lengths
        # start one line later
        increm = 0
        try:
            x_list.append(float(header[2].split('  ')[-1]))
        except ValueError:
            increm = 1
            x_list.append(float(header[2+increm].split('  ')[-1]))
        y_list.append(float(header[3+increm].split('  ')[-1]))
        z_list.append(float(header[4+increm].split('  ')[-1]))

    return [np.max(x_list),np.max(y_list),np.max(z_list)]

In [12]:
def generate_training_data(dir_header, dir_list_cif, dir_list_img, voxel_count, 
                           atoms_used, defocus_in, im_size = 256):
    '''
    Generates pairs of voxel grid (X) and image (y) as well as defocus parameter and filenames, for training
    3D grid -> image model.
    
    Inputs:
    dir_header - str, path prefix prepended to every directory in the two lists below
    dir_list_cif - list of directories containing .cif files, relative to dir_header
    dir_list_img - list of directories containing .tif images, relative to dir_header
    voxel_count - voxels per grid dimension, passed through to create_voxel_grid
    atoms_used - List of strings indicating which atom types (e.g. ['O', 'Ce']) to include. 
    defocus_in - Collection of ints indicating which defocus values to include. If set to 1,
                 every defocus value found for a structure is used.
    im_size - int (square image) or 2-sequence of ints giving the size passed to
              PIL's Image.resize
    
    Outputs:
    X_list - List of voxel grids as returned by create_voxel_grid, one entry per image used
             (the grid of a .cif file is repeated once for each of its images).
    y_list - List of images, one for each entry in X_list, as np.arrays obtained by
             resizing the .tif to im_size in PIL (no cropping is performed).
    defocus_list - List of ints, defocus parameter for each sample 
    img_file_list - List of image filenames for each sample.

    Raises:
    ValueError - if a matched image filename contains no "nmDefocus" marker or the
                 characters in front of it are not an integer.
    '''
    # Accept a scalar size (the documented default) as well as an (X,Y) pair;
    # the scalar default used to fail on im_size[0].
    if isinstance(im_size, (int, np.integer)):
        target_size = (int(im_size), int(im_size))
    else:
        target_size = (im_size[0], im_size[1])

    # Largest cell length per axis over all structures
    max_dims = max_z_value(dir_header,dir_list_cif)
    
    cif_files = []
    for dirr in dir_list_cif:
        cif_files.extend(glob.glob(dir_header+dirr+'/Ce*.cif'))

    X_list = []
    y_list = []
    defocus_list = []
    img_file_list = []

    for k,cif in enumerate(cif_files):
        # Create voxel grid from .cif file (coordinates and atom types are not needed here)
        X, _, _ = create_voxel_grid(cif,voxel_count, max_dims, atoms_used=atoms_used)

        # Find all associated images.
        # The "/<file stem>" part of the .cif path is located by searching for a "/"
        # starting a few characters past dir_header, so that the last "/" is the one found.
        # NOTE(review): for paths containing "CIF" the search offset depends on the
        # length of the *image* directory currently being tried, so which directories
        # can match depends on name lengths - confirm this pairing is intended.
        in_cif_subdir = cif.find("CIF") >= 0
        img_files = []
        for dirr in dir_list_img:
            if in_cif_subdir:
                search_from = len(dir_header)+len(dirr)+3
            else:
                search_from = len(dir_header)+3
            stem = cif[cif.find("/",search_from):-4]
            # '[' and ']' are glob metacharacters, so match them with '?'
            pattern = (dir_header+dirr+stem+'*.tif').replace('[','?').replace(']','?')
            img_files.extend(glob.glob(pattern))

        # Filter out 'def' files
        img_files = [im_f for im_f in img_files if im_f.find("_def_") < 0]

        # Parse the defocus value: the one or two characters directly in front of "nmDefocus"
        defocus_cif = []
        for im_f in img_files:
            loc = im_f.find("nmDefocus")
            if loc < 0:
                raise ValueError("No 'nmDefocus' marker in image filename {!r}".format(im_f))
            if im_f[loc-2:loc-1] == '_':
                defocus_cif.append(abs(int(im_f[loc-1:loc])))
            else:
                defocus_cif.append(abs(int(im_f[loc-2:loc])))

        if defocus_in == 1:
            defocus_used = set(defocus_cif)
        else:
            defocus_used = defocus_in

        #X_count counts the number of images for each .cif file, to replicate the voxel that many times
        X_count = 0
        # Collect image data
        for defoc,image in zip(defocus_cif,img_files):
            if defoc in defocus_used:
                X_count +=1
                # Close the file handle as soon as the pixels are copied; with
                # thousands of images an unclosed handle per file adds up.
                with Image.open(image) as img:
                    im_data=np.array(img.resize(target_size))
                y_list.append(im_data)
                defocus_list.append(defoc)
                img_file_list.append(image)    

        # Repeat the grid once per collected image so X_list and y_list stay aligned
        X_list.extend(itertools.repeat(X,X_count))
        print("Images to cif {}: {}\t{} ".format(k,X_count,cif))
    return X_list, y_list, defocus_list, img_file_list

In [13]:
# Inputs
# Root of the dataset, relative to the notebook's working directory.
# (An earlier configuration used dir_header = '../../em_data/' together with
#  only the three models_* / all_images* directories.)
dir_header = 'em_data/'

# Image directories: the three original sets, then '1at', then 'Nat' and
# 'Nat_2' for N = 2..10.
dir_list_img = ['all_images','all_images_wedge','all_images_wedge_2','1at'] + [
    '{}at{}'.format(n_at, suffix) for n_at in range(2, 11) for suffix in ('', '_2')
]

# Structure directories: the three original sets, then the 'CIF' sub-directory
# of every 'Nat' image directory above.
dir_list_cif = ['models_cif','models_wedge_cif','models_wedge_cif_2'] + [
    img_dir + '/CIF' for img_dir in dir_list_img[3:]
]

voxel_count = [84, 54, 98]   # voxels along x, y, z
im_size = [84, 54]           # size handed to PIL's resize
atoms_used = ['Ce', 'O']
defocus_used = 1  # list defocus values to use, or set =1 to use all

In [10]:
# Scratch check of the image lookup for one structure: take the "/<stem>" part
# of a .cif path (dummy directory prefix, '.cif' stripped), look for matching
# .tif files in the '3at_2' image directory, with '[' / ']' replaced by the
# glob wildcard '?'.
cif = 'jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z.cif'
glob.glob(
    (
        dir_header
        + '3at_2'
        + cif[cif.find("/", len(dir_header) + 5):-4]
        + '*.tif'
    ).replace('[', '?').replace(']', '?')
)

['em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_2nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_8nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_10nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_6nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_3nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_7nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_9nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_5nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z_060slc_2048x2048_4nmDefocus.tif',
 'em_data/3at_2/CeO2_[110]_110sur_step_3mu_1-

In [None]:
# Scratch version of the image lookup: gather the .tif files matching the
# current `cif` stem from every image directory. The "/" search starts after
# dir_header so that it catches the last "/" of the path.
img_files = [
    tif_path
    for dirr in dir_list_img
    for tif_path in glob.glob(
        (dir_header + dirr + cif[cif.find("/", len(dir_header) + 5):-4] + '*.tif')
        .replace('[', '?').replace(']', '?')
    )
]

# Drop the 'def' files, i.e. any path containing "_def_"
img_files_clean = []
for i, im_f in enumerate(img_files):
    if "_def_" in im_f:
        continue
    img_files_clean.append(im_f)
img_files = img_files_clean

# Show what is left (defocus filtering happens in a later cell)
img_files

In [None]:
# Position of the "nmDefocus" marker in every image path
nm_loc = list(map(lambda path: path.find("nmDefocus"), img_files))

# The defocus value is the one or two characters directly in front of the
# marker: a single digit when it is preceded by '_', otherwise two characters.
# (A plain two-character slice for every file was the earlier approach.)
defocus_cif = []
for im_f, loc in zip(img_files, nm_loc):
    defocus_cif.append(
        abs(int(im_f[loc-1:loc] if im_f[loc-2:loc-1] == '_' else im_f[loc-2:loc]))
    )

In [None]:
# Scratch version of the image-loading loop for a single structure.
# NOTE: this rebinds the global `defocus_used`; if the notebook is run top to
# bottom, later cells see this set instead of the value from the inputs cell.
defocus_used = set(defocus_cif)

#X_count counts the number of images for each .cif file, to replicate the voxel that many times
X_count = 0
# Collect image data (only counted here; nothing is appended to the output lists)
for defoc,image in zip(defocus_cif,img_files):
    if defoc in defocus_used:
        X_count +=1
        # Close each image file once its pixels are copied into the array
        with Image.open(image) as img:
            im_data=np.array(img.resize((im_size[0],im_size[1])))
X_count

In [None]:
# Sanity check for the "_def_" filter: this stem contains "2def_" but not
# "_def_", so find() reports -1 and such files are not filtered out.
str.find(
    'CeO2_[110]_110sur_step_3mu_1-2def_expanded_000x000y000z',
    "_def_",
)

In [14]:
# Build the full training set from the directories configured in the inputs cell.
# One progress line is printed per .cif file.
X_list, y_list, defocus_list, img_file_list = generate_training_data(
    dir_header=dir_header,
    dir_list_cif=dir_list_cif,
    dir_list_img=dir_list_img,
    voxel_count=voxel_count,
    atoms_used=atoms_used,
    defocus_in=defocus_used,
    im_size=im_size,
)

Images to cif 0: 11	em_data/models_cif/CeO2_011_100sur_8mu.cif 
Images to cif 1: 11	em_data/models_cif/CeO2_011_corner_3mu.cif 
Images to cif 2: 11	em_data/models_cif/CeO2_011_100sur_5mu.cif 
Images to cif 3: 11	em_data/models_cif/CeO2_011_111sur_10mu.cif 
Images to cif 4: 11	em_data/models_cif/CeO2_011_corner_7mu.cif 
Images to cif 5: 11	em_data/models_cif/CeO2_011_cornerb_2mu.cif 
Images to cif 6: 11	em_data/models_cif/CeO2_011_100sur_2mu.cif 
Images to cif 7: 11	em_data/models_cif/CeO2_011_corner_10mu.cif 
Images to cif 8: 11	em_data/models_cif/CeO2_011_111sur_4mu.cif 
Images to cif 9: 11	em_data/models_cif/CeO2_011_111sur_2mu.cif 
Images to cif 10: 11	em_data/models_cif/CeO2_011_100sur_4mu.cif 
Images to cif 11: 11	em_data/models_cif/CeO2_011_cornerb_4mu.cif 
Images to cif 12: 11	em_data/models_cif/CeO2_011_111sur_3mu.cif 
Images to cif 13: 11	em_data/models_cif/CeO2_011_100sur_6mu.cif 
Images to cif 14: 11	em_data/models_cif/CeO2_011_100sur_1mu.cif 
Images to cif 15: 11	em_data/mo

Images to cif 108: 10	em_data/1at/CIF/CeO2_[110]_110sur_step_1mu_0-0def_expanded_-31x295y000z.cif 
Images to cif 109: 10	em_data/1at/CIF/CeO2_[110]_110sur_sawtooh_1mu_0-1def_expanded_000x000y000z.cif 
Images to cif 110: 10	em_data/1at/CIF/CeO2_[110]_110sur_sawtooh_1mu_1-1def_expanded_262x-123y000z.cif 
Images to cif 111: 10	em_data/1at/CIF/CeO2_[110]_110sur_step_1mu_1-1def_expanded_000x000y000z.cif 
Images to cif 112: 10	em_data/1at/CIF/CeO2_[110]_110sur_sawtooh_1mu_0-0def_expanded_218x-393y000z.cif 
Images to cif 113: 10	em_data/1at/CIF/CeO2_[110]_110sur_1mu_1-1def_expanded_-336x231y000z.cif 
Images to cif 114: 10	em_data/1at/CIF/CeO2_[110]_110sur_sawtooh_1mu_0-1def_expanded_284x15y000z.cif 
Images to cif 115: 10	em_data/1at/CIF/CeO2_[110]_110sur_1mu_0-0def_expanded_-241x-044y000z.cif 
Images to cif 116: 10	em_data/1at/CIF/CeO2_[110]_110sur_sawtooh_1mu_0-1def_expanded_284x186y000z.cif 
Images to cif 117: 10	em_data/1at/CIF/CeO2_[110]_110sur_sawtooh_1mu_0-1def_expanded_049x-338y000z.ci

Images to cif 191: 10	em_data/2at/CIF/CeO2_[110]_110sur_2mu_1-2def_expanded_07x232y000z.cif 
Images to cif 192: 0	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_2-2def_expanded_209x211y000z.cif 
Images to cif 193: 0	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_2-2def_expanded_129x-201y000z.cif 
Images to cif 194: 10	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_0-0def_expanded_398x-043y000z.cif 
Images to cif 195: 10	em_data/2at/CIF/CeO2_[110]_110sur_sawtooh_2mu_1-1def_expanded_-072x-179y000z.cif 
Images to cif 196: 10	em_data/2at/CIF/CeO2_[110]_110sur_2mu_1-1def_expanded_363x-109y000z.cif 
Images to cif 197: 10	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_0-0def_expanded_078x-285y000z.cif 
Images to cif 198: 10	em_data/2at/CIF/CeO2_[110]_110sur_2mu_1-2def_expanded_-169x158y000z.cif 
Images to cif 199: 0	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_1-2def_expanded_331x228y000z.cif 
Images to cif 200: 10	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_1-1def_expanded_-395x399y000z.cif 
Images to cif 201:

Images to cif 274: 10	em_data/2at/CIF/CeO2_[110]_110sur_sawtooh_2mu_1-1def_expanded_068x-263y000z.cif 
Images to cif 275: 10	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_0-0def_expanded_000x000y000z.cif 
Images to cif 276: 10	em_data/2at/CIF/CeO2_[110]_110sur_2mu_0-0def_expanded_-304x-052y000z.cif 
Images to cif 277: 10	em_data/2at/CIF/CeO2_[110]_110sur_sawtooh_2mu_0-0def_expanded_122x-254y000z.cif 
Images to cif 278: 10	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_0-0def_expanded_291x-043y000z.cif 
Images to cif 279: 10	em_data/2at/CIF/CeO2_[110]_110sur_2mu_2-2def_expanded_163x109y000z.cif 
Images to cif 280: 10	em_data/2at/CIF/CeO2_[110]_110sur_2mu_2-2def_expanded_-033x-38y000z.cif 
Images to cif 281: 0	em_data/2at/CIF/CeO2_[110]_110sur_step_2mu_2-2def_expanded_129x-284y000z.cif 
Images to cif 282: 10	em_data/2at/CIF/CeO2_[110]_110sur_2mu_1-2def_expanded_-169x232y000z.cif 
Images to cif 283: 10	em_data/2at/CIF/CeO2_[110]_110sur_sawtooh_2mu_2-2def_expanded_381x169y000z.cif 
Images to cif 

Images to cif 356: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_sawtooh_2mu_2-2def_expanded_-091x-263y000z.cif 
Images to cif 357: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_2mu_1-1def_expanded_393x357y000z.cif 
Images to cif 358: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_step_2mu_0-0def_expanded_398x226y000z.cif 
Images to cif 359: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_step_2mu_1-1def_expanded_000x000y000z.cif 
Images to cif 360: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_2mu_1-2def_expanded_112x-116y000z.cif 
Images to cif 361: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_sawtooh_2mu_2-2def_expanded_-091x-229y000z.cif 
Images to cif 362: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_2mu_2-2def_expanded_163x-38y000z.cif 
Images to cif 363: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_sawtooh_2mu_1-1def_expanded_068x-179y000z.cif 
Images to cif 364: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_sawtooh_2mu_0-0def_expanded_122x12y000z.cif 
Images to cif 365: 10	em_data/2at_2/CIF/CeO2_[110]_110sur_sawtooh_2mu_1-1def_expanded_000

Images to cif 438: 10	em_data/3at/CIF/CeO2_[110]_110sur_sawtooh_3mu_1-2def_expanded_174x-023y000z.cif 
Images to cif 439: 0	em_data/3at/CIF/CeO2_[110]_110sur_step_3mu_1-2def_expanded_349x-069y000z.cif 
Images to cif 440: 10	em_data/3at/CIF/CeO2_[110]_110sur_3mu_0-0def_expanded_-059x-245y000z.cif 
Images to cif 441: 10	em_data/3at/CIF/CeO2_[110]_110sur_step_3mu_1-1def_expanded_-245x329y000z.cif 
Images to cif 442: 0	em_data/3at/CIF/CeO2_[110]_110sur_step_3mu_2-2def_expanded_-183x-389y000z.cif 
Images to cif 443: 10	em_data/3at/CIF/CeO2_[110]_110sur_3mu_0-1def_expanded_04x-109y000z.cif 
Images to cif 444: 10	em_data/3at/CIF/CeO2_[110]_110sur_sawtooh_3mu_1-1def_expanded_-361x272y000z.cif 
Images to cif 445: 10	em_data/3at/CIF/CeO2_[110]_110sur_sawtooh_3mu_0-0def_expanded_000x000y000z.cif 
Images to cif 446: 0	em_data/3at/CIF/CeO2_[110]_110sur_step_3mu_1-2def_expanded_349x385y000z.cif 
Images to cif 447: 10	em_data/3at/CIF/CeO2_[110]_110sur_sawtooh_3mu_1-2def_expanded_174x39y000z.cif 
Imag

Images to cif 521: 10	em_data/3at/CIF/CeO2_[110]_110sur_3mu_1-1def_expanded_298x049y000z.cif 
Images to cif 522: 10	em_data/3at/CIF/CeO2_[110]_110sur_3mu_0-1def_expanded_328x-014y000z.cif 
Images to cif 523: 10	em_data/3at/CIF/CeO2_[110]_110sur_step_3mu_1-1def_expanded_-322x329y000z.cif 
Images to cif 524: 0	em_data/3at/CIF/CeO2_[110]_110sur_step_3mu_1-2def_expanded_298x385y000z.cif 
Images to cif 525: 10	em_data/3at/CIF/CeO2_[110]_110sur_sawtooh_3mu_1-2def_expanded_334x228y000z.cif 
Images to cif 526: 10	em_data/3at/CIF/CeO2_[110]_110sur_sawtooh_3mu_0-0def_expanded_054x211y000z.cif 
Images to cif 527: 10	em_data/3at/CIF/CeO2_[110]_110sur_3mu_1-2def_expanded_035x113y000z.cif 
Images to cif 528: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_sawtooh_3mu_0-0def_expanded_-05x-22y000z.cif 
Images to cif 529: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_step_3mu_1-1def_expanded_-322x294y000z.cif 
Images to cif 530: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_step_3mu_1-2def_expanded_298x-069y000z.cif 
Images 

Images to cif 603: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_sawtooh_3mu_1-2def_expanded_334x-023y000z.cif 
Images to cif 604: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_3mu_0-0def_expanded_196x-392y000z.cif 
Images to cif 605: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_3mu_0-1def_expanded_328x-109y000z.cif 
Images to cif 606: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_3mu_0-0def_expanded_105x-392y000z.cif 
Images to cif 607: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_3mu_1-2def_expanded_157x113y000z.cif 
Images to cif 608: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_step_3mu_0-0def_expanded_-319x-008y000z.cif 
Images to cif 609: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_sawtooh_3mu_2-2def_expanded_009x068y000z.cif 
Images to cif 610: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_sawtooh_3mu_2-2def_expanded_009x063y000z.cif 
Images to cif 611: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_sawtooh_3mu_0-0def_expanded_102x-22y000z.cif 
Images to cif 612: 10	em_data/3at_2/CIF/CeO2_[110]_110sur_3mu_0-0def_expanded_196x-345y000z.c

Images to cif 686: 10	em_data/4at/CIF/CeO2_[110]_110sur_sawtooh_4mu_1-2def_expanded_-05x-243y000z.cif 
Images to cif 687: 0	em_data/4at/CIF/CeO2_[110]_110sur_step_4mu_1-2def_expanded_197x297y000z.cif 
Images to cif 688: 10	em_data/4at/CIF/CeO2_[110]_110sur_sawtooh_4mu_1-1def_expanded_105x-304y000z.cif 
Images to cif 689: 0	em_data/4at/CIF/CeO2_[110]_110sur_step_4mu_2-2def_expanded_385x027y000z.cif 
Images to cif 690: 10	em_data/4at/CIF/CeO2_[110]_110sur_step_4mu_0-0def_expanded_058x-379y000z.cif 
Images to cif 691: 0	em_data/4at/CIF/CeO2_[110]_110sur_step_4mu_1-2def_expanded_357x-393y000z.cif 
Images to cif 692: 10	em_data/4at/CIF/CeO2_[110]_110sur_4mu_1-1def_expanded_-156x-377y000z.cif 
Images to cif 693: 10	em_data/4at/CIF/CeO2_[110]_110sur_4mu_1-1def_expanded_-218x-377y000z.cif 
Images to cif 694: 10	em_data/4at/CIF/CeO2_[110]_110sur_sawtooh_4mu_1-1def_expanded_-078x003y000z.cif 
Images to cif 695: 10	em_data/4at/CIF/CeO2_[110]_110sur_sawtooh_4mu_1-1def_expanded_105x135y000z.cif 
Im

Images to cif 769: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_step_4mu_2-2def_expanded_178x047y000z.cif 
Images to cif 770: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_2-2def_expanded_169x-026y000z.cif 
Images to cif 771: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_step_4mu_1-1def_expanded_-069x-362y000z.cif 
Images to cif 772: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_sawtooh_4mu_2-2def_expanded_367x329y000z.cif 
Images to cif 773: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_0-0def_expanded_052x207y000z.cif 
Images to cif 774: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_2-2def_expanded_394x-364y000z.cif 
Images to cif 775: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_sawtooh_4mu_0-0def_expanded_-161x-13y000z.cif 
Images to cif 776: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_1-1def_expanded_-156x-345y000z.cif 
Images to cif 777: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_0-0def_expanded_082x-227y000z.cif 
Images to cif 778: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_sawtooh_4mu_1-2def_expanded_-05x08y000z.cif 

Images to cif 850: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_sawtooh_4mu_2-2def_expanded_325x-005y000z.cif 
Images to cif 851: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_2-2def_expanded_000x000y000z.cif 
Images to cif 852: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_sawtooh_4mu_1-1def_expanded_349x003y000z.cif 
Images to cif 853: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_sawtooh_4mu_1-2def_expanded_022x08y000z.cif 
Images to cif 854: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_2-2def_expanded_169x-364y000z.cif 
Images to cif 855: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_sawtooh_4mu_0-0def_expanded_000x000y000z.cif 
Images to cif 856: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_step_4mu_2-2def_expanded_251x027y000z.cif 
Images to cif 857: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_1-2def_expanded_000x000y000z.cif 
Images to cif 858: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_step_4mu_1-2def_expanded_-142x103y000z.cif 
Images to cif 859: 10	em_data/4at_2/CIF/CeO2_[110]_110sur_4mu_1-1def_expanded_000x000y000z.c

Images to cif 933: 10	em_data/5at/CIF/CeO2_[110]_110sur_sawtooh_5mu_2-2def_expanded_275x139y000z.cif 
Images to cif 934: 10	em_data/5at/CIF/CeO2_[110]_110sur_5mu_0-0def_expanded_000x000y000z.cif 
Images to cif 935: 10	em_data/5at/CIF/CeO2_[110]_110sur_5mu_1-1def_expanded_000x000y000z.cif 
Images to cif 936: 10	em_data/5at/CIF/CeO2_[110]_110sur_sawtooh_5mu_2-2def_expanded_-379x139y000z.cif 
Images to cif 937: 10	em_data/5at/CIF/CeO2_[110]_110sur_step_5mu_1-1def_expanded_-305x-096y000z.cif 
Images to cif 938: 10	em_data/5at/CIF/CeO2_[110]_110sur_sawtooh_5mu_1-2def_expanded_-176x-114y000z.cif 
Images to cif 939: 10	em_data/5at/CIF/CeO2_[110]_110sur_5mu_1-2def_expanded_-017x-358y000z.cif 
Images to cif 940: 10	em_data/5at/CIF/CeO2_[110]_110sur_5mu_1-1def_expanded_193x152y000z.cif 
Images to cif 941: 10	em_data/5at/CIF/CeO2_[110]_110sur_5mu_0-0def_expanded_-24x092y000z.cif 
Images to cif 942: 0	em_data/5at/CIF/CeO2_[110]_110sur_step_5mu_2-2def_expanded_30x132y000z.cif 
Images to cif 943: 10

Images to cif 1016: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_sawtooh_5mu_1-2def_expanded_-003x-114y000z.cif 
Images to cif 1017: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_5mu_1-2def_expanded_-116x-358y000z.cif 
Images to cif 1018: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_0-0def_expanded_397x017y000z.cif 
Images to cif 1019: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_5mu_0-0def_expanded_038x27y000z.cif 
Images to cif 1020: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_1-1def_expanded_-21x-311y000z.cif 
Images to cif 1021: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_sawtooh_5mu_0-0def_expanded_000x000y000z.cif 
Images to cif 1022: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_sawtooh_5mu_1-1def_expanded_000x000y000z.cif 
Images to cif 1023: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_2-2def_expanded_-286x292y000z.cif 
Images to cif 1024: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_1-2def_expanded_-00x-00y000z.cif 
Images to cif 1025: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_sawtooh_5mu_0-0def_

Images to cif 1097: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_0-0def_expanded_021x017y000z.cif 
Images to cif 1098: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_5mu_0-0def_expanded_174x-12y000z.cif 
Images to cif 1099: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_sawtooh_5mu_1-2def_expanded_-176x-189y000z.cif 
Images to cif 1100: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_5mu_0-0def_expanded_174x092y000z.cif 
Images to cif 1101: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_0-0def_expanded_021x-203y000z.cif 
Images to cif 1102: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_0-0def_expanded_000x000y000z.cif 
Images to cif 1103: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_sawtooh_5mu_1-1def_expanded_101x241y000z.cif 
Images to cif 1104: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_sawtooh_5mu_2-2def_expanded_000x000y000z.cif 
Images to cif 1105: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_2-2def_expanded_194x292y000z.cif 
Images to cif 1106: 10	em_data/5at_2/CIF/CeO2_[110]_110sur_step_5mu_1-1def_expan

Images to cif 1180: 10	em_data/6at/CIF/CeO2_[110]_110sur_6mu_1-2def_expanded_-146x-104y000z.cif 
Images to cif 1181: 10	em_data/6at/CIF/CeO2_[110]_110sur_6mu_2-2def_expanded_-06x224y000z.cif 
Images to cif 1182: 10	em_data/6at/CIF/CeO2_[110]_110sur_6mu_0-0def_expanded_108x197y000z.cif 
Images to cif 1183: 10	em_data/6at/CIF/CeO2_[110]_110sur_sawtooh_6mu_1-2def_expanded_-292x379y000z.cif 
Images to cif 1184: 10	em_data/6at/CIF/CeO2_[110]_110sur_sawtooh_6mu_0-0def_expanded_-10x215y000z.cif 
Images to cif 1185: 10	em_data/6at/CIF/CeO2_[110]_110sur_sawtooh_6mu_1-2def_expanded_028x-337y000z.cif 
Images to cif 1186: 10	em_data/6at/CIF/CeO2_[110]_110sur_sawtooh_6mu_1-2def_expanded_-292x-185y000z.cif 
Images to cif 1187: 10	em_data/6at/CIF/CeO2_[110]_110sur_6mu_2-2def_expanded_-28x057y000z.cif 
Images to cif 1188: 10	em_data/6at/CIF/CeO2_[110]_110sur_6mu_1-1def_expanded_-234x-018y000z.cif 
Images to cif 1189: 0	em_data/6at/CIF/CeO2_[110]_110sur_step_6mu_2-2def_expanded_-382x-213y000z.cif 
Imag

Images to cif 1262: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_step_6mu_1-1def_expanded_-345x-338y000z.cif 
Images to cif 1263: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_6mu_1-2def_expanded_-385x-104y000z.cif 
Images to cif 1264: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_sawtooh_6mu_0-0def_expanded_000x000y000z.cif 
Images to cif 1265: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_sawtooh_6mu_2-2def_expanded_007x-29y000z.cif 
Images to cif 1266: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_step_6mu_0-0def_expanded_16x21y000z.cif 
Images to cif 1267: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_6mu_0-0def_expanded_108x266y000z.cif 
Images to cif 1268: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_6mu_2-2def_expanded_-117x09y000z.cif 
Images to cif 1269: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_6mu_1-1def_expanded_-234x-20y000z.cif 
Images to cif 1270: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_sawtooh_6mu_1-1def_expanded_000x000y000z.cif 
Images to cif 1271: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_6mu_1-2def_expanded_-146x-179y0

Images to cif 1343: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_sawtooh_6mu_1-2def_expanded_028x-185y000z.cif 
Images to cif 1344: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_6mu_1-2def_expanded_-385x-113y000z.cif 
Images to cif 1345: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_step_6mu_1-2def_expanded_-318x069y000z.cif 
Images to cif 1346: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_sawtooh_6mu_0-0def_expanded_-398x-328y000z.cif 
Images to cif 1347: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_sawtooh_6mu_0-0def_expanded_-10x-329y000z.cif 
Images to cif 1348: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_sawtooh_6mu_1-1def_expanded_-349x-385y000z.cif 
Images to cif 1349: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_step_6mu_2-2def_expanded_248x18y000z.cif 
Images to cif 1350: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_6mu_1-1def_expanded_-161x-319y000z.cif 
Images to cif 1351: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_sawtooh_6mu_1-2def_expanded_000x000y000z.cif 
Images to cif 1352: 10	em_data/6at_2/CIF/CeO2_[110]_110sur_step_6mu

Images to cif 1425: 10	em_data/7at/CIF/CeO2_[110]_110sur_sawtooh_7mu_0-0def_expanded_244x-024y000z.cif 
Images to cif 1426: 0	em_data/7at/CIF/CeO2_[110]_110sur_step_7mu_2-2def_expanded_000x000y000z.cif 
Images to cif 1427: 0	em_data/7at/CIF/CeO2_[110]_110sur_step_7mu_1-2def_expanded_209x-117y000z.cif 
Images to cif 1428: 10	em_data/7at/CIF/CeO2_[110]_110sur_7mu_2-2def_expanded_-162x-256y000z.cif 
Images to cif 1429: 10	em_data/7at/CIF/CeO2_[110]_110sur_7mu_1-1def_expanded_-238x133y000z.cif 
Images to cif 1430: 10	em_data/7at/CIF/CeO2_[110]_110sur_7mu_1-2def_expanded_279x288y000z.cif 
Images to cif 1431: 10	em_data/7at/CIF/CeO2_[110]_110sur_sawtooh_7mu_1-1def_expanded_-02x113y000z.cif 
Images to cif 1432: 0	em_data/7at/CIF/CeO2_[110]_110sur_step_7mu_2-2def_expanded_38x-106y000z.cif 
Images to cif 1433: 10	em_data/7at/CIF/CeO2_[110]_110sur_7mu_1-2def_expanded_-253x288y000z.cif 
Images to cif 1434: 0	em_data/7at/CIF/CeO2_[110]_110sur_step_7mu_2-2def_expanded_38x-219y000z.cif 
Images to ci

Images to cif 1507: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_sawtooh_7mu_1-1def_expanded_-02x-263y000z.cif 
Images to cif 1508: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_step_7mu_0-0def_expanded_-245x-374y000z.cif 
Images to cif 1509: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_step_7mu_1-2def_expanded_209x20y000z.cif 
Images to cif 1510: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_step_7mu_1-2def_expanded_-195x-117y000z.cif 
Images to cif 1511: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_step_7mu_2-2def_expanded_-382x234y000z.cif 
Images to cif 1512: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_7mu_0-0def_expanded_-252x-113y000z.cif 
Images to cif 1513: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_7mu_1-1def_expanded_-238x172y000z.cif 
Images to cif 1514: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_sawtooh_7mu_1-2def_expanded_-01x-128y000z.cif 
Images to cif 1515: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_sawtooh_7mu_1-2def_expanded_345x-03y000z.cif 
Images to cif 1516: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_7mu_0-0def_expa

Images to cif 1588: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_step_7mu_1-1def_expanded_009x-355y000z.cif 
Images to cif 1589: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_7mu_2-2def_expanded_-181x-256y000z.cif 
Images to cif 1590: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_7mu_1-1def_expanded_000x000y000z.cif 
Images to cif 1591: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_step_7mu_1-1def_expanded_009x-105y000z.cif 
Images to cif 1592: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_step_7mu_2-2def_expanded_-382x-106y000z.cif 
Images to cif 1593: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_7mu_0-0def_expanded_000x000y000z.cif 
Images to cif 1594: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_sawtooh_7mu_1-2def_expanded_005x-128y000z.cif 
Images to cif 1595: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_7mu_1-2def_expanded_279x-158y000z.cif 
Images to cif 1596: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_sawtooh_7mu_1-2def_expanded_345x-128y000z.cif 
Images to cif 1597: 10	em_data/7at_2/CIF/CeO2_[110]_110sur_sawtooh_7mu_2-2def_expanded_

Images to cif 1670: 10	em_data/8at/CIF/CeO2_[110]_110sur_8mu_0-0def_expanded_282x-367y000z.cif 
Images to cif 1671: 10	em_data/8at/CIF/CeO2_[110]_110sur_step_8mu_0-0def_expanded_172x106y000z.cif 
Images to cif 1672: 10	em_data/8at/CIF/CeO2_[110]_110sur_8mu_0-0def_expanded_282x-218y000z.cif 
Images to cif 1673: 0	em_data/8at/CIF/CeO2_[110]_110sur_8mu_0-0def_expanded_282x-372y000z.cif 
Images to cif 1674: 10	em_data/8at/CIF/CeO2_[110]_110sur_step_8mu_0-0def_expanded_382x-384y000z.cif 
Images to cif 1675: 10	em_data/8at/CIF/CeO2_[110]_110sur_sawtooh_8mu_1-1def_expanded_353x013y000z.cif 
Images to cif 1676: 10	em_data/8at/CIF/CeO2_[110]_110sur_8mu_2-2def_expanded_-278x233y000z.cif 
Images to cif 1677: 10	em_data/8at/CIF/CeO2_[110]_110sur_8mu_1-2def_expanded_344x225y000z.cif 
Images to cif 1678: 10	em_data/8at/CIF/CeO2_[110]_110sur_8mu_0-0def_expanded_013x-372y000z.cif 
Images to cif 1679: 10	em_data/8at/CIF/CeO2_[110]_110sur_step_8mu_0-0def_expanded_055x-384y000z.cif 
Images to cif 1680: 1

Images to cif 1752: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_step_8mu_1-1def_expanded_-057x-329y000z.cif 
Images to cif 1753: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu_1-2def_expanded_-238x30y000z.cif 
Images to cif 1754: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_step_8mu_2-2def_expanded_-114x21y000z.cif 
Images to cif 1755: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu_1-2def_expanded_023x-047y000z.cif 
Images to cif 1756: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_8mu_1-1def_expanded_-253x-254y000z.cif 
Images to cif 1757: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_step_8mu_1-2def_expanded_174x-213y000z.cif 
Images to cif 1758: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_step_8mu_1-2def_expanded_-056x236y000z.cif 
Images to cif 1759: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_step_8mu_1-2def_expanded_174x236y000z.cif 
Images to cif 1760: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu_1-2def_expanded_082x-047y000z.cif 
Images to cif 1761: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu

Images to cif 1833: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu_2-2def_expanded_284x-093y000z.cif 
Images to cif 1834: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu_0-0def_expanded_000x000y000z.cif 
Images to cif 1835: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_step_8mu_1-1def_expanded_-091x-329y000z.cif 
Images to cif 1836: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu_0-0def_expanded_-338x-283y000z.cif 
Images to cif 1837: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu_1-1def_expanded_-23x-019y000z.cif 
Images to cif 1838: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_8mu_2-2def_expanded_-114x176y000z.cif 
Images to cif 1839: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_step_8mu_1-2def_expanded_174x165y000z.cif 
Images to cif 1840: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_8mu_0-0def_expanded_013x-367y000z.cif 
Images to cif 1841: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8mu_0-0def_expanded_352x247y000z.cif 
Images to cif 1842: 10	em_data/8at_2/CIF/CeO2_[110]_110sur_sawtooh_8m

Images to cif 1915: 10	em_data/9at/CIF/CeO2_[110]_110sur_9mu_2-2def_expanded_-335x-265y000z.cif 
Images to cif 1916: 10	em_data/9at/CIF/CeO2_[110]_110sur_sawtooh_9mu_2-2def_expanded_-271x-041y000z.cif 
Images to cif 1917: 0	em_data/9at/CIF/CeO2_[110]_110sur_step_9mu_1-2def_expanded_000x000y000z.cif 
Images to cif 1918: 10	em_data/9at/CIF/CeO2_[110]_110sur_step_9mu_1-1def_expanded_-214x323y000z.cif 
Images to cif 1919: 10	em_data/9at/CIF/CeO2_[110]_110sur_9mu_2-2def_expanded_057x388y000z.cif 
Images to cif 1920: 10	em_data/9at/CIF/CeO2_[110]_110sur_sawtooh_9mu_1-2def_expanded_-298x198y000z.cif 
Images to cif 1921: 10	em_data/9at/CIF/CeO2_[110]_110sur_9mu_1-2def_expanded_000x000y000z.cif 
Images to cif 1922: 0	em_data/9at/CIF/CeO2_[110]_110sur_step_9mu_1-2def_expanded_-384x-05y000z.cif 
Images to cif 1923: 10	em_data/9at/CIF/CeO2_[110]_110sur_9mu_0-0def_expanded_197x-19y000z.cif 
Images to cif 1924: 10	em_data/9at/CIF/CeO2_[110]_110sur_sawtooh_9mu_0-0def_expanded_031x35y000z.cif 
Images 

Images to cif 1997: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_sawtooh_9mu_1-1def_expanded_064x-034y000z.cif 
Images to cif 1998: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_step_9mu_2-2def_expanded_-135x054y000z.cif 
Images to cif 1999: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_step_9mu_2-2def_expanded_-135x092y000z.cif 
Images to cif 2000: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_1-1def_expanded_000x000y000z.cif 
Images to cif 2001: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_step_9mu_1-1def_expanded_-081x-197y000z.cif 
Images to cif 2002: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_step_9mu_0-0def_expanded_094x156y000z.cif 
Images to cif 2003: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_sawtooh_9mu_1-1def_expanded_372x20y000z.cif 
Images to cif 2004: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_0-0def_expanded_316x-19y000z.cif 
Images to cif 2005: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_0-0def_expanded_197x-013y000z.cif 
Images to cif 2006: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_0-0def_expanded_316x-01

Images to cif 2078: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_step_9mu_1-1def_expanded_-214x-197y000z.cif 
Images to cif 2079: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_1-1def_expanded_-284x-127y000z.cif 
Images to cif 2080: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_sawtooh_9mu_1-2def_expanded_10x198y000z.cif 
Images to cif 2081: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_2-2def_expanded_-335x099y000z.cif 
Images to cif 2082: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_1-1def_expanded_062x-356y000z.cif 
Images to cif 2083: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_0-0def_expanded_316x25y000z.cif 
Images to cif 2084: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_step_9mu_0-0def_expanded_-363x24y000z.cif 
Images to cif 2085: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_step_9mu_2-2def_expanded_113x092y000z.cif 
Images to cif 2086: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_9mu_1-1def_expanded_354x-127y000z.cif 
Images to cif 2087: 10	em_data/9at_2/CIF/CeO2_[110]_110sur_sawtooh_9mu_1-1def_expanded_-042x-034y00

Images to cif 2159: 10	em_data/10at/CIF/CeO2_[110]_110sur_10mu_1-1def_expanded_186x-349y000z.cif 
Images to cif 2160: 10	em_data/10at/CIF/CeO2_[110]_110sur_10mu_1-1def_expanded_000x000y000z.cif 
Images to cif 2161: 10	em_data/10at/CIF/CeO2_[110]_110sur_sawtooh_10mu_2-2def_expanded_182x-133y000z.cif 
Images to cif 2162: 0	em_data/10at/CIF/CeO2_[110]_110sur_step_10mu_1-2def_expanded_054x-046y000z.cif 
Images to cif 2163: 10	em_data/10at/CIF/CeO2_[110]_110sur_sawtooh_10mu_0-0def_expanded_-365x072y000z.cif 
Images to cif 2164: 0	em_data/10at/CIF/CeO2_[110]_110sur_step_10mu_2-2def_expanded_038x279y000z.cif 
Images to cif 2165: 0	em_data/10at/CIF/CeO2_[110]_110sur_step_10mu_2-2def_expanded_000x000y000z.cif 
Images to cif 2166: 10	em_data/10at/CIF/CeO2_[110]_110sur_10mu_1-2def_expanded_197x-104y000z.cif 
Images to cif 2167: 10	em_data/10at/CIF/CeO2_[110]_110sur_10mu_0-0def_expanded_-057x-033y000z.cif 
Images to cif 2168: 10	em_data/10at/CIF/CeO2_[110]_110sur_step_10mu_1-1def_expanded_-178x-15

Images to cif 2239: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_1-2def_expanded_-352x-387y000z.cif 
Images to cif 2240: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_1-2def_expanded_000x000y000z.cif 
Images to cif 2241: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_step_10mu_2-2def_expanded_138x279y000z.cif 
Images to cif 2242: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_step_10mu_1-1def_expanded_-243x-159y000z.cif 
Images to cif 2243: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_step_10mu_2-2def_expanded_038x-141y000z.cif 
Images to cif 2244: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_0-0def_expanded_102x-033y000z.cif 
Images to cif 2245: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_step_10mu_1-2def_expanded_054x393y000z.cif 
Images to cif 2246: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_1-2def_expanded_231x-387y000z.cif 
Images to cif 2247: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_2-2def_expanded_-334x242y000z.cif 
Images to cif 2248: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_sawtooh_10mu_0-0def_e

Images to cif 2318: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_step_10mu_1-2def_expanded_261x-182y000z.cif 
Images to cif 2319: 0	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_0-0def_expanded_-057x-365y000z.cif 
Images to cif 2320: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_0-0def_expanded_102x-16y000z.cif 
Images to cif 2321: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_sawtooh_10mu_1-1def_expanded_-131x-033y000z.cif 
Images to cif 2322: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_1-2def_expanded_-352x-104y000z.cif 
Images to cif 2323: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_10mu_1-1def_expanded_-208x041y000z.cif 
Images to cif 2324: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_sawtooh_10mu_1-2def_expanded_-398x-232y000z.cif 
Images to cif 2325: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_step_10mu_1-2def_expanded_-13x-046y000z.cif 
Images to cif 2326: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_step_10mu_0-0def_expanded_-289x117y000z.cif 
Images to cif 2327: 10	em_data/10at_2/CIF/CeO2_[110]_110sur_sawtooh

In [15]:
# Number of entries in defocus_list (built by an earlier cell).
len(defocus_list)

21117

In [16]:
import pickle

# Each dataset component is written to its own pickle file in the working
# directory; the table keeps the file name next to the object it stores.
pickle_targets = (
    ("X_list_full.pkl", X_list),
    ("y_list_full.pkl", y_list),
    ("defocus_list_full.pkl", defocus_list),
    ("img_file_list_full.pkl", img_file_list),
)

for pkl_name, payload in pickle_targets:
    with open(pkl_name, "wb") as fp:   #Pickling
        pickle.dump(payload, fp)

In [None]:
def last_filled_slice(grid):
    '''
    Locate the last occupied slice of a grid.

    The first two axes are summed away in turn and the largest index along the
    remaining axis whose total is strictly positive is returned. If no slice has
    a positive total, the ValueError raised by max() on an empty sequence propagates.
    '''
    # Collapse axis 0 twice so only the last axis of a 3D grid remains
    collapsed_once = grid.sum(axis=0)
    per_slice_total = collapsed_once.sum(axis=0)
    # Positions along the remaining axis that hold a positive total
    occupied = [position for position, total in enumerate(per_slice_total) if total > 0]
    return max(occupied)

In [None]:
def count_training_data(dir_header, dir_list_cif, dir_list_img,defocus_used):
    '''
    Counts the .cif structures and their associated .tif images without loading any data.

    Inputs:
    dir_header - str, path prefix prepended to every entry of dir_list_cif and dir_list_img
    dir_list_cif - list of directories containing .cif files, appended to dir_header
    dir_list_img - list of directories containing .tif files, appended to dir_header
    defocus_used - List of ints indicating which defocus values to include. If set to 1, all
                   defocus values found for a structure are used.

    Outputs:
    cif_count - int, number of .cif files found
    cif_image_count - List of ints, one per .cif file (in glob order): number of distinct
                      defocus values that are both requested and present among its images
    image_count - int, sum of cif_image_count

    Raises:
    ValueError - if a matched image filename has no 'nmDefocus' tag or the characters
                 preceding the tag are not an integer.

    NOTE: defocus values are read from the (at most) two characters preceding 'nmDefocus'
          in the image filename, and the sign is discarded.
    '''
    cif_files = []
    for dirr in dir_list_cif:
        cif_files.extend(glob.glob(dir_header+dirr+'/Ce*.cif'))

    # The scalar 1 is the "use every defocus value" flag. Evaluate it once and never
    # rebind defocus_used: rebinding inside the loop made the first structure's
    # defocus set apply to every later structure.
    use_all_defocus = bool(np.isscalar(defocus_used) and defocus_used == 1)

    cif_image_count = []
    for cif in cif_files:
        # Path fragment ('/<name>' without the .cif extension) shared with the images.
        # find starts after the dir_header so it catches the last /
        # NOTE(review): this relies on the offset len(dir_header)+5 landing inside the
        # last directory component of the .cif path - confirm for all directory names.
        stem = cif[cif.find("/",len(dir_header)+5):-4]

        # Find all associated images; '[' and ']' are glob metacharacters, so they are
        # replaced by the single-character wildcard '?'.
        img_files = []
        for dirr in dir_list_img:
            pattern = (dir_header+dirr+stem+'*.tif').replace('[','?').replace(']','?')
            img_files.extend(glob.glob(pattern))

        # Filter out 'def' files
        img_files = [im_f for im_f in img_files if im_f.find("_def_") < 0]

        # Collect the distinct defocus values available for this structure
        defocus_cif = set()
        for im_f in img_files:
            loc = im_f.find("nmDefocus")
            if loc < 0:
                raise ValueError("no 'nmDefocus' tag in image filename: " + im_f)
            tag = im_f[max(loc-2, 0):loc]
            if tag.startswith('_'):
                # single-digit value preceded by the '_' separator
                tag = tag[1:]
            defocus_cif.add(abs(int(tag)))

        # Filter by Defocus value: only values that actually exist for this structure
        # are counted, so a structure without matching images contributes 0.
        if use_all_defocus:
            defocus_selected = defocus_cif
        else:
            defocus_selected = defocus_cif.intersection(defocus_used)

        cif_image_count.append(len(defocus_selected))

    return len(cif_files), cif_image_count, sum(cif_image_count)

In [None]:
# Tally structures and images; dir_header, dir_list_cif, dir_list_img and
# defocus_used are not defined in this cell and must come from earlier cells.
cif_count, cif_image_count, image_count = count_training_data(dir_header, dir_list_cif, dir_list_img,defocus_used)

In [None]:
# Report the totals returned by count_training_data in the previous cell.
print(".cif count: ",cif_count)
print("image_count: ",image_count)

In [None]:
# Display defocus_list as the cell output.
# NOTE(review): len(defocus_list) was 21117 in the saved run, so this prints a
# very long list - consider showing only a slice.
defocus_list