In [None]:
import os
import sys
import subprocess
import pickle
import numpy as np
import ase.io as aseIO
from sklearn.preprocessing import MinMaxScaler

def getMinIdx(filename):
    """Return the (width, sigma) pair stored at the minimum-error grid point.

    The array loaded from `filename` is indexed as ``x[0, i, j, 0, 0, :]``;
    entry 5 of the last axis is treated as the error to minimise, and
    entries 1 and 2 are returned as the width and sigma for the grid
    position (i, j) where that error is smallest.

    Parameters
    ----------
    filename : str
        Path to a ``.npy`` file readable by ``np.load``.

    Returns
    -------
    tuple
        ``(width, sigma)`` taken from the minimum-error grid position.
    """
    x = np.load(filename)
    xErr = x[0, :, :, 0, 0, 5]

    # argmin works on the flattened (row-major) error surface; unravel_index
    # converts that flat position back to integer (row, col) indices. The
    # previous ``xMin/nCol`` only produced an integer under Python 2 division
    # and would yield a float (invalid index) with true division.
    row, col = np.unravel_index(np.argmin(xErr), xErr.shape)

    # Output width, sigma
    return (x[0, row, col, 0, 0, 1],
            x[0, row, col, 0, 0, 2])

# --- Run configuration ------------------------------------------------------
# Every value is kept as a string because the lists are passed straight
# through as command-line arguments and path components in later cells.

# Number of structures per DEEM sample, and the short label used in paths
sampleSizes, sampleNames = ['10000'], ['10k']

# Property keys handed to the scripts via '-p', and their display/path names
properties, propertyNames = ['Energy_per_Si_Opt', 'volume'], ['Energy', 'Volume']

# Cutoff values handed to SOAP.py via '-c'
cutoffs = ['3.5', '6.0']

# Kernel identifiers handed to the scripts via '-kernel', and their path names
kernels, kernelNames = ['gaussian'], ['Gaussian']

# Value handed to learningCurves.py via '-ntrain' (paired with sampleSizes)
maxTrain = ['8000']

# Point lists keyed by sample name / property name.
# NOTE(review): neither dict is referenced in the cells visible here.
trainPts = {'10k': '10 30 50 100 300 500 1000 3000 5000 8000'.split()}
pcaPts = dict(Energy='1 2 4 10 20 100 300'.split(),
              Volume='1 2 4 10 20 50 100'.split())

# Load previously optimised hyperparameters if a cache file exists in the
# working directory; otherwise start from an empty dictionary that the
# optimisation cells fill in.
# NOTE(review): pickle.load executes arbitrary code from the file -- only use
# a hyperparameters.pkl that this notebook produced itself.
try:
    # 'with' guarantees the handle is closed even if unpickling raises
    with open('hyperparameters.pkl', 'rb') as parameterFile:
        print('Found hyperparameter file')
        hypers = pickle.load(parameterFile)
except IOError:
    # File missing or unreadable: begin with no stored hyperparameters
    hypers = {}
    print('Creating new hyperparameters')

In [None]:
# Show the hyperparameter dictionary currently held in memory
print(hypers)

# TODO: figure out how to split the test, training, and validation sets

# Make k-Folds

In [None]:
# DEEM: run kFolds.py once per configured sample size
for ss, sn in zip(sampleSizes, sampleNames):
    print('Current construction: %s' % sn)
    workDir = '../Processed_Data/DEEM_%s' % sn

    # Make sure the output directory exists
    subprocess.call(['mkdir', '-p', workDir])

    # Assemble the k-fold command piece by piece, then run it
    kFoldCmd = ['python', 'kFolds.py']
    kFoldCmd += ['-nt', ss]
    kFoldCmd += ['-f', '0.9965']  # 35 test structures (same as IZA)
    kFoldCmd += ['-k', '5']
    kFoldCmd += ['-output', workDir]
    subprocess.call(kFoldCmd)

In [None]:
# Optimized IZA: same k-fold script, with '-nt 35' and '-f 1.0'

workDir = '../Processed_Data/IZA'
print('Current construction: IZA')

# Make sure the output directory exists
subprocess.call(['mkdir', '-p', workDir])

kFoldArgs = ['-nt', '35',
             '-f', '1.0',
             '-k', '5',
             '-output', workDir]
subprocess.call(['python', 'kFolds.py'] + kFoldArgs)

# Extract atom data and properties

## DEEM 10k

In [None]:
# Attach the optimized energies to the DEEM xyz file.
# '-c 8' and '-n Energy_per_Si_Opt' are passed through to the script
# (presumably the data column to read and the property name -- confirm
# against appendStructureProperty.py).
appendCmd = ['python', 'appendStructureProperty.py',
             '-f', '../Raw_Data/GULP/DEEM_10000_EnergyComponents.dat',
             '-xyz', '../Raw_Data/DEEM_10000.xyz',
             '-c', '8',
             '-n', 'Energy_per_Si_Opt']
subprocess.call(appendCmd)

# Relocate the resulting *_SP.xyz file into the GULP folder under its OPT name
subprocess.call(['mv',
                 '../Raw_Data/DEEM_10000_SP.xyz',
                 '../Raw_Data/GULP/DEEM_10000_OPT.xyz'])

In [None]:
# Extract SOAP environment atoms for DEEM (atomLabels.py with '-Z 14'),
# carrying along the 'Energy_per_Si_Opt' and 'Filename' structure properties
atomLabelArgs = ['-input', '../Raw_Data/GULP/DEEM_10000_OPT.xyz',
                 '-output', '../Processed_Data/DEEM_10k/atoms.dat',
                 '-Z', '14',
                 '-sp', 'Energy_per_Si_Opt', 'Filename']
subprocess.call(['python', 'atomLabels.py'] + atomLabelArgs)

## IZA

In [None]:
# Attach the optimized energies to the IZA xyz file
appendCmd = ['python', 'appendStructureProperty.py',
             '-f', '../Raw_Data/GULP/Energies_IZA_ConPAfterConV.dat',
             '-xyz', '../Raw_Data/GULP/Final_Confs_IZA_ConPAfterConV.xyz',
             '-c', '8',
             '-n', 'Energy_per_Si_Opt']
subprocess.call(appendCmd)

# Give the resulting *_SP.xyz file its OPT name
subprocess.call(['mv',
                 '../Raw_Data/GULP/Final_Confs_IZA_ConPAfterConV_SP.xyz',
                 '../Raw_Data/GULP/IZA_OPT.xyz'])

# Strip the stray space that follows 'Filename=' in the xyz file (in place)
subprocess.call(['sed', '-i', "s/Filename= /Filename=/g",
                 '../Raw_Data/GULP/IZA_OPT.xyz'])

In [None]:
# Extract SOAP environment atoms for IZA (atomLabels.py with '-Z 14').
# NOTE(review): the output lands in '../Processed_Data/IZA_OPT/', whereas the
# other IZA cells work under '../Processed_Data/IZA/'; later cells read
# atoms.dat from this IZA_OPT path, so the two must stay in sync.
atomLabelArgs = ['-input', '../Raw_Data/GULP/IZA_OPT.xyz',
                 '-output', '../Processed_Data/IZA_OPT/atoms.dat',
                 '-Z', '14',
                 '-sp', 'Energy_per_Si_Opt', 'Filename']
subprocess.call(['python', 'atomLabels.py'] + atomLabelArgs)

# Compute SOAP

## DEEM 10k

In [None]:
# For every cutoff: pick random structures, compute their SOAPs, choose SOAP
# components by FPS, recompute all SOAPs restricted to those components, and
# finally choose representative environments by FPS.
for c in cutoffs:
    print('Current construction: %s' % c)
    structureFile = '../Raw_Data/GULP/DEEM_10000_OPT.xyz'
    workDir = '../Processed_Data/DEEM_10k/PCA/%s' % c

    # Paths that are reused by several steps below
    soapFile = workDir + '/SOAPFiles.dat'
    fpsFile = workDir + '/FPS.idxs'
    componentIdxs = workDir + '/FPS-c.idxs'

    # SOAP.py options common to both SOAP computations
    soapParams = ['-n', '12',
                  '-l', '9',
                  '-c', c,
                  '-cw', '0.3',
                  '-g', '0.3',
                  '-Z', '14',
                  '-z', '14', '8']

    # Create directories
    subprocess.call(['mkdir', '-p', workDir])

    # Select random structures
    selectCmd = ['python', 'randomStructureSelect.py',
                 '-structure', structureFile,
                 '-nt', '10000',
                 '-nr', '2000',
                 '-output', workDir]
    subprocess.call(selectCmd)

    # Compute SOAP vectors for the random structures only
    randomSoapCmd = (['python', 'SOAP.py',
                      '-structure', workDir + '/randomSelection.xyz']
                     + soapParams
                     + ['-output', workDir])
    subprocess.call(randomSoapCmd)

    # Select FPS components (FPS.py invoked with the '-c' switch)
    subprocess.call(['python', 'FPS.py',
                     '-soap', soapFile,
                     '-fps', '500',
                     '-c',
                     '-output', workDir])

    # Keep the component selection under its own name before FPS runs again
    os.rename(fpsFile, componentIdxs)

    # (Re-)compute all SOAPs, retain only FPS components
    fullSoapCmd = (['python', 'SOAP.py',
                    '-structure', structureFile]
                   + soapParams
                   + ['-idxs', componentIdxs,
                      '-batchsize', '500',
                      '-output', workDir])
    subprocess.call(fullSoapCmd)

    # Select representative environments
    subprocess.call(['python', 'FPS.py',
                     '-soap', soapFile,
                     '-fps', '2000',
                     '-output', workDir])

    os.rename(fpsFile, workDir + '/FPS-rSOAP.idxs')

## IZA

In [None]:
# For every cutoff: compute the full IZA SOAPs, choose SOAP components by FPS,
# recompute the SOAPs restricted to those components, and write an index file
# that lists every environment.
for c in cutoffs:
    print('Current construction: %s' % c)
    structureFile = '../Raw_Data/GULP/IZA_OPT.xyz'
    workDir = '../Processed_Data/IZA/PCA/%s' % c

    # Create directories
    subprocess.call(['mkdir', '-p', workDir])

    # Leading part of the SOAP.py command shared by both invocations
    soapCmd = ['python', 'SOAP.py',
               '-structure', structureFile,
               '-n', '12',
               '-l', '9',
               '-c', c,
               '-cw', '0.3',
               '-g', '0.3',
               '-Z', '14',
               '-z', '14', '8']

    # Compute full SOAP vectors
    subprocess.call(soapCmd + ['-output', workDir])

    # Select FPS components (FPS.py invoked with the '-c' switch)
    subprocess.call(['python', 'FPS.py',
                     '-soap', workDir + '/SOAPFiles.dat',
                     '-fps', '500',
                     '-c',
                     '-output', workDir])

    os.rename(workDir + '/FPS.idxs', workDir + '/FPS-c.idxs')

    # Recompute SOAPs, retain only the FPS components
    subprocess.call(soapCmd + ['-idxs', workDir + '/FPS-c.idxs',
                               '-output', workDir])

    # Create dummy FPS file that includes all environments (for building the
    # KPCA): the environment count is the line count of atoms.dat via 'wc -l'
    s = subprocess.Popen(['wc', '-l', '../Processed_Data/IZA_OPT/atoms.dat'],
                         stdout=subprocess.PIPE)
    wcOutput = s.communicate()[0]
    nEnv = int(wcOutput.strip().split()[0])

    np.savetxt(workDir + '/FPS-rSOAP.idxs', np.arange(0, nEnv), fmt='%d')

# Hyperparameter optimization

## DEEM 10k

In [None]:
# Grid-search the kernel width and regularisation sigma for every
# (sample, cutoff, property, kernel) combination and record the optimum
# returned by getMinIdx in the `hypers` dictionary.
gridValues = ['0.001', '0.003', '0.01', '0.03',
              '0.1', '0.3', '1.0', '3.0', '10.0']

for ss, sn, mt in zip(sampleSizes, sampleNames, maxTrain):
    for c in cutoffs:
        for p, pn in zip(properties, propertyNames):
            for k, kn in zip(kernels, kernelNames):
                print('Current optimization: %s %s %s %s' % (sn, c, pn, kn))
                workDir = '../Processed_Data/DEEM_%s/%s/%s/ParameterSearch/%s' % (sn, pn, c, kn)
                dataDir = '../Processed_Data/DEEM_%s/PCA/%s' % (sn, c)
                foldDir = '../Processed_Data/DEEM_%s' % sn
                # Use the *_OPT.xyz file: it is the one the SOAPs in dataDir
                # were computed from and the one that had 'Energy_per_Si_Opt'
                # appended (the raw ../Raw_Data/DEEM_<n>.xyz was used before).
                structureFile = '../Raw_Data/GULP/DEEM_%s_OPT.xyz' % ss
                # Key layout: <sample>-SOAP<cutoff>-<property initial>-<kernel initial>
                dictKey = '%s-SOAP%s-%s-%s' % (sn, c, pn[0], kn[0])

                # Create directories
                subprocess.call(['mkdir', '-p', workDir])

                # check_call raises if the script fails, so a stale
                # maeAvgTest.npy from an earlier run is never read below.
                subprocess.check_call(['python', 'learningCurves.py',
                                       '-structure', structureFile,
                                       '-soap', '%s/SOAPFiles.dat' % dataDir,
                                       '-idxs', '%s/FPS-rSOAP.idxs' % dataDir,
                                       '-p', p,
                                       '-Z', '14',
                                       '-k', '5',
                                       '-kernel', k,
                                       '-width'] + gridValues +
                                      ['-sigma'] + gridValues +
                                      ['-zeta', '1',
                                       '-ntrain', mt,
                                       '-train', '%s/kTrain.idxs' % foldDir,
                                       '-validate', '%s/kValidate.idxs' % foldDir,
                                       '-shuffle',
                                       '-output', workDir])

                # (width, sigma) at the minimum of the saved error grid
                optParams = getMinIdx('%s/maeAvgTest.npy' % workDir)
                hypers[dictKey] = optParams

# Persist the updated dictionary; 'with' closes the file even if dump raises
with open('hyperparameters.pkl', 'wb') as parameterFile:
    pickle.dump(hypers, parameterFile)

## IZA

In [None]:
# Grid-search the kernel width and regularisation sigma for every
# (cutoff, property, kernel) combination on IZA and record the optimum
# returned by getMinIdx in the `hypers` dictionary.
gridValues = ['0.001', '0.003', '0.01', '0.03',
              '0.1', '0.3', '1.0', '3.0', '10.0']

for c in cutoffs:
    for p, pn in zip(properties, propertyNames):
        for k, kn in zip(kernels, kernelNames):
            print('Current optimization: %s %s %s' % (c, pn, kn))
            workDir = '../Processed_Data/IZA/%s/%s/ParameterSearch/%s' % (pn, c, kn)
            dataDir = '../Processed_Data/IZA/PCA/%s' % c
            foldDir = '../Processed_Data/IZA'
            structureFile = '../Raw_Data/GULP/IZA_OPT.xyz'
            # Key layout: IZA-SOAP<cutoff>-<property initial>-<kernel initial>
            dictKey = 'IZA-SOAP%s-%s-%s' % (c, pn[0], kn[0])

            # Create directories
            subprocess.call(['mkdir', '-p', workDir])

            # check_call raises if the script fails, so a stale
            # maeAvgTest.npy from an earlier run is never read below.
            subprocess.check_call(['python', 'learningCurves.py',
                                   '-structure', structureFile,
                                   '-soap', '%s/SOAPFiles.dat' % dataDir,
                                   '-idxs', '%s/FPS-rSOAP.idxs' % dataDir,
                                   '-p', p,
                                   '-Z', '14',
                                   '-k', '5',
                                   '-kernel', k,
                                   '-width'] + gridValues +
                                  ['-sigma'] + gridValues +
                                  ['-zeta', '1',
                                   '-ntrain', '28', # TODO: change this depending on how the train and test is split
                                   '-train', '%s/kTrain.idxs' % foldDir,
                                   '-validate', '%s/kValidate.idxs' % foldDir,
                                   '-shuffle',
                                   '-output', workDir])

            # (width, sigma) at the minimum of the saved error grid
            optParams = getMinIdx('%s/maeAvgTest.npy' % workDir)
            hypers[dictKey] = optParams

# Persist the updated dictionary; 'with' closes the file even if dump raises
with open('hyperparameters.pkl', 'wb') as parameterFile:
    pickle.dump(hypers, parameterFile)

# Build KPCA

## DEEM 10k

In [None]:
# Build the kernel PCA for each DEEM sample and cutoff, then run FPS on the
# resulting KPCA file and store the selection as FPS-rKPCA.idxs.
for ss, sn in zip(sampleSizes, sampleNames):
    for c in cutoffs:
        print('Current construction: %s %s' % (sn, c))
        # Use volume as default choice
        dictKey = '%s-SOAP%s-%s-%s' % (sn, c, 'V', 'G')
        workDir = '../Processed_Data/DEEM_%s/PCA/%s' % (sn, c)

        # Create directories
        subprocess.call(['mkdir', '-p', workDir])

        # Kernel width is element 0 of the stored hyperparameter tuple
        kpcaCmd = ['python', 'SOAP-KPCA.py',
                   '-soap', workDir + '/SOAPFiles.dat',
                   '-pca', '500',
                   '-kernel', 'gaussian',
                   '-width', str(hypers[dictKey][0]),
                   '-lowmem',
                   '-idxs', workDir + '/FPS-rSOAP.idxs',
                   '-output', workDir]
        subprocess.call(kpcaCmd)

        fpsCmd = ['python', 'FPS.py',
                  '-soap', workDir + '/KPCAFiles.dat',
                  '-fps', '2000',
                  '-output', workDir]
        subprocess.call(fpsCmd)

        os.rename(workDir + '/FPS.idxs', workDir + '/FPS-rKPCA.idxs')

## IZA

In [None]:
# Build the kernel PCA for IZA at each cutoff and write an index file that
# lists every environment.
for c in cutoffs:
    print('Current construction: IZA %s' % c)
    # Use volume as default choice. The key has three fields (cutoff,
    # property initial, kernel initial), matching how the IZA optimisation
    # cell stores it; the previous version passed a fourth argument (`sn`),
    # which raised TypeError and relied on a variable leaked from another cell.
    dictKey = 'IZA-SOAP%s-%s-%s' % (c, 'V', 'G')
    workDir = '../Processed_Data/IZA/PCA/%s' % c

    # Create directories
    subprocess.call(['mkdir', '-p', workDir])

    subprocess.call(['python', 'SOAP-KPCA.py',
                    '-soap', '%s/SOAPFiles.dat' % workDir,
                    '-pca', '500',
                    '-kernel', 'gaussian', # Don't need linear b/c we have the PCA, which is faster
                    '-width', str(hypers[dictKey][0]),
                    '-lowmem',
                    '-idxs', '%s/FPS-rSOAP.idxs' % workDir,
                    '-output', workDir])

    # Create dummy FPS file that includes all environments: the environment
    # count is the line count of atoms.dat as reported by 'wc -l'
    s = subprocess.Popen(['wc', '-l', '../Processed_Data/IZA_OPT/atoms.dat'],
                         stdout=subprocess.PIPE)
    nEnv = int(s.communicate()[0].strip().split()[0])

    np.savetxt('%s/FPS-rKPCA.idxs' % workDir, np.arange(0, nEnv), fmt='%d')

# Projection of IZA onto DEEM

In [None]:
# Create SOAPs for projection: IZA structures described with the SOAP
# components that FPS selected for the matching DEEM sample.
for ss, sn in zip(sampleSizes, sampleNames):
    for c in cutoffs:
        print('Current construction: %s %s' % (sn, c))
        structureFile = '../Raw_Data/GULP/IZA_OPT.xyz'
        workDir = '../Processed_Data/IZAonDEEM_%s/PCA/%s' % (sn, c)
        dataDir = '../Processed_Data/DEEM_%s/PCA/%s' % (sn, c)

        # Create directories
        subprocess.call(['mkdir', '-p', workDir])

        # Compute SOAPs, retain only the FPS components that are consistent with DEEM
        soapCmd = ['python', 'SOAP.py',
                   '-structure', structureFile,
                   '-n', '12',
                   '-l', '9',
                   '-c', c,
                   '-cw', '0.3',
                   '-g', '0.3',
                   '-Z', '14',
                   '-z', '14', '8',
                   '-idxs', dataDir + '/FPS-c.idxs',
                   '-output', workDir]
        subprocess.call(soapCmd)

        # Create dummy FPS file that includes all environments (for building
        # the KPCA): the count is the line count of atoms.dat via 'wc -l'
        s = subprocess.Popen(['wc', '-l', '../Processed_Data/IZA_OPT/atoms.dat'],
                             stdout=subprocess.PIPE)
        wcOutput = s.communicate()[0]
        nEnv = int(wcOutput.strip().split()[0])

        np.savetxt(workDir + '/FPS-rSOAP.idxs', np.arange(0, nEnv), fmt='%d')

In [None]:
# Kernel PCA: transform the IZA SOAPs (workDir) using the DEEM files in dataDir
for ss, sn in zip(sampleSizes, sampleNames):
    for c in cutoffs:
        print('Current construction: KPCA %s %s' % (sn, c))
        # Use volume as default choice
        dictKey = '%s-SOAP%s-%s-%s' % (sn, c, 'V', 'G')
        workDir = '../Processed_Data/IZAonDEEM_%s/PCA/%s' % (sn, c)
        dataDir = '../Processed_Data/DEEM_%s/PCA/%s' % (sn, c)

        # Create directories
        subprocess.call(['mkdir', '-p', workDir])

        # Kernel width is element 0 of the stored hyperparameter tuple;
        # '-w', '-mean' and '-g' point at files inside the DEEM dataDir
        kpcaCmd = ['python', 'SOAP-KPCA.py',
                   '-soap', dataDir + '/SOAPFiles.dat',
                   '-idxs', dataDir + '/FPS-rSOAP.idxs',
                   '-pca', '500',
                   '-kernel', 'gaussian',
                   '-width', str(hypers[dictKey][0]),
                   '-lowmem',
                   '-dotransform', workDir + '/SOAPFiles.dat',
                   '-w', dataDir + '/UFiles.dat',
                   '-mean', dataDir + '/G-mean.npy',
                   '-g', dataDir + '/GFiles.dat',
                   '-output', workDir]
        subprocess.call(kpcaCmd)

        # Create dummy FPS file that includes all environments: the count is
        # the line count of atoms.dat as reported by 'wc -l'
        s = subprocess.Popen(['wc', '-l', '../Processed_Data/IZA_OPT/atoms.dat'],
                             stdout=subprocess.PIPE)
        wcOutput = s.communicate()[0]
        nEnv = int(wcOutput.strip().split()[0])

        np.savetxt(workDir + '/FPS-rKPCA.idxs', np.arange(0, nEnv), fmt='%d')

# Projection of DEEM onto IZA

In [None]:
# Create SOAPs for projection: DEEM structures described with the SOAP
# components that FPS selected for IZA, followed by an FPS selection of
# representative environments.
for ss, sn in zip(sampleSizes, sampleNames):
    for c in cutoffs:
        print('Current construction: %s %s' % (sn, c))
        # The sample size must be substituted into the file name; previously
        # the literal '%s' was left in the path handed to SOAP.py.
        structureFile = '../Raw_Data/GULP/DEEM_%s_OPT.xyz' % ss
        workDir = '../Processed_Data/DEEM_%sonIZA/PCA/%s' % (sn, c)
        dataDir = '../Processed_Data/IZA/PCA/%s' % c

        # Create directories
        subprocess.call(['mkdir', '-p', workDir])

        # Compute SOAPs, retain only the FPS components that are consistent with IZA
        subprocess.call(['python', 'SOAP.py',
                        '-structure', structureFile,
                        '-n', '12',
                        '-l', '9',
                        '-c', c,
                        '-cw', '0.3',
                        '-g', '0.3',
                        '-Z', '14',
                        '-z', '14', '8',
                        '-idxs', '%s/FPS-c.idxs' % dataDir,
                        '-output', workDir])

        # Select representative environments from the freshly computed SOAPs
        subprocess.call(['python', 'FPS.py',
                        '-soap', '%s/SOAPFiles.dat' % workDir,
                        '-fps', '2000',
                        '-output', workDir])

        os.rename('%s/FPS.idxs' % workDir,
                  '%s/FPS-rSOAP.idxs' % workDir)

In [None]:
# Kernel PCA: transform the DEEM SOAPs (workDir) using the IZA files in
# dataDir, then run FPS on the resulting KPCA file.
for ss, sn in zip(sampleSizes, sampleNames):
    for c in cutoffs:
        print('Current construction: KPCA %s %s' % (sn, c))
        # Use volume as default choice
        dictKey = 'IZA-SOAP%s-%s-%s' % (c, 'V', 'G')
        workDir = '../Processed_Data/DEEM_%sonIZA/PCA/%s' % (sn, c)
        dataDir = '../Processed_Data/IZA/PCA/%s' % c

        # Create directories
        subprocess.call(['mkdir', '-p', workDir])

        # Kernel width is element 0 of the stored hyperparameter tuple;
        # '-w', '-mean' and '-g' point at files inside the IZA dataDir
        kpcaCmd = ['python', 'SOAP-KPCA.py',
                   '-soap', dataDir + '/SOAPFiles.dat',
                   '-idxs', dataDir + '/FPS-rSOAP.idxs',
                   '-pca', '500',
                   '-kernel', 'gaussian',
                   '-width', str(hypers[dictKey][0]),
                   '-lowmem',
                   '-dotransform', workDir + '/SOAPFiles.dat',
                   '-w', dataDir + '/UFiles.dat',
                   '-mean', dataDir + '/G-mean.npy',
                   '-g', dataDir + '/GFiles.dat',
                   '-output', workDir]
        subprocess.call(kpcaCmd)

        fpsCmd = ['python', 'FPS.py',
                  '-soap', workDir + '/KPCAFiles.dat',
                  '-fps', '2000',
                  '-output', workDir]
        subprocess.call(fpsCmd)

        os.rename(workDir + '/FPS.idxs', workDir + '/FPS-rKPCA.idxs')

# Property Decomposition

## DEEM 10k

In [None]:
# Run propertyRegression.py for every (sample, cutoff, property, kernel)
# combination, using the stored width (element 0) and sigma (element 1).
for ss, sn in zip(sampleSizes, sampleNames):
    for c in cutoffs:
        for p, pn in zip(properties, propertyNames):
            for k, kn in zip(kernels, kernelNames):
                print('Current model: %s %s %s %s' % (sn, c, pn, kn))
                workDir = '../Processed_Data/DEEM_%s/%s/%s/%s' % (sn, pn, c, kn)
                structureFile = '../Raw_Data/GULP/DEEM_%s_OPT.xyz' % ss
                dataFile = '../Processed_Data/DEEM_%s/PCA/%s/SOAPFiles.dat' % (sn, c)
                idxsFile = '../Processed_Data/DEEM_%s/PCA/%s/FPS-rSOAP.idxs' % (sn, c)
                dictKey = '%s-SOAP%s-%s-%s' % (sn, c, pn[0], kn[0])

                # Create directories
                subprocess.call(['mkdir', '-p', workDir])

                regressionCmd = ['python', 'propertyRegression.py']
                regressionCmd += ['-structure', structureFile]
                regressionCmd += ['-soap', dataFile]
                regressionCmd += ['-idxs', idxsFile]
                regressionCmd += ['-p', p]
                regressionCmd += ['-Z', '14']
                regressionCmd += ['-kernel', k]
                regressionCmd += ['-width', str(hypers[dictKey][0])]
                regressionCmd += ['-sigma', str(hypers[dictKey][1])]
                regressionCmd += ['-zeta', '1']
                regressionCmd += ['-ntrain', ss]
                regressionCmd += ['-env', '-lowmem']
                regressionCmd += ['-output', workDir]
                subprocess.call(regressionCmd)

## IZA

In [None]:
# Run propertyRegression.py on IZA for every (cutoff, property, kernel)
# combination, using the stored width (element 0) and sigma (element 1).
for c in cutoffs:
    for p, pn in zip(properties, propertyNames):
        for k, kn in zip(kernels, kernelNames):
            print('Current model: IZA %s %s %s' % (c, pn, kn))
            workDir = '../Processed_Data/IZA/%s/%s/%s' % (pn, c, kn)
            # The IZA paths depend only on the cutoff. The previous version
            # applied '%' with `ss` / `(sn, c)` to strings with zero / one
            # placeholder, which raised TypeError and relied on variables
            # leaked from other cells.
            structureFile = '../Raw_Data/GULP/IZA_OPT.xyz'
            dataFile = '../Processed_Data/IZA/PCA/%s/SOAPFiles.dat' % c
            idxsFile = '../Processed_Data/IZA/PCA/%s/FPS-rSOAP.idxs' % c
            dictKey = 'IZA-SOAP%s-%s-%s' % (c, pn[0], kn[0])

            # Create directories
            subprocess.call(['mkdir', '-p', workDir])

            subprocess.call(['python', 'propertyRegression.py',
                            '-structure', structureFile,
                            '-soap', dataFile,
                            '-idxs', idxsFile,
                            '-p', p,
                            '-Z', '14',
                            '-kernel', k,
                            '-width', str(hypers[dictKey][0]),
                            '-sigma', str(hypers[dictKey][1]),
                            '-zeta', '1',
                            '-ntrain', '35',
                            '-env',
                            '-lowmem',
                            '-output', workDir])

# Property Prediction

## Predict IZA using DEEM 10k

## Predict DEEM 10k using IZA

# Find unique environments with FPS

## DEEM 10k

In [None]:
# Find unique environments with FPS (DEEM 10k), one run per cutoff
for c in cutoffs:
    print('Current construction: %s' % c)
    workDir = '../Processed_Data/DEEM_10k/PCA/%s' % c

    # SOAP: FPS on the SOAP file with the '-nobatch' and '-d' switches
    fpsCmd = ['python', 'FPS.py',
              '-soap', workDir + '/SOAPFiles.dat',
              '-fps', '75000',
              '-nobatch',
              '-d',
              '-output', workDir]
    subprocess.call(fpsCmd)

    # Store the selection under the 'unique SOAP' name
    os.rename(workDir + '/FPS.idxs', workDir + '/FPS-uSOAP.idxs')
    
    # PCA
    #subprocess.call(['python', 'FPS.py',
    #                '-soap', '%s/PCAFiles.dat' % workDir,
    #                '-fps', '75000',
    #                '-nobatch',
    #                '-d',
    #                '-output', workDir])
    
    #os.rename('%s/FPS.idxs' % workDir,
    #          '%s/FPS-uPCA.idxs' % workDir)
    
    # KPCA
    #subprocess.call(['python', 'FPS.py',
    #                '-soap', '%s/KPCAFiles.dat' % workDir,
    #                '-fps', '75000',
    #                '-nobatch',
    #                '-d',
    #                '-output', workDir])
 
    #os.rename('%s/FPS.idxs' % workDir,
    #          '%s/FPS-uKPCA.idxs' % workDir)

## IZA

In [None]:
# Find unique environments with FPS for the IZA data sets.
# The directory names follow the ones the SOAP cells of this notebook write
# SOAPFiles.dat into ('IZA' and 'IZAonDEEM_10k'), which is also where the
# histogram cell looks for FPS-uSOAP.idxs. The earlier 'IZA_OPT*' names
# (including a '1k' sample) are not produced by any cell in this notebook.
# NOTE(review): confirm no external workflow still relies on the old names.
for iza in ['IZA', 'IZAonDEEM_10k']:
    for c in cutoffs:
        print('Current construction: %s %s' % (iza, c))
        workDir = '../Processed_Data/%s/PCA/%s' % (iza, c)

        # SOAP: FPS on the SOAP file with the '-nobatch' and '-d' switches
        subprocess.call(['python', 'FPS.py',
                        '-soap', '%s/SOAPFiles.dat' % workDir,
                        '-fps', '75000',
                        '-nobatch',
                        '-d',
                        '-output', workDir])

        # Store the selection under the 'unique SOAP' name
        os.rename('%s/FPS.idxs' % workDir,
                  '%s/FPS-uSOAP.idxs' % workDir)

        # PCA
        #subprocess.call(['python', 'FPS.py',
        #                '-soap', '%s/PCAFiles.dat' % workDir,
        #                '-fps', '75000',
        #                '-nobatch',
        #                '-d',
        #                '-output', workDir])

        #os.rename('%s/FPS.idxs' % workDir,
        #          '%s/FPS-uPCA.idxs' % workDir)

        # KPCA
        #if iza == 'IZA_OPT':
        #    subprocess.call(['python', 'FPS.py',
        #                   '-soap', '%s/KPCAFiles.dat' % workDir,
        #                    '-fps', '75000',
        #                    '-nobatch',
        #                    '-d',
        #                    '-output', workDir])

        #    os.rename('%s/FPS.idxs' % workDir,
        #              '%s/FPS-uKPCA.idxs' % workDir) 
        
        #else:
        #    subprocess.call(['python', 'FPS.py',
        #                    '-soap', '%s/projFiles.dat' % workDir,
        #                    '-fps', '75000',
        #                    '-nobatch',
        #                    '-d',
        #                    '-output', workDir])

        #   os.rename('%s/FPS.idxs' % workDir,
        #             '%s/FPS-uKPCA.idxs' % workDir)

# Distance Histograms

In [None]:
# Compute DEEM/IZA distance histograms for every (sample, cutoff, kernel)
# combination with histograms.py.
for ss, sn in zip(sampleSizes, sampleNames):
    for c in cutoffs:
        for k, kn in zip(kernels, kernelNames):
            # `rn` was referenced here and in workDir but is never defined in
            # this notebook (NameError); it has been dropped from both.
            print('Current construction: %s %s %s' % (sn, c, kn))
            dictKey = '%s-SOAP%s-%s-%s' % (sn, c, 'V', 'G') # Use DEEM volume as default choice
            idxsFileI = '../Processed_Data/IZAonDEEM_%s/PCA/%s/FPS-uSOAP.idxs' % (sn, c)
            idxsFileD = '../Processed_Data/DEEM_%s/PCA/%s/FPS-uSOAP.idxs' % (sn, c)
            deemDir = '../Processed_Data/DEEM_%s/PCA/%s' % (sn, c)
            # Read the IZA SOAPs from the directory the projection cell writes
            # them to (IZAonDEEM_*), matching idxsFileI above.
            izaDir = '../Processed_Data/IZAonDEEM_%s/PCA/%s' % (sn, c)
            # NOTE(review): the output root keeps its original 'IZA_OPTonDEEM'
            # name; rename if outputs should sit next to the inputs.
            workDir = '../Processed_Data/IZA_OPTonDEEM_%s/Histograms/SOAP/%s/%s' % (sn, c, kn)

            # Create directories
            subprocess.call(['mkdir', '-p', workDir])

            # Kernel width is element 0 of the stored hyperparameter tuple
            subprocess.call(['python', 'histograms.py',
                            '-deem', '%s/SOAPFiles.dat' % deemDir,
                            '-idxsd', idxsFileD,
                            '-idxsi', idxsFileI,
                            '-iza', '%s/SOAPFiles.dat' % izaDir,
                            '-kernel', k,
                            '-width', str(hypers[dictKey][0]),
                            '-zeta', '1',
                            '-nbins', '200',
                            '-batchsize', '2000',
                            '-range', '-4', '10',
                            '-log',
                            '-output', workDir])