In [5]:
# Import needed packages

import glob
import numpy as np
import copy
import math

In [6]:
# Find all filepaths for snapshot .pdb files that need
# coordinates duplicated in the +/- x/y/z directions
# forming a 3x3x3 duplication box

filepaths_to_be_duplicated = []
for i in range(1, 36):
    for file_name in set(glob.glob(str(i) + 'run/*.pdb')) - set(glob.glob(str(i) + 'run/*duplicated*')) - set(glob.glob(str(i) + 'run/*sphere*')):
        filepaths_to_be_duplicated.append(file_name)

In [7]:
# This method will read the .pdb files and provide parsed information
# it requires a file path to the .pdb file
# it returns the following:
# a float indicating the bounds for a periodic cubic box 
# a list of row indexes rows where acrolein starts
# a list of row indexes where water molecules start
# a string indicating the path of the new file where duplicated molecules should be written
# a list of the original lines of the .pdb file

def parse_pdb(pdb_file_path):

    temp_newfilename = pdb_file_path.split('.')
    newfilename = temp_newfilename[0] + '_duplicated.' + temp_newfilename[1]
    sphere_file_name = temp_newfilename[0] + '_sphere.' + temp_newfilename[1]


    with open(pdb_file_path, 'r') as f:
        water_index_list, acrolein_index_list = [], []
        lines = f.readlines()

        for i in range(len(lines)):
            if 'CRYST1' in lines[i]: # Find periodic boundary conditions and record their cubic length (x-bound)
                split_line = lines[i].split()
                bound = float(split_line[1])
            if 'ATOM' in lines[i]: # Check to see if the line contains an atom
                if ' O ' in lines[i]: # Check to see if the line contains an oxygen
                    if ' H ' in lines[i+1]: # If a line contains an oxygen and the following line contains a hydrogen, a water molecule has been found
                        water_index_list.append(i)
                    if ' C ' in lines[i+1]: # If a line contains an oxygen and the following line contains a carbon, an acrolein molecule has been found
                        acrolein_index_list.append(i)
        f.close()

    return(bound, acrolein_index_list, water_index_list, newfilename, lines, sphere_file_name)

# Using the index of where waters are in the .pdb file, 
# make a list of atomic coordinates for all waters

def find_waters(water_index_list, lines):
    water_molecules = []
    for i in range(len(water_index_list)):
        molecule = []
        for x in range(3):
            atom = lines[water_index_list[i] + x].split()
            atom = [float(atom[u]) for u in range(5, 8)]
        #    water_molecules.append(atom)
            molecule.append(atom)
        water_molecules.append(molecule)
    #print(water_molecules)
    return water_molecules

In [8]:
# The following methods perform displacements in the positive or
# negative x, y, and z directions. single_change() performs one
# translation, double_change() performs two dependent translations, and
# triple_change() will perform displacements for the x, y, AND z coordinates.
# These methods require molecular coordinates, the bounds of the periodic
# cell, the desired direction of the translation (negative or positive), 
# and which dimension (x, y, or z) to translate along. It returns the 
# translated molecule.

def single_change(molecule, bounds, direction, dimension):
    for i in range(len(molecule)):
        if direction == 'plus':
            if dimension == 'x':
                molecule[i][0] = round(float(molecule[i][0]) + bounds, 3)
            if dimension == 'y':
                molecule[i][1] = round(float(molecule[i][1]) + bounds, 3)
            if dimension == 'z':
                molecule[i][2] = round(float(molecule[i][2]) + bounds, 3)
        if direction == 'minus':
            if dimension == 'x':
                molecule[i][0] = round(float(molecule[i][0]) - bounds, 3)
            if dimension == 'y':
                molecule[i][1] = round(float(molecule[i][1]) - bounds, 3)
            if dimension == 'z':
                molecule[i][2] = round(float(molecule[i][2]) - bounds, 3)
    return molecule

def double_change(molecule, bounds, direction1, direction2, dimension1, dimension2):
    for i in range(len(molecule)):
        if direction1 == 'plus' and direction2 == 'plus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) + bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) + bounds, 3)
        if direction1 == 'plus' and direction2 == 'minus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) + bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) - bounds, 3)
        if direction1 == 'minus' and direction2 == 'plus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) - bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) + bounds, 3)
        if direction1 == 'minus' and direction2 == 'minus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) - bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) - bounds, 3)
    return molecule


def triple_change(molecule, bounds, direction1, direction2, direction3, dimension1, dimension2, dimension3):
    for i in range(len(molecule)):
        if direction1 == 'plus' and direction2 == 'plus' and direction3 == 'plus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) + bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) + bounds, 3)
            molecule[i][dimension3] = round(float(molecule[i][dimension3]) + bounds, 3)

        if direction1 == 'plus' and direction2 == 'plus' and direction3 == 'minus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) + bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) + bounds, 3)
            molecule[i][dimension3] = round(float(molecule[i][dimension3]) - bounds, 3)

        if direction1 == 'plus' and direction2 == 'minus' and direction3 == 'plus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) + bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) - bounds, 3)
            molecule[i][dimension3] = round(float(molecule[i][dimension3]) + bounds, 3)

        if direction1 == 'minus' and direction2 == 'plus' and direction3 == 'plus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) - bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) + bounds, 3)
            molecule[i][dimension3] = round(float(molecule[i][dimension3]) + bounds, 3)

        if direction1 == 'plus' and direction2 == 'minus' and direction3 == 'minus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) + bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) - bounds, 3)
            molecule[i][dimension3] = round(float(molecule[i][dimension3]) - bounds, 3)

        if direction1 == 'minus' and direction2 == 'plus' and direction3 == 'minus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) - bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) + bounds, 3)
            molecule[i][dimension3] = round(float(molecule[i][dimension3]) - bounds, 3)

        if direction1 == 'minus' and direction2 == 'minus' and direction3 == 'plus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) - bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) - bounds, 3)
            molecule[i][dimension3] = round(float(molecule[i][dimension3]) + bounds, 3)

        if direction1 == 'minus' and direction2 == 'minus' and direction3 == 'minus':
            molecule[i][dimension1] = round(float(molecule[i][dimension1]) - bounds, 3)
            molecule[i][dimension2] = round(float(molecule[i][dimension2]) - bounds, 3)
            molecule[i][dimension3] = round(float(molecule[i][dimension3]) - bounds, 3)

    return molecule

In [9]:
# The following is a horrific method meant to create copies of molecules
# in three ways:
# a single coordinate shift producing 6 images in the (+/-) * (x, y, z) directions
# a double coordiante shift producing 12 images in the  (+/-) * (x, y, z) performed twice directions
# a triple coordinate shift producing 8 images in the (+/-) * (x, y, z) performed thrice directions

def make_26_copies(molecule, bounds):
    static_molecule = copy.deepcopy(molecule)
    for i in range(len(molecule)):
        # Perform single displacements
        posx = single_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction = 'plus', dimension = 'x')
        negx = single_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction = 'minus', dimension = 'x')
        posy = single_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction = 'plus', dimension = 'y')
        negy = single_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction = 'minus', dimension = 'y')
        posz = single_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction = 'plus', dimension = 'z')
        negz = single_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction = 'minus', dimension = 'z')
        # Combine single displacements into a list
        single_changes = [posx, negx, posy, negy, posz, negz]
        # Perform double displacements
        posx_posy = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'plus', dimension1 = 0, dimension2 = 1)
        posx_negy = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'minus', dimension1 = 0, dimension2 = 1)
        posx_posz = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'plus', dimension1 = 0, dimension2 = 2)
        posx_negz = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'minus', dimension1 = 0, dimension2 = 2)
        negx_posy = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'plus', dimension1 = 0, dimension2 = 1)
        negx_negy = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'minus', dimension1 = 0, dimension2 = 1)
        negx_posz = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'plus', dimension1 = 0, dimension2 = 2)
        negx_negz = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'minus', dimension1 = 0, dimension2 = 2)
        posy_posz = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'plus', dimension1 = 1, dimension2 = 2)
        posy_negz = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'minus', dimension1 = 1, dimension2 = 2)
        negy_posz = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'plus', dimension1 = 1, dimension2 = 2)
        negy_negz = double_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'minus', dimension1 = 1, dimension2 = 2)
        # Combine double displacements into a list
        double_changes = [posx_posy, posx_negy, posx_posz, posx_negz, negx_posy, negx_negy, negx_posz, negx_negz, posy_posz, posy_negz, negy_posz, negy_negz] 
        # Perform triple displacements
        posx_posy_posz = triple_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'plus', direction3 = 'plus', dimension1 = 0, dimension2 = 1, dimension3 = 2)
        posx_negy_posz = triple_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'minus', direction3 = 'plus', dimension1 = 0, dimension2 = 1, dimension3 = 2)
        posx_posy_negz = triple_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'plus', direction3 = 'minus', dimension1 = 0, dimension2 = 1, dimension3 = 2)
        posx_negy_negz = triple_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'plus', direction2 = 'minus', direction3 = 'minus', dimension1 = 0, dimension2 = 1, dimension3 = 2)
        negx_posy_posz = triple_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'plus', direction3 = 'plus', dimension1 = 0, dimension2 = 1, dimension3 = 2)
        negx_negy_posz = triple_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'minus', direction3 = 'plus', dimension1 = 0, dimension2 = 1, dimension3 = 2)
        negx_posy_negz = triple_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'plus', direction3 = 'minus', dimension1 = 0, dimension2 = 1, dimension3 = 2)
        negx_negy_negz = triple_change(molecule = copy.deepcopy(molecule), bounds = bounds, direction1 = 'minus', direction2 = 'minus', direction3 = 'minus', dimension1 = 0, dimension2 = 1, dimension3 = 2)
        # Combine triple displacements into a list
        triple_changes = [posx_posy_posz, posx_negy_posz, posx_posy_negz, posx_negy_negz, negx_posy_posz, negx_negy_posz, negx_posy_negz, negx_negy_negz]
        # Combine single, double, and triple displacement lists
        output = single_changes + double_changes + triple_changes

        return output

In [10]:
# A method that calls all the translation methods
# Requires a list of water molecules
# Returns a list of translated molecular coordinates

def translate_waters(molecules, bound):
    translated_molecules = []
    for i in range(len(molecules)):
        a = make_26_copies(molecule = molecules[i], bounds = bound)
        for y in range(len(a)):
            translated_molecules.append(a[y])
    return translated_molecules

In [11]:
# First, create a new list that is of the proper length. Since
# the original .pbd files contain acrolein molecules, headers
# and footers, some pruning is needed.
# Second, populate that new list with the 26 images of translated
# water molecules.
# This method requires a list of translated molecules, as well as the
# lines of the original .pdb file
# Returns 26 water images with indexes and coordinates

def prep_translated_atoms_for_writing(translated_molecules, lines):
    # Since our images are in molecular format, create a list of atomic format
    translated_atoms = []
    for i in range(len(translated_molecules)):
            for x in range(3):
                 translated_atoms.append(translated_molecules[i][x])


    new_lines = []
    # For each of the 26 images, duplicate the original molecules in split format
    for i in range(26):
        for x in range(10, len(lines)-1): # Skip header, acrolein, and footer
            new_lines.append(lines[x].split())
    # Replace 26*duplicated atomic coordinates with our translated coordinates
    counter = 1440 # Since we are including the original atoms, we need to start atom indexes where the original atoms left off
    for i in range(len(new_lines)):
        new_lines[i][5] = str(translated_atoms[i][0])
        new_lines[i][6] = str(translated_atoms[i][1])
        new_lines[i][7] = str(translated_atoms[i][2])
        new_lines[i][1] = str(counter)
        counter += 1
    return new_lines

def prep_in_range_atoms_for_writing(in_range_molecules, lines):
    # Since our images are in molecular format, create a list of atomic format
    in_range_atoms = []
    for i in range(len(in_range_molecules)):
        for x in range(3):
             in_range_atoms.append(in_range_molecules[i][x])



    new_lines = []
    # For each of the 26 images, duplicate the original molecules in split format
    for u in range(2, 10):
         new_lines.append(lines[u].split())
    for x in range(len(in_range_atoms)):
         new_lines.append(lines[x+ 10].split())
    
    # Replace 26*duplicated atomic coordinates with our translated coordinates
    counter = 1 # Since we are including the original atoms, we need to start atom indexes where the original atoms left off
    for i in range(8, len(new_lines)):
        new_lines[i][5] = str(in_range_atoms[i-8][0])
        new_lines[i][6] = str(in_range_atoms[i-8][1])
        new_lines[i][7] = str(in_range_atoms[i-8][2])
        new_lines[i][1] = str(counter)
        counter += 1
    return new_lines

In [12]:
# Run everything
# We are going to loop over all the wanted files in our folders

for i in range(len(filepaths_to_be_duplicated)):
    # Retreive the original files to access the information
    old_file_name = filepaths_to_be_duplicated[i]
    # Parse those files to obtain lists that we want
    bound, acrolein_index_list, water_index_list, newfilename, lines, sphere_file_name = parse_pdb(pdb_file_path = old_file_name)
    # From amongst those lists, obtain the coordinates of each water molecule
    water_molecules = find_waters(water_index_list=water_index_list, lines=lines)
    # Translate these molecules into 26 images creating a 3x3x3 slab (including originalsc)
    translated_molecules = translate_waters(molecules = water_molecules, bound=bound)
    # Rearrange the format so that the coordinates are ready to be written
    new_lines = prep_translated_atoms_for_writing(translated_molecules=translated_molecules, lines=lines)
    # Write a new file including all the original atoms along with the images
    with open(newfilename, 'w') as f:
        # We could just write the original lines, but we will split them to match the format of the imagesc
        split_lines = copy.deepcopy(lines)
        for i in range(len(lines)):
            split_lines[i] = lines[i].split()
        # Write headers of original file
        f.write(lines[0])
        f.write(lines[1])
        # Write original atoms
        for i in range(2, len(lines)-1):
            f.write('{:<6}'.format(split_lines[i][0]) + '{:>5}'.format(split_lines[i][1]) + '{:>5}'.format(split_lines[i][2]) + '{:>4}'.format(split_lines[i][3]) + '{:>6}'.format(split_lines[i][4]) + '{:>12}'.format(split_lines[i][5]) + '{:>8}'.format(split_lines[i][6]) + '{:>8}'.format(split_lines[i][7]) + '{:>6}'.format(split_lines[i][8]) + '{:>6}'.format(split_lines[i][9]) + '{:>12}'.format(split_lines[i][10]) + '\n')
        # Write image atoms
        for i in range(len(new_lines)):
            f.write('{:<6}'.format(new_lines[i][0]) + '{:>5}'.format(new_lines[i][1]) + '{:>5}'.format(new_lines[i][2]) + '{:>4}'.format(new_lines[i][3]) + '{:>6}'.format(new_lines[i][4]) + '{:>12}'.format(new_lines[i][5]) + '{:>8}'.format(new_lines[i][6]) + '{:>8}'.format(new_lines[i][7]) + '{:>6}'.format(new_lines[i][8]) + '{:>6}'.format(new_lines[i][9]) + '{:>12}'.format(new_lines[i][10]) + '\n')
        # Write footer
        f.write(lines[-1])
        f.close()

        
    



In [49]:

def get_distance(acrolein_molecule, water_molecule):
    distances = []
    for i in range(len(water_molecule)):
        atom = water_molecule[i]
        acr = acrolein_molecule
        distance = round(float(((atom[0] - acr[0]) ** 2  + (atom[1] - acr[1] ) ** 2 + (atom[2] - acr[2]) ** 2) ** 0.5), 3)
        distances.append(distance)
    return distances
for i in range(0, len(filepaths_to_be_duplicated)):
    print(filepaths_to_be_duplicated[i])
    with open(filepaths_to_be_duplicated[i], 'r') as f:

        lines = f.readlines()
        old_file_name = filepaths_to_be_duplicated[i]
        # Parse those files to obtain lists that we want
        bound, acrolein_index_list, water_index_list, newfilename, lines, sphere_file_name = parse_pdb(pdb_file_path = old_file_name)
        # From amongst those lists, obtain the coordinates of each water molecule
        water_molecules = find_waters(water_index_list=water_index_list, lines=lines)
        # Translate these molecules into 26 images creating a 3x3x3 slab (including originalsc)
        translated_molecules = translate_waters(molecules = water_molecules, bound=bound)
        # Rearrange the format so that the coordinates are ready to be written
        new_lines = prep_translated_atoms_for_writing(translated_molecules=translated_molecules, lines=lines)
    acrolein_coordinates = []
    for x in range(acrolein_index_list[0], acrolein_index_list[0] + 8):
        split_line = lines[x].split()
        acrolein_coordinates.append([float(split_line[y]) for y in range(5, 8)])
    ac = acrolein_coordinates
    mc1_xc = 12.011 * ac[1][0]
    mc1_yc = 12.011 * ac[1][1]
    mc1_zc = 12.011 * ac[1][2]
    mc2_xc = 12.011 * ac[2][0]
    mc2_yc = 12.011 * ac[2][1]
    mc2_zc = 12.011 * ac[2][2]
    mc3_xc = 12.011 * ac[3][0]
    mc3_yc = 12.011 * ac[3][1]
    mc3_zc = 12.011 * ac[3][2]
    mo1_xc = 15.999 * ac[0][0]
    mo1_yc = 15.999 * ac[0][1]
    mo1_zc = 15.999 * ac[0][2]
    mh1_xc = 1.001 * ac[4][0]
    mh1_yc = 1.001 * ac[4][1]
    mh1_zc = 1.001 * ac[5][2]
    mh2_xc = 1.001 * ac[5][0]
    mh2_yc = 1.001 * ac[5][1]
    mh2_zc = 1.001 * ac[5][2]
    mh3_xc = 1.001 * ac[6][0]
    mh3_yc = 1.001 * ac[6][1]
    mh3_zc = 1.001 * ac[6][2]
    mh4_xc = 1.001 * ac[7][0]
    mh4_yc = 1.001 * ac[7][1]
    mh4_zc = 1.001 * ac[7][2]
    x_top = mc1_xc + mc2_xc + mc3_xc + mo1_xc + mh1_xc + mh2_xc + mh3_xc + mh4_xc
    y_top = mc1_yc + mc2_yc + mc3_yc + mo1_yc + mh1_yc + mh2_yc + mh3_yc + mh4_yc
    z_top = mc1_zc + mc2_zc + mc3_zc + mo1_zc + mh1_zc + mh2_zc + mh3_zc + mh4_zc
    bottom = 1.001 * 4 + 15.999 * 1 + 12.011 * 3
    x_center = round(x_top / bottom, 3)
    y_center = round(y_top / bottom, 3)
    z_center = round(z_top / bottom, 3)
    #print(acrolein_coordinates)
    print(x_center, y_center, z_center)
    #print(water_molecules[0])
    #print(translated_molecules[0])
    
    is_in_range_molecules, not_in_range_molecules = [], [] 
    for t in range(len(water_molecules)):
        water_molecule = water_molecules[t]
        distances = get_distance(acrolein_molecule=[x_center, y_center, z_center], water_molecule = water_molecule)
        if all(x < 8 for x in distances):

            is_in_range_molecules.append(water_molecule)
        else:
            not_in_range_molecules.append(water_molecule)
    for t in range(len(translated_molecules)):
        water_molecule = translated_molecules[t]
        distances = get_distance(acrolein_molecule=[x_center, y_center, z_center], water_molecule = water_molecule)
        if all(x < 8 for x in distances):

            is_in_range_molecules.append(water_molecule)
        else:
            not_in_range_molecules.append(water_molecule)
    #print(is_in_range_molecules[0:10])
    print('in range')
    print(len(is_in_range_molecules))
    #print(not_in_range_molecules[0:10])
    print('not in range')
    print(len(not_in_range_molecules))
    #print([x_center, y_center, z_center])
    #print(acrolein_coordinates)
    #print(water_molecules[i])
    
    newfilename = sphere_file_name
    new_lines = prep_in_range_atoms_for_writing(in_range_molecules=is_in_range_molecules, lines=lines)
    # Write a new file including all the original atoms along with the images
    with open(newfilename, 'w') as f:
        # We could just write the original lines, but we will split them to match the format of the imagesc
        split_lines = copy.deepcopy(lines)
        for i in range(len(lines)):
            split_lines[i] = lines[i].split()
        # Write headers of original file
        f.write(lines[0])
        f.write(lines[1])
        # Write sphere atoms
        for i in range(len(new_lines)):
            f.write('{:<6}'.format(new_lines[i][0]) + '{:>5}'.format(new_lines[i][1]) + '{:>5}'.format(new_lines[i][2]) + '{:>4}'.format(new_lines[i][3]) + '{:>6}'.format(new_lines[i][4]) + '{:>12}'.format(new_lines[i][5]) + '{:>8}'.format(new_lines[i][6]) + '{:>8}'.format(new_lines[i][7]) + '{:>6}'.format(new_lines[i][8]) + '{:>6}'.format(new_lines[i][9]) + '{:>12}'.format(new_lines[i][10]) + '\n')
        # Write footer
        f.write('CONNECT' + '    1    2\n')
        f.write('CONNECT' + '    2    3\n')
        f.write('CONNECT' + '    3    4\n')
        f.write('CONNECT' + '    2    5\n')
        f.write('CONNECT' + '    3    6\n')
        f.write('CONNECT' + '    4    7\n')
        f.write('CONNECT' + '    4    8\n')
        for i in range(len(new_lines)):
            if i % 3 == 0:
                f.write('CONNECT' + '{:>5}'.format(str(i)) + '{:>5}'.format(str(i + 1)) + '\n')
                f.write('CONNECT' + '{:>5}'.format(str(i)) + '{:>5}'.format(str(i + 2)) + '\n')
        f.write(lines[-1])
        f.close()

# wtf

1run\snapshot_2.pdb
15.94 11.054 15.762
in range
62
not in range
12817
1run\118000_checkpoint.pdb
15.94 11.054 15.762
in range
62
not in range
12817
2run\76000_checkpoint.pdb
10.441 11.014 14.165
in range
47
not in range
12832
2run\snapshot_2.pdb
10.441 11.014 14.165
in range
47
not in range
12832
3run\282000_checkpoint.pdb
12.056 10.205 19.513
in range
38
not in range
12841
3run\snapshot_2.pdb
12.056 10.205 19.513
in range
38
not in range
12841
4run\snapshot_2.pdb
11.798 16.439 11.21
in range
50
not in range
12829
4run\82000_checkpoint.pdb
11.798 16.439 11.21
in range
50
not in range
12829
5run\snapshot_2.pdb
9.966 13.949 12.664
in range
66
not in range
12813
5run\66000_checkpoint.pdb
9.966 13.949 12.664
in range
66
not in range
12813
6run\snapshot_2.pdb
11.51 14.49 13.004
in range
54
not in range
12825
6run\122000_checkpoint.pdb
11.51 14.49 13.004
in range
54
not in range
12825
7run\84000_checkpoint.pdb
6.807 12.319 11.436
in range
58
not in range
12821
7run\snapshot_2.pdb
6.807 12.3

In [50]:
import os
try:
    os.mkdir('kevin_final')
except:
    pass
final_files = []
for i in range(1, 36):
    try:
        os.mkdir('kevin_final\\' + str(i) + 'run')
    except:
        pass
    for file_name in set(glob.glob(str(i) + 'run/*sphere*')):
        final_files.append(file_name)
#print(final_files)
#print(final_files[0])
#print(final_files[-1])
counter = 0
print(final_files)
print(len(final_files))
for i in range(0, len(final_files), 2):
    print(i)
    file_name1 = final_files[i]
    file_name2 = final_files[i+1]
    counter += 1
    
    with open(file_name1, 'r') as f:
        lines1 = f.readlines()
    intermediate_filename1 = file_name1.split('\\')

    intermediate_filename1 = intermediate_filename1[-1]
    with open(file_name2, 'r') as f:
        lines2 = f.readlines()
    intermediate_filename2 = file_name2.split('\\')

    intermediate_filename2 = intermediate_filename2[-1]
    print('done3')
    print(intermediate_filename1)
    print(intermediate_filename2)
    with open('kevin_final\\' + str(counter) + 'run\\' + str(intermediate_filename1), 'w') as f:
        for line in lines1:
            f.write(line)
    with open('kevin_final\\' + str(counter) + 'run\\' + str(intermediate_filename2), 'w') as f:
        for line in lines2:
            f.write(line)

print('done')


    
    

['1run\\snapshot_2_sphere.pdb', '1run\\118000_checkpoint_sphere.pdb', '2run\\76000_checkpoint_sphere.pdb', '2run\\snapshot_2_sphere.pdb', '3run\\282000_checkpoint_sphere.pdb', '3run\\snapshot_2_sphere.pdb', '4run\\82000_checkpoint_sphere.pdb', '4run\\snapshot_2_sphere.pdb', '5run\\snapshot_2_sphere.pdb', '5run\\66000_checkpoint_sphere.pdb', '6run\\snapshot_2_sphere.pdb', '6run\\122000_checkpoint_sphere.pdb', '7run\\snapshot_2_sphere.pdb', '7run\\84000_checkpoint_sphere.pdb', '8run\\snapshot_2_sphere.pdb', '8run\\70000_checkpoint_sphere.pdb', '9run\\76000_checkpoint_sphere.pdb', '9run\\snapshot_2_sphere.pdb', '10run\\snapshot_2_sphere.pdb', '10run\\134000_checkpoint_sphere.pdb', '11run\\snapshot_2_sphere.pdb', '11run\\86000_checkpoint_sphere.pdb', '12run\\92000_checkpoint_sphere.pdb', '12run\\snapshot_2_sphere.pdb', '13run\\snapshot_2_sphere.pdb', '13run\\94000_checkpoint_sphere.pdb', '14run\\snapshot_2_sphere.pdb', '14run\\94000_checkpoint_sphere.pdb', '15run\\62000_checkpoint_sphere.p