### This script converts the .com files from Tom's naming convention (filenames keyed by common structure, for example pyrd1, pyrd2...) <br> back to Leah's naming convention (Het001, Het002, Het003...)
### The mapping is defined in the excel file named smiles_with_mapping.xlsx

##### For each file, split the filename on '_' and check whether the prefix has a match in the mapping dictionary; if it does, replace the filename prefix with the value from the mapping dictionary. <br> Also read the original file content and replace ANY occurrence of the old prefix with the new prefix (to change the output job filenames), <br> then write the result to the new file.

In [1]:
# settings for the conversion: where the files come from, where the renamed
# copies go, and which files are picked up
original_folder = "open_shell_tom"  # source folder (Tom naming convention)
new_folder = "open_shell_leah"  # destination folder (Leah naming convention)
file_extension = ".com"  # only files ending with this extension are converted

In [2]:
# read in smiles_with_mapping.xlsx which contains the mapping
import json
import os
import re

import pandas as pd
# the first row of the sheet holds the column names; keep only the two columns
# needed for the renaming ('id' and 'mapping')
df = pd.read_excel('smiles_with_mapping.xlsx', header=0)[['id', 'mapping']]
df

Unnamed: 0,id,mapping
0,pyrd1,Het001
1,pyrd2,Het002
2,pyrd3,Het003
3,pyrmd1,Het004
4,pyrmd2,Het005
5,pyrz1,Het006
6,pyrd4,Het007
7,pyrd5,Het008
8,pyrd6,Het009
9,pyrd7,Het010


In [3]:
# build the lookup table: each value of the 'id' column becomes a key and the
# value of the 'mapping' column on the same row becomes its value
mapping_dict_from_tom_to_leah = {
    old_name: new_name for old_name, new_name in zip(df['id'], df['mapping'])
}
# save the lookup table as JSON so it can be reused outside this notebook
with open('tom_to_leah_mapping.json', 'w') as json_file:
    json.dump(mapping_dict_from_tom_to_leah, json_file)

In [4]:
# list the entries of original_folder, keeping only regular files whose name
# ends with the desired file extension
files = [
    name
    for name in os.listdir(original_folder)
    if name.endswith(file_extension)
    and os.path.isfile(os.path.join(original_folder, name))
]
files

['pyrd10_conf-1_openshell.com',
 'pyrd10_conf-2_openshell.com',
 'pyrd11_conf-1_openshell.com',
 'pyrd12_conf-1_openshell.com',
 'pyrd12_conf-2_openshell.com',
 'pyrd13_conf-1_openshell.com',
 'pyrd14_conf-1_openshell.com',
 'pyrd15_conf-1_openshell.com',
 'pyrd16_conf-1_openshell.com',
 'pyrd16_conf-2_openshell.com',
 'pyrd16_conf-3_openshell.com',
 'pyrd17_conf-1_openshell.com',
 'pyrd17_conf-2_openshell.com',
 'pyrd17_conf-3_openshell.com',
 'pyrd18_conf-1_openshell.com',
 'pyrd18_conf-2_openshell.com',
 'pyrd1_conf-1_openshell.com',
 'pyrd2_conf-1_openshell.com',
 'pyrd3_conf-1_openshell.com',
 'pyrd4_conf-1_openshell.com',
 'pyrd5_conf-1_openshell.com',
 'pyrd6_conf-1_openshell.com',
 'pyrd7_conf-1_openshell.com',
 'pyrd7_conf-2_openshell.com',
 'pyrd7_conf-3_openshell.com',
 'pyrd8_conf-1_openshell.com',
 'pyrd8_conf-2_openshell.com',
 'pyrd9_conf-1_openshell.com',
 'pyrd9_conf-2_openshell.com',
 'pyrdz1_conf-1_openshell.com',
 'pyrdz2_conf-1_openshell.com',
 'pyrdz3_conf-1_opens

In [5]:
# For each file: take the part of the filename before the first '_' as the prefix
# and look it up in the mapping dictionary. When there is a match, write a copy
# of the file into new_folder under the mapped name, with standalone occurrences
# of the old prefix inside the file replaced by the new prefix as well.
# make sure the destination directory exists first
os.makedirs(new_folder, exist_ok=True)

for file in files:
    # partition splits on the first '_' only, e.g.
    # 'pyrd10_conf-1_openshell.com' -> prefix 'pyrd10', rest 'conf-1_openshell.com'
    prefix, underscore, rest = file.partition('_')
    if not underscore:
        print(f'{file} not in Tom naming convention(it should have an underscore)')
        continue
    print(f'prefix: {prefix}, rest: {rest}')

    if prefix not in mapping_dict_from_tom_to_leah:
        print(f'{file} not found in mapping dictionary')
        continue

    new_prefix = mapping_dict_from_tom_to_leah[prefix]
    new_filename = new_prefix + "_" + rest

    # Match the old prefix only when it is not directly preceded or followed by a
    # letter or digit. A plain str.replace would also rewrite the start of a
    # longer identifier (prefix 'pyrd1' inside 'pyrd10'), producing a wrong name.
    # '_' , '.', '=', '/' and whitespace still count as boundaries, so strings
    # such as 'pyrd1_conf-1_openshell.chk' are rewritten as before.
    prefix_pattern = re.compile(
        rf'(?<![A-Za-z0-9]){re.escape(prefix)}(?![A-Za-z0-9])'
    )

    with open(os.path.join(original_folder, file), 'r') as f, \
            open(os.path.join(new_folder, new_filename), 'w') as nf:
        for line in f:
            # a function replacement keeps new_prefix literal (no backslash
            # escape processing by re.sub)
            nf.write(prefix_pattern.sub(lambda _match: new_prefix, line))
    print(f'{file} -> {new_filename}')

prefix: pyrd10, rest: conf-1_openshell.com
pyrd10_conf-1_openshell.com -> Het016_conf-1_openshell.com
prefix: pyrd10, rest: conf-2_openshell.com
pyrd10_conf-2_openshell.com -> Het016_conf-2_openshell.com
prefix: pyrd11, rest: conf-1_openshell.com
pyrd11_conf-1_openshell.com -> Het017_conf-1_openshell.com
prefix: pyrd12, rest: conf-1_openshell.com
pyrd12_conf-1_openshell.com -> Het018_conf-1_openshell.com
prefix: pyrd12, rest: conf-2_openshell.com
pyrd12_conf-2_openshell.com -> Het018_conf-2_openshell.com
prefix: pyrd13, rest: conf-1_openshell.com
pyrd13_conf-1_openshell.com -> Het019_conf-1_openshell.com
prefix: pyrd14, rest: conf-1_openshell.com
pyrd14_conf-1_openshell.com -> Het020_conf-1_openshell.com
prefix: pyrd15, rest: conf-1_openshell.com
pyrd15_conf-1_openshell.com -> Het021_conf-1_openshell.com
prefix: pyrd16, rest: conf-1_openshell.com
pyrd16_conf-1_openshell.com -> Het028_conf-1_openshell.com
prefix: pyrd16, rest: conf-2_openshell.com
pyrd16_conf-2_openshell.com -> Het028_c