### This notebook converts file names from Tom's naming convention (named by common structure, for example pyrd1, pyrd2, ...) back to Leah's naming convention (Het001, Het002, Het003, ...).

In [1]:
# Folder read from (Tom-style names) and folder written to (Leah-style names).
original_folder, new_folder = 'closed_shell_tom', 'closed_shell_leah'

In [2]:
# read in smiles_with_mapping.xlsx which contains the mapping
import os
import re

import ipywidgets as widgets
import pandas as pd
from IPython.display import display

# Load the mapping spreadsheet; row 0 of the sheet provides the column names.
df = pd.read_excel(io='smiles_with_mapping.xlsx', header=0)
df

Unnamed: 0,SMILES,id,mapping,Unnamed: 3
0,CC1=NC2=C(C=CC=C2)C=C1,pyrd1,Het001,pyridine as pyrd
1,CC1=CC(C=CC=C2)=C2N=C1,pyrd2,Het002,pyrazine as pyrz
2,CC1=CC=NC2=C1C=CC=C2,pyrd3,Het003,pyrimidine as pyrmd
3,CC1=NC2=C(C=CC=C2)C=N1,pyrmd1,Het004,pyridazine as pyrdz
4,CC1=C(C=CC=C2)C2=NC=N1,pyrmd2,Het005,
5,CC1=NC2=C(C=CC=C2)N=C1,pyrz1,Het006,
6,CC1=NC=CC2=CC=CC=C21,pyrd4,Het007,
7,CC1=CC2=CC=CC=C2C=N1,pyrd5,Het008,
8,CC1=CN=CC2=CC=CC=C21,pyrd6,Het009,
9,CCC1=NC2=CC=CC=C2C=C1,pyrd7,Het010,


In [3]:
# Keep just the two columns used to build the mapping: 'id' and 'mapping'.
df = df.loc[:, ['id', 'mapping']]
df

Unnamed: 0,id,mapping
0,pyrd1,Het001
1,pyrd2,Het002
2,pyrd3,Het003
3,pyrmd1,Het004
4,pyrmd2,Het005
5,pyrz1,Het006
6,pyrd4,Het007
7,pyrd5,Het008
8,pyrd6,Het009
9,pyrd7,Het010


In [4]:
# Build a lookup table: each value of the 'id' column is a key,
# and the value on the same row of the 'mapping' column is its value.
mapping_dict_from_tom_to_leah = {
    tom_id: leah_id for tom_id, leah_id in zip(df['id'], df['mapping'])
}
mapping_dict_from_tom_to_leah

{'pyrd1': 'Het001',
 'pyrd2': 'Het002',
 'pyrd3': 'Het003',
 'pyrmd1': 'Het004',
 'pyrmd2': 'Het005',
 'pyrz1': 'Het006',
 'pyrd4': 'Het007',
 'pyrd5': 'Het008',
 'pyrd6': 'Het009',
 'pyrd7': 'Het010',
 'pyrd8': 'Het011',
 'pyrd9': 'Het012',
 'pyrmd3': 'Het013',
 'pyrmd4': 'Het014',
 'pyrz2': 'Het015',
 'pyrd10': 'Het016',
 'pyrd11': 'Het017',
 'pyrd12': 'Het018',
 'pyrd13': 'Het019',
 'pyrd14': 'Het020',
 'pyrd15': 'Het021',
 'pyrmd5': 'Het022',
 'pyrmd6': 'Het023',
 'pyrmd7': 'Het024',
 'pyrz3': 'Het025',
 'pyrdz1': 'Het026',
 'pyrdz2': 'Het027',
 'pyrd16': 'Het028',
 'pyrd17': 'Het029',
 'pyrd18': 'Het030',
 'pyrmd8': 'Het031',
 'pyrmd9': 'Het032',
 'pyrmd10': 'Het033',
 'pyrz4': 'Het034',
 'pyrdz3': 'Het035'}

In [5]:
# List the regular files inside `original_folder`, skipping Jupyter notebooks (.ipynb).
# os.listdir() returns entries in arbitrary order, so the result is sorted to make
# the listing (and the order in which files are later processed) reproducible.
files = sorted(
    f
    for f in os.listdir(original_folder)
    if os.path.isfile(os.path.join(original_folder, f)) and not f.endswith('.ipynb')
)
files

['pyrd10_conf-1.com',
 'pyrd10_conf-2.com',
 'pyrd11_conf-1.com',
 'pyrd12_conf-1.com',
 'pyrd12_conf-2.com',
 'pyrd13_conf-1.com',
 'pyrd14_conf-1.com',
 'pyrd15_conf-1.com',
 'pyrd16_conf-1.com',
 'pyrd16_conf-2.com',
 'pyrd16_conf-3.com',
 'pyrd17_conf-1.com',
 'pyrd17_conf-2.com',
 'pyrd17_conf-3.com',
 'pyrd18_conf-1.com',
 'pyrd18_conf-2.com',
 'pyrd1_conf-1.com',
 'pyrd2_conf-1.com',
 'pyrd3_conf-1.com',
 'pyrd4_conf-1.com',
 'pyrd5_conf-1.com',
 'pyrd6_conf-1.com',
 'pyrd7_conf-1.com',
 'pyrd7_conf-2.com',
 'pyrd7_conf-3.com',
 'pyrd8_conf-1.com',
 'pyrd8_conf-2.com',
 'pyrd9_conf-1.com',
 'pyrd9_conf-2.com',
 'pyrdz1_conf-1.com',
 'pyrdz2_conf-1.com',
 'pyrdz3_conf-1.com',
 'pyrdz3_conf-2.com',
 'pyrdz3_conf-3.com',
 'pyrmd10_conf-1.com',
 'pyrmd10_conf-2.com',
 'pyrmd1_conf-1.com',
 'pyrmd2_conf-1.com',
 'pyrmd3_conf-1.com',
 'pyrmd3_conf-2.com',
 'pyrmd4_conf-1.com',
 'pyrmd4_conf-2.com',
 'pyrmd5_conf-1.com',
 'pyrmd6_conf-1.com',
 'pyrmd7_conf-1.com',
 'pyrmd8_conf-1.com',

In [6]:
# Let the user pick which file extension to convert: collect every extension that
# occurs in `files` so it can be offered in a dropdown.
# os.path.splitext is used instead of splitting on '.', so a name without an
# extension no longer shows up in the list as if the whole name were an extension;
# names whose suffix is just a bare '.' are skipped as well.
exts = sorted({
    suffix[1:]  # drop the leading dot
    for suffix in (os.path.splitext(name)[1] for name in files)
    if len(suffix) > 1
})

# Dropdown listing the extensions collected in `exts`.
ext_widget = widgets.Dropdown(
    options=exts,
    description='File extension to convert:',
    disabled=False,
    style={'description_width': 'initial'},
)

# Button the user clicks to confirm the dropdown choice.
submit_button = widgets.Button(description='Submit', disabled=False, button_style='')

# Flag that records whether the button has been clicked; set by the callback below.
submitted = False


def on_submit_button_clicked(b):
    """Record that a choice was submitted and print the selected extension.

    `b` is the argument supplied by the button's click handler; it is not used.
    """
    global submitted
    submitted = True
    print(f"Selected extension: {ext_widget.value}")


# Register the callback, then show both controls inside one HBox.
submit_button.on_click(on_submit_button_clicked)
display(widgets.HBox(children=[ext_widget, submit_button]))

HBox(children=(Dropdown(description='File extension to convert:', options=('com',), style=DescriptionStyle(des…

Selected extension: com


In [8]:
# Refuse to continue until the Submit button has been clicked at least once.
if submitted:
    print(f"You selected the extension: {ext_widget.value}")
else:
    raise Exception("You haven't selected an extension yet!")

You selected the extension: com


In [9]:
# Keep only the files whose name ends with '.' followed by the chosen extension.
ext = ext_widget.value
files = list(filter(lambda name: name.endswith('.' + ext), files))
files

['pyrd10_conf-1.com',
 'pyrd10_conf-2.com',
 'pyrd11_conf-1.com',
 'pyrd12_conf-1.com',
 'pyrd12_conf-2.com',
 'pyrd13_conf-1.com',
 'pyrd14_conf-1.com',
 'pyrd15_conf-1.com',
 'pyrd16_conf-1.com',
 'pyrd16_conf-2.com',
 'pyrd16_conf-3.com',
 'pyrd17_conf-1.com',
 'pyrd17_conf-2.com',
 'pyrd17_conf-3.com',
 'pyrd18_conf-1.com',
 'pyrd18_conf-2.com',
 'pyrd1_conf-1.com',
 'pyrd2_conf-1.com',
 'pyrd3_conf-1.com',
 'pyrd4_conf-1.com',
 'pyrd5_conf-1.com',
 'pyrd6_conf-1.com',
 'pyrd7_conf-1.com',
 'pyrd7_conf-2.com',
 'pyrd7_conf-3.com',
 'pyrd8_conf-1.com',
 'pyrd8_conf-2.com',
 'pyrd9_conf-1.com',
 'pyrd9_conf-2.com',
 'pyrdz1_conf-1.com',
 'pyrdz2_conf-1.com',
 'pyrdz3_conf-1.com',
 'pyrdz3_conf-2.com',
 'pyrdz3_conf-3.com',
 'pyrmd10_conf-1.com',
 'pyrmd10_conf-2.com',
 'pyrmd1_conf-1.com',
 'pyrmd2_conf-1.com',
 'pyrmd3_conf-1.com',
 'pyrmd3_conf-2.com',
 'pyrmd4_conf-1.com',
 'pyrmd4_conf-2.com',
 'pyrmd5_conf-1.com',
 'pyrmd6_conf-1.com',
 'pyrmd7_conf-1.com',
 'pyrmd8_conf-1.com',

In [10]:
# Copy every selected file from `original_folder` into `new_folder`, renaming it
# from Tom's convention to Leah's:
#   * the part of the file name before the first underscore is looked up in
#     `mapping_dict_from_tom_to_leah`;
#   * the rest of the file name is kept unchanged;
#   * inside the file, occurrences of the old prefix are replaced by the new one.
# Files without an underscore, or whose prefix is not in the mapping, are reported
# and skipped. The original files are left untouched.
os.makedirs(new_folder, exist_ok=True)

for file in files:
    # Split on the FIRST underscore only, so any further underscores in the rest
    # of the name are preserved (joining the pieces with '' used to drop them).
    prefix, separator, rest = file.partition('_')
    if not separator:
        print(f'{file} not in Tom naming convention(it should have an underscore)')
        continue
    print(f'prefix: {prefix}, rest: {rest}')

    if prefix not in mapping_dict_from_tom_to_leah:
        print(f'{file} not found in mapping dictionary')
        continue

    new_prefix = mapping_dict_from_tom_to_leah[prefix]
    new_filename = new_prefix + "_" + rest

    # Match the prefix only when it is not immediately followed by another digit,
    # so that e.g. 'pyrd1' does not rewrite the beginning of 'pyrd10'.
    prefix_pattern = re.compile(re.escape(prefix) + r'(?!\d)')

    # Stream the file line by line, writing the rewritten text to the new location.
    with open(os.path.join(original_folder, file), 'r') as f, \
            open(os.path.join(new_folder, new_filename), 'w') as nf:
        for line in f:
            # A function replacement inserts `new_prefix` literally (no backslash
            # or group-reference processing of the replacement text).
            nf.write(prefix_pattern.sub(lambda match: new_prefix, line))
    print(f'{file} -> {new_filename}')

prefix: pyrd10, rest: conf-1.com
pyrd10_conf-1.com -> Het016_conf-1.com
prefix: pyrd10, rest: conf-2.com
pyrd10_conf-2.com -> Het016_conf-2.com
prefix: pyrd11, rest: conf-1.com
pyrd11_conf-1.com -> Het017_conf-1.com
prefix: pyrd12, rest: conf-1.com
pyrd12_conf-1.com -> Het018_conf-1.com
prefix: pyrd12, rest: conf-2.com
pyrd12_conf-2.com -> Het018_conf-2.com
prefix: pyrd13, rest: conf-1.com
pyrd13_conf-1.com -> Het019_conf-1.com
prefix: pyrd14, rest: conf-1.com
pyrd14_conf-1.com -> Het020_conf-1.com
prefix: pyrd15, rest: conf-1.com
pyrd15_conf-1.com -> Het021_conf-1.com
prefix: pyrd16, rest: conf-1.com
pyrd16_conf-1.com -> Het028_conf-1.com
prefix: pyrd16, rest: conf-2.com
pyrd16_conf-2.com -> Het028_conf-2.com
prefix: pyrd16, rest: conf-3.com
pyrd16_conf-3.com -> Het028_conf-3.com
prefix: pyrd17, rest: conf-1.com
pyrd17_conf-1.com -> Het029_conf-1.com
prefix: pyrd17, rest: conf-2.com
pyrd17_conf-2.com -> Het029_conf-2.com
prefix: pyrd17, rest: conf-3.com
pyrd17_conf-3.com -> Het029_con