# Script to separate covalent and non-covalent fragment .mol files 

# In[163]:
import os
from rdkit import Chem
import re
import pandas as pd

def _write_named_mol(src_path, frag_name, out_dir):
    """Parse the mol file at *src_path*, title it *frag_name*, and write it to *out_dir*.

    The output file is ``<out_dir>/<frag_name>.mol``. Files RDKit cannot parse
    are skipped with a warning instead of aborting the whole run.
    """
    import warnings  # local import so the function does not rely on a file-level import

    mol = Chem.MolFromMolFile(src_path, sanitize=True)
    if mol is None:
        # MolFromMolFile signals a parse/sanitization failure by returning None.
        warnings.warn("RDKit could not parse {}; skipping".format(src_path))
        return

    mol.SetProp("_Name", frag_name)
    os.makedirs(out_dir, exist_ok=True)
    writer = Chem.SDWriter('{}/{}.mol'.format(out_dir, frag_name))
    try:
        writer.write(mol)
    finally:
        # Close explicitly so the record is flushed and the file handle released.
        writer.close()


def getMolFiles(in_directory, mpro_sum_df):
    """Copy fragment ``.mol`` files into covalent / non-covalent output folders.

    Walks *in_directory* recursively. For every ``.mol`` file, the fragment
    name is the file name with a trailing ``_0.mol`` removed. That name is
    looked up (as a literal substring) in the ``Dataset`` column of
    *mpro_sum_df*, and the matching row's ``Site`` value selects the target:

    * ``"A - active"``            -> ``data/non-cov_frags/<frag_name>.mol``
    * ``"B - active - covalent"`` -> ``data/cov_frags/<frag_name>.mol``

    Files with any other site, with no match, or with more than one matching
    row are left alone.

    Args:
        in_directory: Root directory to search for ``.mol`` files.
        mpro_sum_df: DataFrame with string columns ``Dataset`` and ``Site``.

    Returns:
        None. Output is written to disk.
    """
    site_output_dirs = {
        "A - active": "data/non-cov_frags",
        "B - active - covalent": "data/cov_frags",
    }

    for subdir, _dirs, files in os.walk(in_directory):
        for file in files:
            if not file.endswith(".mol"):
                continue

            # Escape the dot and anchor at the end so only the real suffix is removed.
            frag_name = re.sub(r'_0\.mol$', '', file)

            # regex=False: the file name is a literal, not a pattern.
            # na=False: rows with a missing Dataset value simply do not match.
            is_match = mpro_sum_df['Dataset'].str.contains(
                frag_name, regex=False, na=False
            )
            sites = mpro_sum_df.loc[is_match, "Site"]
            if len(sites) != 1:
                # Zero or ambiguous matches never selected a folder before either.
                continue

            out_dir = site_output_dirs.get(str(sites.iloc[0]).strip())
            if out_dir is not None:
                _write_named_mol(os.path.join(subdir, file), frag_name, out_dir)

# In[164]:
# Load the Mpro XChem screening hit summary spreadsheet into a DataFrame
mpro_sum_df = pd.read_excel(
    "data/Mpro full XChem screen - hits summary - ver-2020-05-16.xlsx"
)

# In[165]:
# Sort the fragments into covalent and non-covalent folders
getMolFiles(in_directory="data/Mpro_frags", mpro_sum_df=mpro_sum_df)