# Introduction

# Imports

In [None]:
from pathlib import Path
from harbor.plotting.ligands import plot_aligned_ligands

To avoid having this repo depend directly on the asapdiscovery repo, I've commented this out, but we'll use a test example from the COVID Moonshot molecules:
```
from asapdiscovery.data.testing.test_resources import fetch_test_file
mypath = fetch_test_file("Mpro_combined_labeled.sdf")
``` 

In [None]:
# Relative path to a local copy of the SDF file named in the snippet above.
mypath = Path("../data/Mpro_combined_labeled.sdf")

I'm copying this code from the asapdiscovery repo.
Once that is conda-installable, I'll make it a dependency of this repo and use its tools for loading molecules.

# Load Molecules

In [None]:
from rdkit import Chem
# Open the SDF file with RDKit; the Path is converted to a plain string
# before being handed to SDMolSupplier.
mols = Chem.SDMolSupplier(str(mypath))

In [None]:
# Materialize the supplier into a regular list; later cells index it
# (mols[0]) and slice it (mols[:20]).
mols = [mol for mol in mols]

In [None]:
import mols2grid

In [None]:
# define the grid to show the scaffolds
grid = mols2grid.display(mols)

In [None]:
grid

# MCSS-based Clustering

In [None]:
from harbor.clustering.hierarchical import ClusterResults, ClusterCenter, HeirarchicalClustering
from openeye import oechem

In [None]:
# Inspect the properties stored on the first molecule; the conversion loop
# below reads the "Compound_ID" entry from this same dictionary.
mol: Chem.Mol = mols[0]
mol.GetPropsAsDict()

In [None]:
# Convert the first 20 RDKit molecules into OpenEye molecules by round-tripping
# through SMILES, and record each molecule's "Compound_ID" property.
# oemols[i] and mol_ids[i] always refer to the same compound.
oemols = []
mol_ids = []
for rdkit_mol in mols[:20]:
    smiles = Chem.MolToSmiles(rdkit_mol)
    properties = rdkit_mol.GetPropsAsDict()
    mol = oechem.OEMol()
    # OESmilesToMol signals failure through its boolean return value. Fail
    # loudly rather than silently clustering an empty molecule.
    if not oechem.OESmilesToMol(mol, smiles):
        raise ValueError(
            f"Could not parse SMILES {smiles!r} "
            f"for compound {properties['Compound_ID']!r}"
        )
    # Append to both lists only after a successful parse so they stay aligned.
    mol_ids.append(properties["Compound_ID"])
    oemols.append(mol)

In [None]:
from harbor.clustering import hierarchical as h
from importlib import reload
# Re-execute the module so this session uses its current on-disk version.
reload(h)

In [None]:
# Build the clusterer from the OpenEye molecules and their matching IDs
# (both lists were filled in lockstep above).
clusterer = h.HeirarchicalClustering(molecules=oemols, mol_ids=mol_ids)

In [None]:
# Run the clustering with max_iterations=10. The result is consumed below
# via .items() as (cluster_id, cluster) pairs.
clusters = clusterer.cluster(max_iterations=10)

In [None]:
len(clusters)

In [None]:
def get_descendents(cluster):
    """Collect the bottom-level clusters reachable from ``cluster``.

    Each entry of ``cluster.children`` is inspected in order:

    * a ``str`` child marks ``cluster`` itself as a bottom-level node, so
      ``cluster`` is added to the result (once per string child);
    * any other child is treated as a nested cluster and searched
      recursively.

    Returns a flat list of cluster objects in traversal order.
    """
    found = []
    for node in cluster.children:
        found += [cluster] if isinstance(node, str) else get_descendents(node)
    return found

In [None]:
from harbor.plotting import ligands as l
# Re-execute the plotting module so this session uses its current on-disk version.
reload(l)

In [None]:
# For every top-level cluster: report how many bottom-level clusters it
# contains, write a "cluster_<id>.png" image of their representatives against
# the cluster's own representative, and record the first child ID of each
# bottom-level cluster.
ids_found = []
for cluster_id, cluster in clusters.items():
    print(f"Cluster {cluster_id}")
    descendents = get_descendents(cluster)
    print(f"Children: {len(descendents)}")
    l.plot_ligands_with_mcs(
        filename=f"cluster_{cluster_id}.png",
        mols=[desc.repr for desc in descendents],
        mcs_mol=cluster.repr,
    )
    ids_found += [desc.children[0] for desc in descendents]

In [None]:
# Unique IDs collected while walking the clusters.
set(ids_found)

In [None]:
# IDs that were passed to the clusterer but were not collected from any cluster.
set(mol_ids) - set(ids_found)

In [None]:
def get_row_col(i, max_cols, zero_indexed=True):
    """Map a flat index onto a (row, col) position in a grid.

    Args:
        i: Flat, zero-based index of the item.
        max_cols: Number of columns per row.
        zero_indexed: When true, return zero-based coordinates; otherwise
            both coordinates are shifted up by one.

    Returns:
        Tuple ``(row, col)``.
    """
    shift = 0 if zero_indexed else 1
    quotient, remainder = divmod(i, max_cols)
    return quotient + shift, remainder + shift

In [None]:
# Print the one-based (row, col) positions of six items laid out four per row.
for i in range(6):
    print(get_row_col(i, 4, zero_indexed=False))