In [None]:
# Move the working directory up one level before the imports below run.
# NOTE(review): presumably this makes the `chemicalgof` package in the parent directory importable — confirm.
# Caution: the path is relative, so re-running this cell moves up one more level each time.
%cd ..

import networkx as nx
from rdkit import Chem

import pandas as pd
import numpy as np

from chemicalgof import (
    Reduce2GoF,
    fragSMILES2GoF,
    Sequence2GoF,
    GoF2Mol,
    GoF2fragSMILES,
    encode,
    decode,
    split
)

from chemicalgof.write import CanonicalGoF2fragSMILES

# How to reduce an atom-based molecular graph to a fragment-based molecular graph (DiG)

In [None]:
# Example molecule, given as a SMILES string, and the RDKit Mol object parsed from it.
smiles = 'Cc1cc2c(cc1Cc1ccc(C(=O)NC[C@H]3CC[C@H](C(N)=O)CC3)o1)C(C)(C)CCC2(C)C'
mol = Chem.MolFromSmiles(smiles)

In [None]:
# Show the parsed molecule as this cell's output.
mol

In [None]:
# Reduce the molecule to its fragment-based graph (DiG), starting from the SMILES string.
DiG = Reduce2GoF(smiles=smiles)
# Index the graph with the fragment node that GetFragsByIdx returns for index 7.
# NOTE(review): presumably this displays that fragment's neighbours, as indexing a networkx graph would — confirm.
DiG[DiG.GetFragsByIdx(7)]

In [None]:
# Look up the data attached to the edge from the fragment at index 7 to the fragment at index 8.
DiG.get_edge_data(DiG.GetFragsByIdx(7), DiG.GetFragsByIdx(8))

In [None]:
# Alternatively, build the graph from an RDKit Mol object instead of a SMILES string.
DiG = Reduce2GoF(mol=mol) # Hint: make sure the mol object derives from a canonical SMILES

In [None]:
# Same edge lookup (fragment 7 -> fragment 8), now on the graph built from the Mol object.
DiG.get_edge_data(DiG.GetFragsByIdx(7), DiG.GetFragsByIdx(8))

# The reduced graph (DiG) can be traversed and encoded into fragSMILES in different ways

In [None]:
# Second example molecule; reduce it to its fragment graph for the encoding cells that follow.
smiles = 'COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c3ccc(Cl)cc3'
DiG = Reduce2GoF(smiles=smiles)

In [None]:
# Encode the graph as fragSMILES with canonization enabled, through two entry points.
fragsmiles = GoF2fragSMILES(DiG, canonize=True, random=False)
fragsmiles2 = CanonicalGoF2fragSMILES(DiG)
# The two encodings are expected to be identical; if not, fail showing both strings.
assert fragsmiles == fragsmiles2, (
    f"canonical encodings differ: {fragsmiles!r} != {fragsmiles2!r}"
)
# Display the canonical fragSMILES string as the cell output.
fragsmiles

In [None]:
# Show the nodes of the reduced graph.
DiG.nodes

In [None]:
# Encode the same graph with canonization and random traversal both switched off.
# NOTE(review): the variable name suggests the result follows the graph's own node order — confirm.
ordered_fragsmiles = GoF2fragSMILES(DiG, canonize=False, random=False)
print(ordered_fragsmiles)

In [None]:
# Decode the canonical fragSMILES string back into a graph.
decoded_DiG = fragSMILES2GoF(fragsmiles)

In [None]:
# Show the nodes of the decoded graph, for comparison with DiG.nodes displayed earlier.
decoded_DiG.nodes

# Random traversal of the reduced graph for augmentation

In [None]:
# Same example molecule as in the previous section; rebuild its fragment graph for the augmentation cells.
smiles = 'COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c3ccc(Cl)cc3'
DiG = Reduce2GoF(smiles=smiles)

In [None]:
# Collect distinct fragSMILES strings for the same graph by encoding it with
# random traversal (random=True) until enough different strings are found.
augmented = []        # distinct fragSMILES strings found so far, in discovery order
augmentation = 5      # how many distinct strings to collect
max_attempts = 20     # how many duplicate draws to tolerate before giving up
attempts = 0          # duplicate draws seen so far (successful draws are not counted)

# Stop when enough strings were collected or the duplicate budget is spent.
# `<` (not `<=`) so that at most `max_attempts` duplicates are drawn, not one more.
while len(augmented) < augmentation and attempts < max_attempts:
    decoded = GoF2fragSMILES(DiG, canonize=False, random=True)
    if decoded in augmented:
        # Already seen: this draw counts against the duplicate budget.
        attempts += 1
    else:
        augmented.append(decoded)

In [None]:
# Show the distinct fragSMILES strings that were collected.
augmented