In [1]:
from itertools import permutations
import json
from matplotlib import pyplot as plt
from rdkit import Chem
from rdkit.Chem import Draw

%matplotlib inline

In [2]:
# SMILES strings for common reagents, grouped by family.

# Inorganic and diatomic reagents
hydrogen = "[H][H]"
chlorine = "ClCl"
nitric_acid = "O[N+](=O)[O-]"

# Nitrogen compounds
ammonia = "N"
hydrazine = "NN"
aniline = "c1ccc(N)cc1"

# Oxygen compounds: alcohols, phenol, acid, ethers
ethyl_alcohol = "CCO"
isopropyl_alcohol = "CC(O)C"
phenol = "c1cc(O)ccc1"
acetic_acid = "CC(=O)O"
dimethyl_ether = "COC"
diethyl_ether = "CCOCC"

In [3]:
# Reaction-type labels; the generator cells store them as the steps of an
# expected path.
ar_nu, ar_el = (
    "aromatic nucleophilic substitution",
    "aromatic electrophilic substitution",
)

In [4]:
# Electron-withdrawing substituents (EWG), written as SMILES fragments that
# are later wrapped in parentheses and attached to a ring atom.
ewgs = [
    "[N+](=O)[O-]",
    "C(=O)CC",
    "C(=O)NC",
    "C(F)(F)F",
    "C(Cl)(Cl)(Cl)",
    "C#N",
    "[N+](C)(C)C",
    "OS(=O)=O",
    "OS(=O)(=O)C",
]

# Electron-donating substituents (EDG), same fragment convention.
edgs = [
    "N",
    "N(C)C",
    "C(C)(C)C",
    "O",
    "NC(=O)CC",
    "S",
    "SC",
]

# Incoming nucleophiles as stand-alone molecules (second reagent of a case).
nuc_ins = [
    "N",
    ethyl_alcohol,
    isopropyl_alcohol,
    "N(CC)CC",
    "N(C)C",
    "CN",
]

# The same nucleophiles written as ring substituents; paired index-by-index
# with nuc_ins via zip() in the generator cells.
nuc_acs = [
    "N",
    "OCC",
    "OC(C)C",
    "N(CC)CC",
    "N(C)C",
    "NC",
]

# Groups that are replaced by the nucleophile in the generated products.
leaving_groups = [
    "Cl",
    "F",
    "OS(=O)=O",
    "OS(=O)(=O)C",
]

In [5]:
# Accumulator for every generated case; the generator cells below append to it.
# Re-run this cell before re-running a generator cell, otherwise that cell's
# cases are appended a second time.
tests = list()

In [118]:
# Pyrrole, w/o explicit EWG. Ortho- position.
# The leaving group is placed in either of the two template slots and is
# replaced by the nucleophile; one substitution step is expected.

pf = "c{}1ccc{}[nH]1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    # sorted() pins the iteration order: a bare set of string tuples is
    # ordered by randomized string hashes, so the order of the generated
    # cases would otherwise change from one kernel session to the next.
    placements = sorted(set(permutations(["", leaving_groupf])))
    for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
        nuc_acf = "({})".format(nuc_ac)
        for perm in placements:
            # Same slots as the reagent, nucleophile instead of leaving group.
            perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]

            tests.append({
                "reagents": [pf.format(*perm), nuc_in],
                "product": pf.format(*perm_out),
                "paths": [[ar_nu]],
            })

In [119]:
# Pyrrole, w/o explicit EWG. Meta- position.
# The empty "paths" list records that no substitution path is expected.

# Reagent and product are built from the SAME template, so the product is
# the reagent with the leaving group swapped for the nucleophile. (The
# product was previously built from the ortho template "c{}1ccc{}[nH]1",
# which put the nucleophile on a different ring atom than the leaving group.)
pf = "c1c{}c{}c[nH]1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    # sorted() pins the iteration order; a bare set of string tuples is
    # ordered by randomized hashes and would vary between kernel sessions.
    placements = sorted(set(permutations(["", leaving_groupf])))
    for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
        nuc_acf = "({})".format(nuc_ac)
        for perm in placements:
            perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]

            tests.append({
                "reagents": [pf.format(*perm), nuc_in],
                "product": pf.format(*perm_out),
                "paths": [],
            })

In [120]:
# Pyrrole, EDG in para-, interfering effect.
# The donor fills the first template slot and the leaving group the second;
# the empty "paths" list records that no substitution path is expected.

ring = "c{}1ccc{}[nH]1"

tests.extend(
    {
        "reagents": [ring.format("(" + donor + ")", "(" + lg + ")"), incoming],
        "product": ring.format("(" + donor + ")", "(" + attached + ")"),
        "paths": [],
    }
    for lg in leaving_groups
    for donor in edgs
    for incoming, attached in zip(nuc_ins, nuc_acs)
)

In [121]:
# Pyrrole, EDG in meta-, non-interfering.
# Two ring templates are processed one after the other; they differ in the
# order in which the EDG slot and the leaving-group slot are written.
# One substitution step is expected for every case.

for ring in ("c1c{edg}cc{lg}[nH]1", "c{lg}1cc{edg}c[nH]1"):
    for leaving_group in leaving_groups:
        lg_branch = "({})".format(leaving_group)
        for edg in edgs:
            edg_branch = "({})".format(edg)
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                nuc_branch = "({})".format(nuc_ac)
                substrate = ring.format(edg=edg_branch, lg=lg_branch)
                # Product: nucleophile sits where the leaving group was.
                substituted = ring.format(edg=edg_branch, lg=nuc_branch)

                tests.append({
                    "reagents": [substrate, nuc_in],
                    "product": substituted,
                    "paths": [[ar_nu]],
                })

In [123]:
# Pyrrole, EWG in para- wrt the leaving group (coordinating effect).
# The EWG fills the first template slot, the leaving group the second;
# one substitution step is expected.

scaffold = "c{}1ccc{}[nH]1"

for lg in leaving_groups:
    for acceptor in ewgs:
        for incoming, attached in zip(nuc_ins, nuc_acs):
            # Wrap each fragment in parentheses so it attaches as a branch.
            ewg_branch, lg_branch, nuc_branch = (
                "({})".format(fragment) for fragment in (acceptor, lg, attached)
            )
            case = {
                "reagents": [scaffold.format(ewg_branch, lg_branch), incoming],
                "product": scaffold.format(ewg_branch, nuc_branch),
                "paths": [[ar_nu]],
            }
            tests.append(case)

In [124]:
# Pyrrole, 1 explicit EWG, two leaving groups, one of them in meta- wrt both EWGs.
# Template slots, in order: EWG, first leaving group, second leaving group.
# The product replaces the SECOND leaving group with the nucleophile.

pf = "c{}1cc{}c{}[nH]1"

for leaving_group1 in leaving_groups:
    leaving_group1f = "({})".format(leaving_group1)
    for leaving_group2 in leaving_groups:
        leaving_group2f = "({})".format(leaving_group2)
        for ewg in ewgs:
            ewgf = "({})".format(ewg)
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                nuc_acf = "({})".format(nuc_ac)
                perm = [leaving_group1f, leaving_group2f]

                # Fix: use the parenthesised fragment (nuc_acf). The bare
                # nuc_ac used before was spliced into the ring chain itself
                # instead of being attached as a branch, producing a
                # different (or invalid) molecule.
                perm_out = [leaving_group1f, nuc_acf]

                reagents = [pf.format(ewgf, *perm), nuc_in]
                product = pf.format(ewgf, *perm_out)

                # If the reaction could go two ways we usually cannot control
                # which one it takes, so no path is expected. In the
                # symmetrical case both ways give the same compound and the
                # path can be relied on.
                # Alternative route: the EWG itself leaves and the second
                # leaving group stays on the ring.
                alternative = pf.format(nuc_acf, *perm)
                two_ways = (
                    ewg in leaving_groups
                    and leaving_group2 in ewgs
                    and Chem.CanonSmiles(product) != Chem.CanonSmiles(alternative)
                )
                paths = [] if two_ways else [[ar_nu]]

                tests.append({
                    "reagents": reagents,
                    "product": product,
                    "paths": paths,
                })

In [125]:
# Pyrazole, w/o explicit EWG, ortho- position.
# The leaving group is placed in either template slot and replaced by the
# nucleophile; one substitution step is expected.

pf = "c1{}cc{}[nH]n1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    # sorted() pins the iteration order; a bare set of string tuples is
    # ordered by randomized hashes and would vary between kernel sessions.
    placements = sorted(set(permutations([leaving_groupf, ""])))
    for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
        nuc_acf = "({})".format(nuc_ac)
        for perm in placements:
            perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]

            tests.append({
                "reagents": [pf.format(*perm), nuc_in],
                "product": pf.format(*perm_out),
                "paths": [[ar_nu]],
            })

In [126]:
# Pyrazole, w/o explicit EWG, meta- position.
# Single substituent slot; the empty "paths" list records that no
# substitution path is expected.

ring = "c1c{}c[nH]n1"

for lg in leaving_groups:
    substrate = ring.format("({})".format(lg))
    for incoming, attached in zip(nuc_ins, nuc_acs):
        substituted = ring.format("({})".format(attached))
        tests.append({
            "reagents": [substrate, incoming],
            "product": substituted,
            "paths": [],
        })

In [127]:
# Imidazole, w/o explicit EWG, reactive position (ortho- wrt both).
# Single substituent slot; one substitution step is expected.

pf = "c1c[nH]c{}n1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
        nuc_acf = "({})".format(nuc_ac)

        tests.append({
            "reagents": [pf.format(leaving_groupf), nuc_in],
            "product": pf.format(nuc_acf),
            # Fix: a path is a list of reaction labels. This used to hold
            # nuc_ac (the nucleophile's SMILES fragment) instead of ar_nu.
            "paths": [[ar_nu]],
        })

In [128]:
# Imidazole, w/o explicit EWG, unreactive position (meta- wrt any).
# The empty "paths" list records that no substitution path is expected.

pf = "c{}1c{}[nH]cn1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    # sorted() pins the iteration order; a bare set of string tuples is
    # ordered by randomized hashes and would vary between kernel sessions.
    placements = sorted(set(permutations([leaving_groupf, ""])))
    for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
        nuc_acf = "({})".format(nuc_ac)
        for perm in placements:
            perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]

            tests.append({
                "reagents": [pf.format(*perm), nuc_in],
                "product": pf.format(*perm_out),
                "paths": [],
            })

In [129]:
# Imidazole, with explicit EWG in meta-, no interference.
# The EWG fills the first template slot, the leaving group the second;
# one substitution step is expected.

ring = "c{}1c[nH]c{}n1"

tests.extend(
    {
        "reagents": [ring.format("(" + acceptor + ")", "(" + lg + ")"), incoming],
        "product": ring.format("(" + acceptor + ")", "(" + attached + ")"),
        "paths": [[ar_nu]],
    }
    for lg in leaving_groups
    for acceptor in ewgs
    for incoming, attached in zip(nuc_ins, nuc_acs)
)

In [130]:
# Imidazole, with explicit EDG in meta-, no interference.
# The EDG fills the first template slot, the leaving group the second;
# one substitution step is expected.

scaffold = "c{}1c[nH]c{}n1"

for lg in leaving_groups:
    lg_branch = "({})".format(lg)
    for donor in edgs:
        donor_branch = "({})".format(donor)
        substrate = scaffold.format(donor_branch, lg_branch)
        for incoming, attached in zip(nuc_ins, nuc_acs):
            substituted = scaffold.format(donor_branch, "({})".format(attached))
            tests.append({
                "reagents": [substrate, incoming],
                "product": substituted,
                "paths": [[ar_nu]],
            })

In [131]:
# Pyridine, w/o explicit EWG, meta- position.

pf = "c1c{}cc{}cn1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    # sorted() pins the iteration order; a bare set of string tuples is
    # ordered by randomized hashes and would vary between kernel sessions.
    placements = sorted(set(permutations([leaving_groupf, ""])))
    for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
        nuc_acf = "({})".format(nuc_ac)
        for perm in placements:
            perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]

            tests.append({
                "reagents": [pf.format(*perm), nuc_in],
                "product": pf.format(*perm_out),
                # NOTE(review): other no-reaction cases in this notebook use
                # [] here rather than [[]]; value kept as-is -- confirm which
                # form the consumer of the JSON expects.
                "paths": [[]],
            })

In [132]:
# Pyridine, w/o explicit EWG, ortho-/para- position.
# The leaving group is placed in each of the three template slots in turn
# and replaced by the nucleophile; one substitution step is expected.

pf = "c{}1cc{}cc{}n1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    # sorted() pins the iteration order; a bare set of string tuples is
    # ordered by randomized hashes and would vary between kernel sessions.
    placements = sorted(set(permutations([leaving_groupf, "", ""])))
    for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
        nuc_acf = "({})".format(nuc_ac)
        for perm in placements:
            perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]

            tests.append({
                "reagents": [pf.format(*perm), nuc_in],
                "product": pf.format(*perm_out),
                "paths": [[ar_nu]],
            })

In [133]:
# Pyridine, explicit EDG, interfering.
# Named template slots: lg_0..lg_2 take the leaving group (one of them,
# the others stay empty), dg_0..dg_1 take the EDG (one of them).

pf = "c{lg_0}1c{dg_0}c{lg_1}c{dg_1}c{lg_2}n1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    # sorted() pins the iteration order; a bare set of string tuples is
    # ordered by randomized hashes and would vary between kernel sessions.
    lg_placements = sorted(set(permutations([leaving_groupf, "", ""])))
    for edg in edgs:
        edgf = "({})".format(edg)
        edg_placements = sorted(set(permutations([edgf, ""])))
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            nuc_acf = "({})".format(nuc_ac)
            for perm in lg_placements:
                # Only the leaving-group slots are rewritten for the product;
                # the EDG slots are shared between reagent and product.
                perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]
                for perm_edg in edg_placements:
                    edg_slots = {"dg_{}".format(i): v for i, v in enumerate(perm_edg)}

                    reagent_slots = {"lg_{}".format(i): v for i, v in enumerate(perm)}
                    reagent_slots.update(edg_slots)

                    product_slots = {"lg_{}".format(i): v for i, v in enumerate(perm_out)}
                    product_slots.update(edg_slots)

                    tests.append({
                        "reagents": [pf.format(**reagent_slots), nuc_in],
                        "product": pf.format(**product_slots),
                        # NOTE(review): other no-reaction cases in this
                        # notebook use [] here rather than [[]]; value kept
                        # as-is -- confirm which form the consumer expects.
                        "paths": [[]],
                    })

In [134]:
# Indole, w/o explicit EWG, the only valid position.
# Single substituent slot; one substitution step is expected.

ring = "c{}1cc2ccccc2[nH]1"

tests.extend(
    {
        "reagents": [ring.format("(" + lg + ")"), incoming],
        "product": ring.format("(" + attached + ")"),
        "paths": [[ar_nu]],
    }
    for lg in leaving_groups
    for incoming, attached in zip(nuc_ins, nuc_acs)
)

In [135]:
# Indole, w/o explicit EWG, invalid positions.
# The leaving group is placed in each of the five template slots in turn.

pf = "c1c{}c2c{}c{}c{}c{}c2[nH]1"

for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    # sorted() pins the iteration order; a bare set of string tuples is
    # ordered by randomized hashes and would vary between kernel sessions.
    placements = sorted(set(permutations([leaving_groupf, "", "", "", ""])))
    for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
        nuc_acf = "({})".format(nuc_ac)

        for perm in placements:
            perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]

            tests.append({
                "reagents": [pf.format(*perm), nuc_in],
                "product": pf.format(*perm_out),
                # NOTE(review): other no-reaction cases in this notebook use
                # [] here rather than [[]]; value kept as-is -- confirm which
                # form the consumer of the JSON expects.
                "paths": [[]],
            })

In [137]:
# Write every generated case to the JSON file. The with-block closes the
# handle (flushing buffered data to disk) even if serialisation fails.
with open("../cases/nucleo_hetero_aro.json", "w") as cases_file:
    json.dump(tests, cases_file)