In [4]:
import json
from itertools import permutations

from matplotlib import pyplot as plt
from rdkit import Chem
from rdkit.Chem import Draw

%matplotlib inline

In [22]:
# SMILES strings for a handful of named reagents.

# Inorganic / small nitrogen species
nitric_acid = "O[N+](=O)[O-]"
ammonia = "N"
hydrazine = "NN"
hydrogen = "[H][H]"
chlorine = "ClCl"

# Substituted benzenes
aniline = "c1ccc(N)cc1"
phenol = "c1cc(O)ccc1"

# Alcohols, an acid and ethers
ethyl_alcohol = "CCO"
isopropyl_alcohol = "CC(O)C"
acetic_acid = "CC(=O)O"
diethyl_ether = "CCOCC"
dimethyl_ether = "COC"

In [23]:
# Human-readable reaction-type labels. `ar_nu` is the label stored in the
# expected "paths" of the generated test cases.
ar_nu, ar_el = (
    "aromatic nucleophilic substitution",
    "aromatic electrophilic substitution",
)

In [88]:
# Electron-withdrawing substituents, as SMILES fragments that get spliced
# into the ring templates of the generator cells.
ewgs = [
    "[N+](=O)[O-]",
    "C(=O)CC",
    "C(=O)NC",
    "C(F)(F)F",
    "C(Cl)(Cl)(Cl)",
    "C#N",
    "[N+](C)(C)C",
    "OS(=O)=O",
    "OS(=O)(=O)C",
]

# Electron-donating substituents, same fragment convention.
edgs = [
    "N",
    "N(C)C",
    "C(C)(C)C",
    "O",
    "NC(=O)CC",
    "S",
    "S(C)",
]

# Nucleophiles as free reagents ...
nuc_ins = [
    "N",
    ethyl_alcohol,
    isopropyl_alcohol,
    "N(CC)CC",
    "N(C)C",
    "CN",
]

# ... and the same nucleophiles written as the fragment that ends up attached
# to the ring. The two lists are parallel: the generator cells zip() them, so
# entry i of one must correspond to entry i of the other.
nuc_acs = [
    "N",
    "OCC",
    "OC(C)C",
    "N(CC)CC",
    "N(C)C",
    "NC",
]

# Groups that can be displaced. Note the last two entries also appear in
# `ewgs`; the generator cells test for that overlap.
leaving_groups = [
    "Cl",
    "F",
    "OS(=O)=O",
    "OS(=O)(=O)C",
]

In [284]:
# Accumulator for the generated test cases; each entry is a dict with the
# keys "reagents", "product" and "paths".
tests = list()

In [285]:
# Leaving group meta to the EWG: no nucleophilic substitution is expected,
# so every generated case carries an empty list of paths.

template = "c1ccc({})cc1{}"
for leaving_group in leaving_groups:
    for ewg in ewgs:
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            reagents = [template.format(ewg, leaving_group), nuc_in]
            product = template.format(ewg, nuc_ac)
            paths = []
            tests.append(dict(reagents=reagents, product=product, paths=paths))

In [286]:
# Leaving group ortho to the EWG: aromatic nucleophilic substitution is the
# expected path.

pf = "c1cccc({})c1{}"
for leaving_group in leaving_groups:
    for ewg in ewgs:
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            reagents = [pf.format(ewg, leaving_group), nuc_in]
            product = pf.format(ewg, nuc_ac)

            # If the EWG is itself listed as a leaving group, the nucleophile
            # could displace either substituent and the outcome cannot be
            # controlled. Only when both routes lead to the same canonical
            # molecule (the symmetrical case) is the expected path kept.
            two_outcomes = (
                ewg in leaving_groups
                and Chem.CanonSmiles(product)
                != Chem.CanonSmiles(pf.format(nuc_ac, leaving_group))
            )
            paths = [] if two_outcomes else [[ar_nu]]

            tests.append(dict(reagents=reagents, product=product, paths=paths))

In [287]:
# Leaving group ortho to the EWG again, this time with the ring SMILES
# written starting from the EWG-bearing atom.

pf = "c1({})ccccc1{}"
for leaving_group in leaving_groups:
    for ewg in ewgs:
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            reagents = [pf.format(ewg, leaving_group), nuc_in]
            product = pf.format(ewg, nuc_ac)

            # If the EWG is itself listed as a leaving group, the nucleophile
            # could displace either substituent and the outcome cannot be
            # controlled. Only when both routes lead to the same canonical
            # molecule (the symmetrical case) is the expected path kept.
            two_outcomes = (
                ewg in leaving_groups
                and Chem.CanonSmiles(product)
                != Chem.CanonSmiles(pf.format(nuc_ac, leaving_group))
            )
            paths = [] if two_outcomes else [[ar_nu]]

            tests.append(dict(reagents=reagents, product=product, paths=paths))

In [288]:
# Leaving group para to the EWG: aromatic nucleophilic substitution is the
# expected path.

pf = "c1cc({})ccc1{}"
for leaving_group in leaving_groups:
    for ewg in ewgs:
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            reagents = [pf.format(ewg, leaving_group), nuc_in]
            product = pf.format(ewg, nuc_ac)

            # If the EWG is itself listed as a leaving group, the nucleophile
            # could displace either substituent and the outcome cannot be
            # controlled. Only when both routes lead to the same canonical
            # molecule (the symmetrical case) is the expected path kept.
            two_outcomes = (
                ewg in leaving_groups
                and Chem.CanonSmiles(product)
                != Chem.CanonSmiles(pf.format(nuc_ac, leaving_group))
            )
            paths = [] if two_outcomes else [[ar_nu]]

            tests.append(dict(reagents=reagents, product=product, paths=paths))

In [289]:
# EDG ortho to the leaving group: no nucleophilic substitution is expected,
# so every generated case carries an empty list of paths.

template = "c1({})ccccc1{}"
for leaving_group in leaving_groups:
    for edg in edgs:
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            reagents = [template.format(edg, leaving_group), nuc_in]
            product = template.format(edg, nuc_ac)
            paths = []
            tests.append(dict(reagents=reagents, product=product, paths=paths))

In [290]:
# EDG para to the leaving group: no nucleophilic substitution is expected,
# so every generated case carries an empty list of paths.

template = "c1cc({})ccc1{}"
for leaving_group in leaving_groups:
    for edg in edgs:
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            reagents = [template.format(edg, leaving_group), nuc_in]
            product = template.format(edg, nuc_ac)
            paths = []
            tests.append(dict(reagents=reagents, product=product, paths=paths))

In [291]:
# EDG meta to the leaving group: no nucleophilic substitution is expected,
# so every generated case carries an empty list of paths.

template = "c1c({})cccc1{}"
for leaving_group in leaving_groups:
    for edg in edgs:
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            reagents = [template.format(edg, leaving_group), nuc_in]
            product = template.format(edg, nuc_ac)
            paths = []
            tests.append(dict(reagents=reagents, product=product, paths=paths))

In [292]:
# Opposing effects: an EWG and an EDG both ortho to the leaving group.
# No nucleophilic substitution is expected, so paths stay empty.

template = "c({})1cccc({})c1{}"
for leaving_group in leaving_groups:
    for ewg in ewgs:
        for edg in edgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [template.format(ewg, edg, leaving_group), nuc_in]
                product = template.format(ewg, edg, nuc_ac)
                paths = []
                tests.append(dict(reagents=reagents, product=product, paths=paths))

In [293]:
# Opposing effects: EWG ortho and EDG para to the leaving group.
# No nucleophilic substitution is expected, so paths stay empty.

template = "c({})1cc({})ccc1{}"
for leaving_group in leaving_groups:
    for ewg in ewgs:
        for edg in edgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [template.format(ewg, edg, leaving_group), nuc_in]
                product = template.format(ewg, edg, nuc_ac)
                paths = []
                tests.append(dict(reagents=reagents, product=product, paths=paths))

In [294]:
# Non-opposing effects: EWG ortho and EDG meta to the leaving group.
# Aromatic nucleophilic substitution is the expected path.

pf = "c({})1ccc({})cc1{}"
for leaving_group in leaving_groups:
    for ewg in ewgs:
        for edg in edgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [pf.format(ewg, edg, leaving_group), nuc_in]
                product = pf.format(ewg, edg, nuc_ac)

                # No alternative-route check here: even when the leaving group
                # is itself electron-withdrawing, the EWG is not expected to be
                # displaced because the EDG sits para to it.
                paths = [[ar_nu]]

                tests.append(dict(reagents=reagents, product=product, paths=paths))

In [295]:
# Non-opposing effects: EWG para and EDG meta to the leaving group.
# Aromatic nucleophilic substitution is the expected path.

pf = "c1cc({})c({})cc1{}"
for leaving_group in leaving_groups:
    for ewg in ewgs:
        for edg in edgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [pf.format(ewg, edg, leaving_group), nuc_in]
                product = pf.format(ewg, edg, nuc_ac)

                # No alternative-route check here: per the original author's
                # note, the EDG is taken to rule out displacement of the EWG
                # itself even when the leaving group is electron-withdrawing.
                # NOTE(review): in this template the EDG is adjacent to the
                # EWG, not para to it - confirm the rationale still holds.
                paths = [[ar_nu]]

                tests.append(dict(reagents=reagents, product=product, paths=paths))

In [296]:
# Cooperating effects: two EWGs, both ortho to the leaving group.
# Aromatic nucleophilic substitution is the expected path.

pf = "c({})1cccc({})c1{}"
for leaving_group in leaving_groups:
    for ewg1 in ewgs:
        for ewg2 in ewgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [pf.format(ewg1, ewg2, leaving_group), nuc_in]
                product = pf.format(ewg1, ewg2, nuc_ac)

                # A competing route exists when one of the EWGs is itself a
                # leaving group and the nominal leaving group is an EWG: the
                # nucleophile could then displace that EWG instead. Such a
                # case is only kept when the competing route yields the same
                # canonical molecule (the symmetrical case). The first EWG is
                # checked before the second, and the second only if the first
                # check did not already fire.
                competing_route = (
                    ewg1 in leaving_groups
                    and leaving_group in ewgs
                    and Chem.CanonSmiles(product)
                    != Chem.CanonSmiles(pf.format(nuc_ac, ewg2, leaving_group))
                ) or (
                    ewg2 in leaving_groups
                    and leaving_group in ewgs
                    and Chem.CanonSmiles(product)
                    != Chem.CanonSmiles(pf.format(ewg1, nuc_ac, leaving_group))
                )
                paths = [] if competing_route else [[ar_nu]]

                tests.append(dict(reagents=reagents, product=product, paths=paths))

In [298]:
# EWG ortho to the displaced leaving group (leaving_group1); a second leaving
# group (leaving_group2) sits meta to the EWG and is expected to stay.

pf = "c({})1cccc({})c1{}"
for leaving_group1 in leaving_groups:
    for leaving_group2 in leaving_groups:
        for ewg in ewgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [pf.format(ewg, leaving_group2, leaving_group1), nuc_in]
                product = pf.format(ewg, leaving_group2, nuc_ac)

                # A competing route exists when the EWG is itself a leaving
                # group and the displaced group is an EWG: the nucleophile
                # could then replace the EWG instead. The case is only kept
                # when both routes give the same canonical molecule.
                #
                # The membership test must use this cell's own loop variable
                # `leaving_group1`. A bare `leaving_group` is not bound by any
                # loop in this cell and would silently pick up whatever value
                # an earlier cell left behind, making the check constant.
                if (
                    ewg in leaving_groups
                    and leaving_group1 in ewgs
                    and Chem.CanonSmiles(product)
                    != Chem.CanonSmiles(pf.format(nuc_ac, leaving_group2, leaving_group1))
                ):
                    paths = []
                else:
                    paths = [[ar_nu]]

                tests.append({
                    "reagents": reagents,
                    "product": product,
                    "paths": paths
                })

In [299]:
# EWG para to the displaced leaving group (leaving_group1); a second leaving
# group (leaving_group2) sits meta to the EWG and is expected to stay.

pf = "c({})1cc({})c({})cc1"
for leaving_group1 in leaving_groups:
    for leaving_group2 in leaving_groups:
        for ewg in ewgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [pf.format(ewg, leaving_group2, leaving_group1), nuc_in]
                product = pf.format(ewg, leaving_group2, nuc_ac)

                # A competing route exists when the EWG is itself a leaving
                # group and the displaced group is an EWG: the nucleophile
                # could then replace the EWG instead. The case is only kept
                # when both routes give the same canonical molecule.
                #
                # The membership test must use this cell's own loop variable
                # `leaving_group1`. A bare `leaving_group` is not bound by any
                # loop in this cell and would silently pick up whatever value
                # an earlier cell left behind, making the check constant.
                if (
                    ewg in leaving_groups
                    and leaving_group1 in ewgs
                    and Chem.CanonSmiles(product)
                    != Chem.CanonSmiles(pf.format(nuc_ac, leaving_group2, leaving_group1))
                ):
                    paths = []
                else:
                    paths = [[ar_nu]]

                tests.append({
                    "reagents": reagents,
                    "product": product,
                    "paths": paths
                })

In [300]:
# Two leaving groups, both ortho to the EWG. Either one could be displaced,
# so substituting only one of them cannot be relied on: both candidate
# products are emitted, each with an empty list of paths.

template = "c({})1c({})cccc1{}"
for leaving_group1 in leaving_groups:
    for leaving_group2 in leaving_groups:
        for ewg in ewgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [template.format(ewg, leaving_group2, leaving_group1), nuc_in]
                # First direction replaces leaving_group1, second replaces
                # leaving_group2.
                candidates = [
                    (ewg, leaving_group2, nuc_ac),
                    (ewg, nuc_ac, leaving_group1),
                ]
                for direction in candidates:
                    product = template.format(*direction)
                    paths = []
                    tests.append(dict(reagents=reagents, product=product, paths=paths))

In [301]:
# Two leaving groups, one ortho and one para to the EWG. Either one could be
# displaced, so substituting only one of them cannot be relied on: both
# candidate products are emitted, each with an empty list of paths.

template = "c({})1ccc({})cc1{}"
for leaving_group1 in leaving_groups:
    for leaving_group2 in leaving_groups:
        for ewg in ewgs:
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                reagents = [template.format(ewg, leaving_group2, leaving_group1), nuc_in]
                # First direction replaces leaving_group1, second replaces
                # leaving_group2.
                candidates = [
                    (ewg, leaving_group2, nuc_ac),
                    (ewg, nuc_ac, leaving_group1),
                ]
                for direction in candidates:
                    product = template.format(*direction)
                    paths = []
                    tests.append(dict(reagents=reagents, product=product, paths=paths))

In [302]:
# Fused two-ring carbon aromatic system with one EWG; the leaving group is
# tried in each of four ring positions. Treated like the ortho- case:
# aromatic nucleophilic substitution is the expected path.

pf = "c12c{}c{}cc{}c1c{}cc{}c2"
for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    for ewg in ewgs:
        ewgf = "({})".format(ewg)
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            nuc_acf = "({})".format(nuc_ac)
            # set() removes the duplicate permutations caused by the repeated
            # empty slots; sorted() fixes the iteration order, which for a set
            # of string tuples would otherwise vary from run to run and make
            # the generated file non-reproducible.
            for perm in sorted(set(permutations(["", "", "", leaving_groupf]))):
                # Same slot layout with the leaving group swapped for the
                # attached nucleophile.
                perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]
                reagents = [pf.format(ewgf, *perm), nuc_in]
                product = pf.format(ewgf, *perm_out)

                # A competing route exists when the EWG is itself a leaving
                # group and the leaving group is an EWG. The case is only
                # kept when both routes give the same canonical molecule.
                if (
                    ewg in leaving_groups
                    and leaving_group in ewgs
                    and Chem.CanonSmiles(product)
                    != Chem.CanonSmiles(pf.format(nuc_acf, *perm))
                ):
                    paths = []
                else:
                    paths = [[ar_nu]]

                tests.append({
                    "reagents": reagents,
                    "product": product,
                    "paths": paths
                })

In [303]:
# Fused two-ring carbon aromatic system, treated like the ortho- case, with
# the EWG moved to a ring position that (per the original note) has four
# symmetric equivalents. The leaving group is tried in each of four slots.

pf = "c1{}c{}cc2cc{}cc{}c2c1{}"
for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    for ewg in ewgs:
        ewgf = "({})".format(ewg)
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            nuc_acf = "({})".format(nuc_ac)
            # set() removes the duplicate permutations caused by the repeated
            # empty slots; sorted() fixes the iteration order, which for a set
            # of string tuples would otherwise vary from run to run and make
            # the generated file non-reproducible.
            for perm in sorted(set(permutations(["", "", "", leaving_groupf]))):
                # Same slot layout with the leaving group swapped for the
                # attached nucleophile.
                perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]
                reagents = [pf.format(ewgf, *perm), nuc_in]
                product = pf.format(ewgf, *perm_out)

                # A competing route exists when the EWG is itself a leaving
                # group and the leaving group is an EWG. The case is only
                # kept when both routes give the same canonical molecule.
                if (
                    ewg in leaving_groups
                    and leaving_group in ewgs
                    and Chem.CanonSmiles(product)
                    != Chem.CanonSmiles(pf.format(nuc_acf, *perm))
                ):
                    paths = []
                else:
                    paths = [[ar_nu]]

                tests.append({
                    "reagents": reagents,
                    "product": product,
                    "paths": paths
                })

In [304]:
# Fused two-ring carbon aromatic system, treated like the meta- case: no
# nucleophilic substitution is expected, so paths stay empty.

# `ewgf` already carries its own parentheses, so the EWG slot in the template
# is a bare `{}`; wrapping it again would emit "((...))", which is not a valid
# SMILES branch.
pf = "c12c{}cc{}cc1cc{}cc2{}"
for leaving_group in leaving_groups:
    leaving_groupf = "({})".format(leaving_group)
    for ewg in ewgs:
        ewgf = "({})".format(ewg)
        for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
            # Must be rebuilt for every nucleophile; otherwise each product
            # would reuse whatever fragment an earlier cell left in `nuc_acf`.
            nuc_acf = "({})".format(nuc_ac)
            # set() removes the duplicate permutations caused by the repeated
            # empty slots; sorted() fixes the iteration order, which for a set
            # of string tuples would otherwise vary from run to run.
            for perm in sorted(set(permutations(["", "", leaving_groupf]))):
                # Same slot layout with the leaving group swapped for the
                # attached nucleophile.
                perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]
                reagents = [pf.format(ewgf, *perm), nuc_in]
                product = pf.format(ewgf, *perm_out)
                paths = []

                tests.append({
                    "reagents": reagents,
                    "product": product,
                    "paths": paths
                })

In [305]:
# Fused two-ring carbon aromatic system with two EWGs acting together,
# treated like the ortho- case: aromatic nucleophilic substitution is the
# expected path. The leaving group is tried in each of four slots.

pf = "c12c{}c{}cc{}c1c{}cc{}c2{}"
for leaving_group in leaving_groups:
    # Must be rebuilt for every leaving group; otherwise every substrate
    # would carry whatever fragment an earlier cell left in `leaving_groupf`,
    # regardless of the current `leaving_group`.
    leaving_groupf = "({})".format(leaving_group)
    for ewg1 in ewgs:
        ewg1f = "({})".format(ewg1)
        for ewg2 in ewgs:
            ewg2f = "({})".format(ewg2)
            for nuc_in, nuc_ac in zip(nuc_ins, nuc_acs):
                nuc_acf = "({})".format(nuc_ac)
                # set() removes the duplicate permutations caused by the
                # repeated empty slots; sorted() fixes the iteration order,
                # which for a set of string tuples would otherwise vary from
                # run to run.
                for perm in sorted(set(permutations(["", "", "", leaving_groupf]))):
                    # Same slot layout with the leaving group swapped for the
                    # attached nucleophile.
                    perm_out = [nuc_acf if d == leaving_groupf else d for d in perm]
                    reagents = [pf.format(ewg1f, *perm, ewg2f), nuc_in]
                    product = pf.format(ewg1f, *perm_out, ewg2f)

                    # A competing route exists when one of the EWGs is itself
                    # a leaving group and the leaving group is an EWG. The
                    # case is only kept when the competing route gives the
                    # same canonical molecule. The first EWG is checked
                    # before the second.
                    if (
                        ewg1 in leaving_groups
                        and leaving_group in ewgs
                        and Chem.CanonSmiles(product)
                        != Chem.CanonSmiles(pf.format(nuc_acf, *perm, ewg2f))
                    ):
                        paths = []
                    elif (
                        ewg2 in leaving_groups
                        and leaving_group in ewgs
                        and Chem.CanonSmiles(product)
                        != Chem.CanonSmiles(pf.format(ewg1f, *perm, nuc_acf))
                    ):
                        paths = []
                    else:
                        paths = [[ar_nu]]

                    tests.append({
                        "reagents": reagents,
                        "product": product,
                        "paths": paths
                    })

In [308]:
# Write all generated cases to disk. The context manager guarantees the file
# is flushed and closed even if serialisation fails part-way.
with open("nucleo_carbo_aro.json", "w") as out_file:
    json.dump(tests, out_file)