# Digital chemical reactions - Seminar Notebook

In [None]:
# Imports
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
import pandas as pd
from rdkit.Chem import AllChem
from rdkit.Chem import SDWriter, SDMolSupplier
from rdkit.Chem import rdChemReactions

### 0. SMILES

#### 0.0. Introduction to SMILES

**SMILES (Simplified Molecular Input Line Entry System)** is a specification that encodes molecular structures into a line of text using short ASCII strings. These strings provide a compact and human-readable way to represent molecules, and are widely used in cheminformatics for storing, sharing, and processing chemical data.

**Why are SMILES important?**
- They provide a standard format to represent molecules digitally.
- They can be easily parsed by computers and are supported by many cheminformatics libraries (e.g., RDKit).
- They enable descriptor generation and molecular modeling, which you'll explore in depth in the next session.

**What is RDKit?**

RDKit is an open-source cheminformatics toolkit that allows the manipulation of chemical information, including SMILES parsing, molecular descriptor calculation, substructure searching, and chemical reaction modeling.

**What is a Mol object?**

In RDKit, a Mol object is a Python representation of a molecule. It is typically created from a SMILES string or a structure file (like `.sdf`), and can be used for visualization, property calculations, and transformations.

**Basic SMILES Rules**:
- Atoms are represented by atomic symbols (e.g., C for carbon, O for oxygen).
- Single and aromatic bonds are implied and usually omitted; double (=) and triple (#) bonds are written explicitly (an aromatic bond can be written as `:`, but this is rarely needed).
- Branches are represented using parentheses.
- Rings are encoded by numbers that indicate where the ring opens and closes.

**Useful references**:
- [Daylight SMILES Tutorial](http://www.daylight.com/dayhtml/doc/theory/theory.smiles.html)
- [RDKit SMILES Documentation](https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.MolFromSmiles)
- [Wikipedia - SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system)

#### 0.1. Some basic SMILES examples

In [None]:
# Convert a SMILES string into a molecule object (Mol)
smile = "O=C=O"  # carbon dioxide

# Chem.MolFromSmiles() parses the SMILES and builds the Mol
mol = Chem.MolFromSmiles(smile)

# Display the molecule.
# NOTE: MolToImage() draws ONE molecule and takes its size through `size`.
# `subImgSize` is an argument of MolsToGridImage(); passed here it would be
# silently ignored and the default image size would be used instead.
Draw.MolToImage(mol, size=(200, 200), legend=smile)

In [None]:
# Collect a few example SMILES strings in a list named smiles_list.
# They are converted to molecule objects (Mol) in the next cell.
# Order: ethanol, benzene, acetic acid, phenol
smiles_list = ["CCO", "c1ccccc1", "CC(=O)O", "C1=CC=CC=C1O"]

print(smiles_list)

In [None]:
# Turn every SMILES of smiles_list into a Mol and collect them in the list mols
mols = [Chem.MolFromSmiles(smi) for smi in smiles_list]

# Show all molecules in one grid, using the SMILES themselves as legends
Draw.MolsToGridImage(
    mols,
    molsPerRow=4,
    subImgSize=(200, 200),
    legends=smiles_list,
)

#### 0.2. SMILES from CSV file

In [None]:
# Loading SMILES from a CSV file
"""
In many real-world scenarios, chemical data is stored in external files like CSVs. Here, we'll load a file with molecule names and their 
corresponding SMILES strings, and visualize them using RDKit.

What is a CSV file?
A CSV (Comma-Separated Values) file is a simple, plain-text file used to store tabular data, where each line represents a row and columns
are separated by commas. It's commonly used for exchanging structured data between programs like Excel, Python (via pandas), and databases.
"""

# How to create a CSV using python? (this is only one of several ways)

# Step 1: describe the table as a dictionary (column name -> list of values)
data = {
    "code_name": ["ethanol", "benzene", "acetic_acid"],
    "SMILES": ["CCO", "c1ccccc1", "CC(=O)O"],
}

# Step 2: build a pandas DataFrame from that dictionary
df = pd.DataFrame.from_dict(data)

# Step 3: write the DataFrame to a CSV file (index=False leaves the row index out)
df.to_csv("molecules.csv", index=False)

# Possible question: is a dictionary the same as a dataframe?
# Print both to compare them.
print(data)
print(df)

In [None]:
# Read the CSV back into a DataFrame
df_loaded = pd.read_csv("molecules.csv")

# Build one Mol for each entry of the SMILES column
mols_csv = [Chem.MolFromSmiles(smi) for smi in df_loaded["SMILES"]]

# Draw the molecules; the code_name column provides the legends
Draw.MolsToGridImage(
    mols_csv,
    molsPerRow=3,
    subImgSize=(200, 200),
    legends=df_loaded["code_name"].tolist(),
)

#### 0.3. SMILES from SDF file

In [None]:
"""
Reading molecules from an SDF file

SDF (Structure Data File) is a widely used file format for storing multiple molecular structures along with metadata. 
RDKit can parse these files and convert them into Mol objects.

Below is a demonstration of how to load molecules from an SDF file. You can replace 'nitriles.sdf' with any real dataset.
"""

# Open the SDF (it has to be in the same folder as this .ipynb file)
sdf_file = SDMolSupplier("nitriles.sdf")

# The supplier yields Mol objects directly, so .MolFromSmiles() is not needed:
# there are no SMILES involved here. Entries that come back as None
# (invalid mols) are left out of the list.
mols = [entry for entry in sdf_file if entry is not None]

Draw.MolsToGridImage(mols, molsPerRow=6, subImgSize=(200, 200))

#### 0.4. It's your turn!

In [None]:
# Instructions for the practice tasks of section 0 (Tasks 1-3 in the cells below)
"""
Try the following tasks based on what you've learned so far:

1. Convert the following SMILES string into a Mol and visualize it: `CCN(CC)CC` (triethylamine)
2. Load the CSV file again and visualize the molecules.
3. Load your own SDF file and display the molecules.

Use the cells below to practice:
"""

In [None]:
# Task 1: Convert this SMILES to a Mol and visualize
# Hint: follow the single-molecule example of section 0.1
practice_smiles = "CCN(CC)CC"    # triethylamine
# Your code here


In [None]:
# Task 2: From the CSV file 'oxygenated_compounds.csv', convert the SMILES to a Mol and visualize
# Hint: follow the CSV example of section 0.2
csv_file = 'oxygenated_compounds.csv'
# Your code here


In [None]:
# Task 3: Load the molecules stored in the SDF file alkynes.sdf and visualize them
# (an SDF is read directly into Mol objects, so there are no SMILES to convert;
# follow the SDF example of section 0.3)
sdf_file = 'alkynes.sdf'
# Your code here


### 1. SMARTS

#### 1.0. Introduction to SMARTS

**SMARTS (SMILES Arbitrary Target Specification)** is a powerful language used to define substructures in molecules. While SMILES describes entire molecules, SMARTS allows you to describe **patterns or fragments** to match within molecules, making it ideal for filtering, searching, and performing **digital reactions**.

**Why use SMARTS?**
- Substructure matching in large datasets
- Functional group identification
- Defining reactants or transformation rules in digital reactions

**13 Useful Basic SMARTS Rules**:
1. `[#6]` — Carbon atom
2. `[O]` — Aliphatic oxygen atom (use `[#8]` to match any oxygen, aromatic or not)
3. `[#7]` — Nitrogen atom
4. `[C]=[O]` — Carbon double bonded to Oxygen (e.g., carbonyl group)
5. `[OH]` — Hydroxyl group
6. `[CH3]` — Methyl group
7. `[nH]` — Aromatic nitrogen with hydrogen (e.g., in indole)
8. `[R]` — Ring atom
9. `[!#6]` — Any atom except carbon
10. `*` — Any atom (wildcard)
11. `~` — Any bond (wildcard)
12. `[D3]` — atom with 3 explicit bonds (implicit H's don't count)
13. `[X3]` — atom with 3 total bonds (includes implicit H's)

**SMARTS Resources**:
- [Daylight SMARTS Tutorial](http://www.daylight.com/dayhtml/doc/theory/theory.smarts.html)
- [RDKit SMARTS Examples](https://www.rdkit.org/docs/Cookbook.html#substructure-matching)
- [SMARTS Tutorial](https://www.daylight.com/dayhtml_tutorials/languages/smarts/)
- [SMARTS Examples](https://www.daylight.com/dayhtml_tutorials/languages/smarts/smarts_examples.html)

#### 1.1. Basic examples of SMARTS - Filtering only one molecule

In [None]:
# Question: does the molecule contain N?

# The SMARTS pattern to search for, turned into a query Mol with .MolFromSmarts()
pattern = "N"
mol_pattern = Chem.MolFromSmarts(pattern)

# The molecule to inspect, built from its SMILES like in the previous section
smile_molecule = "CCN(CC)CC"  # triethylamine
mol_molecule = Chem.MolFromSmiles(smile_molecule)

# .HasSubstructMatch() tells us whether the pattern occurs in the molecule;
# the answer is stored in result
result = mol_molecule.HasSubstructMatch(mol_pattern)

print(result)        # the answer to the question
print(type(result))  # the Python type of that answer

#### 1.2. Applying SMARTS - Filtering a dataframe

In [None]:
# 1. Build the DataFrame used in this section (same approach as in the previous section)

# Column name -> list of values
data_dict = {
    "code_name": ["acetamide", "acetonitrile", "ethylamine"],
    "SMILES": ["CC(=O)N", "CC#N", "CCN"],
}

# DataFrame built from the dictionary
new_df = pd.DataFrame.from_dict(data_dict)

# Look at both objects
print(data_dict)
print(new_df)


In [None]:
# 2. Which molecules contain nitrogen?

# SMARTS pattern for nitrogen, converted to a query Mol with .MolFromSmarts()
pattern1 = "N"
mol_pattern1 = Chem.MolFromSmarts(pattern1)

# Keep the SMILES of every molecule of new_df that matches pattern1:
# each SMILES is converted to a Mol and tested with .HasSubstructMatch()
smi_match1 = [
    smi
    for smi in new_df['SMILES']
    if Chem.MolFromSmiles(smi).HasSubstructMatch(mol_pattern1)
]

# The SMILES that passed the filter
print(smi_match1)

# How many are there? len() counts the elements of the list
n_list = len(smi_match1)
print(n_list)

# The same number, this time formatted with an f-string
print(f'{n_list} molecules pass the filter')

In [None]:
# 3. Which molecules contain AMINO groups?
# 4. Which molecules contain CYANO groups?

# SMARTS patterns
pattern2 = "[NX3]"  # non-aromatic N with 3 total bonds (implicit H's included)
pattern3 = "N#C"    # cyano group

# Query Mols built with .MolFromSmarts()
mol_pattern2 = Chem.MolFromSmarts(pattern2)
mol_pattern3 = Chem.MolFromSmarts(pattern3)

# One result list per pattern
smi_match2 = []
smi_match3 = []

for smi in new_df['SMILES']:
    mol = Chem.MolFromSmiles(smi)

    # A SMILES is stored in smi_match2 / smi_match3 when its molecule
    # matches pattern2 / pattern3 (a molecule may end up in both lists)
    if mol.HasSubstructMatch(mol_pattern2):
        smi_match2.append(smi)
    if mol.HasSubstructMatch(mol_pattern3):
        smi_match3.append(smi)

# Which molecules pass each filter?
print(f'Amino filter: {smi_match2}')
print(f'Ciano filter:{smi_match3}')

# How many molecules pass each filter?
print(f'{len(smi_match2)} molecules pass the amino filter')
print(f'{len(smi_match3)} molecules pass the ciano filter')

In [None]:
# Solving the problem, it is very important to define SMARTS well.
# "[NX3]" also matches the N of acetamide, because an amide N has 3 total
# bonds as well. Adding "!$(NC=O)" rejects every N that is attached to a
# carbonyl carbon, so amides no longer count as amines.
# Which molecules contain amino groups?
pattern2 = "[NX3;!$(NC=O)]" # Amino groups

# Re-run the amino filter with the corrected pattern so the effect is visible
mol_pattern2 = Chem.MolFromSmarts(pattern2)
smi_match2 = [
    smi
    for smi in new_df['SMILES']
    if Chem.MolFromSmiles(smi).HasSubstructMatch(mol_pattern2)
]

print(f'Amino filter: {smi_match2}')
print(f'{len(smi_match2)} molecules pass the amino filter')


In [None]:
'''Define some SMARTS patterns and test them on molecules of your dataframe'''

# SMARTS patterns, first stored as plain strings
pattern_alcohol = "[CX4][OH]"        # carbon with 4 connections bonded to an O-H
pattern_aromatic_ring = "c1ccccc1"   # ring of six aromatic carbons
pattern_carbonyl = "[CX3]=[OX1]"     # C=O

# Convert each pattern into a query Mol.
# NOTE: there must be NO trailing comma after these calls: "x = f(...),"
# stores a 1-element tuple in x instead of the Mol itself.
mol_alcohol = Chem.MolFromSmarts(pattern_alcohol)
mol_aromatic_ring = Chem.MolFromSmarts(pattern_aromatic_ring)
mol_carbonyl = Chem.MolFromSmarts(pattern_carbonyl)

# what is above is equivalent to what is below:
mol_alcohol = Chem.MolFromSmarts("[CX4][OH]")
mol_aromatic_ring = Chem.MolFromSmarts("c1ccccc1")
mol_carbonyl = Chem.MolFromSmarts("[CX3]=[OX1]")

# creating 4 empty lists: all Mols, plus one list of matching SMILES per pattern
mol_list, match1, match2, match3 = [], [], [], []

# Converting the SMILES of the dataframe in mol object using a for loop
for smi in new_df['SMILES']:
    mol = Chem.MolFromSmiles(smi)
    mol_list.append(mol)

    # The SMILES is added to match1 / match2 / match3 when the molecule
    # matches the alcohol / aromatic ring / carbonyl pattern
    if mol.HasSubstructMatch(mol_alcohol):
        match1.append(smi)

    if mol.HasSubstructMatch(mol_aromatic_ring):
        match2.append(smi)

    if mol.HasSubstructMatch(mol_carbonyl):
        match3.append(smi)


# Check which molecules match each pattern
print(f'Alcohol filter: {match1}')
print(f'Aromatic ring filter:{match2}')
print(f'Carbonyl filter:{match3}')

Draw.MolsToGridImage(mol_list, molsPerRow=3, subImgSize=(200,200), legends=new_df["code_name"].tolist())

#### 1.3. Applying SMARTS - Filtering an SDF file

In [None]:
"""
### SMARTS Filtering in a Molecule Set
We'll simulate loading an SDF file with 20 small organic molecules, and apply a SMARTS pattern to filter those containing a ketone group.
"""

# Open the SDF file sample_mols.sdf
sdf_supplier = SDMolSupplier("sample_mols.sdf")

# Ketone query built in one line: a carbonyl carbon with a carbon on each side
carbonyl_pattern = Chem.MolFromSmarts('[#6][CX3](=O)[#6]')

# Sort every valid molecule into one of two lists
matched, unmatched = [], []
for mol in sdf_supplier:
    if mol is None:  # invalid MOL: skip it
        continue

    if mol.HasSubstructMatch(carbonyl_pattern):
        matched.append(mol)
    else:
        unmatched.append(mol)


print(f"Matched: {len(matched)} | Unmatched: {len(unmatched)}")
Draw.MolsToGridImage(matched, molsPerRow=6, subImgSize=(200,200))

#### 1.4. It's your turn!

In [None]:
# Task 4: Does this molecule contain any oxygen atoms? Solve it by applying SMARTS, obtaining a boolean
# Hint: follow the example of section 1.1
molecule = "CCN(CC)CC"
# Your code here


In [None]:
# Task 5: From the CSV file, keep only the molecules containing carboxylic acid groups (SMARTS filter) and visualize them
"""
SMARTS Filtering from CSV

We've prepared a CSV with some oxygenated compounds. Your task is to:

- Load the CSV into a DataFrame
- Convert the SMILES to Mol objects
- Define a SMARTS pattern for carboxylic acid groups
- Filter and visualize only the molecules containing carboxylic acid groups 

Hint: Beware of aldehydes and ester.
"""
csv_file = 'oxygenated_compounds.csv'
# Your code here


In [None]:
# Task 6: From the SDF file, keep only the molecules containing aldehyde groups (SMARTS filter) and visualize them
"""
SMARTS Filtering from SDF

Your task is to:

- Load the SDF
- Define a correct SMARTS pattern for aldehydes groups
- Filter and visualize only the molecules containing aldehydes acid groups

Hint: Take the example of section 1.3. and change the pattern.
"""
sdf_file = 'sample_mols.sdf'
# Your code here

### 2. DIGITAL CHEMICAL REACTIONS

#### 2.0. Introduction to Digital Chemical Reactions

Digital chemical reactions simulate real chemical transformations using SMARTS-based reaction templates. These templates define the transformation logic (reactant patterns → product structure) and can be applied programmatically using RDKit's `rdChemReactions` module.

**Common uses**:
- Automating reaction design
- Virtual synthesis of new molecules
- Filtering or modifying compound libraries

**Basic Imports for Digital Reactions**:
```python
from rdkit.Chem import rdChemReactions
```

**Structure**:
```python
rxn = rdChemReactions.ReactionFromSmarts("[C:1]=[O:2].[N:3]>>[C:1]([N:3])[O:2]")
products = rxn.RunReactants((mol1, mol2))
```

**Common pitfalls**:
- Reactants must match the SMARTS pattern exactly.
- Atom mapping (a `:n` label inside the brackets, e.g. `[C:1]`) is essential to track atoms during transformations.
- Input molecule order matters for multi-reactant reactions.

**Resources**:
- [RDKit Reaction Guide](https://www.rdkit.org/docs/Cookbook.html#using-chemical-reactions)
- [Reaction SMARTS - Daylight](http://www.daylight.com/dayhtml/doc/theory/theory.reactions.html)
- [SMARTS display](https://smarts.plus/)

#### 2.1. Basic examples of digital reactions

In [None]:
# Define a simple esterification reaction: carboxylic acid + alcohol -> ester

# Reaction SMARTS: "reactant1.reactant2>>product1.product2".
# The atom maps (:1 ... :4) tell RDKit which reactant atom becomes which
# product atom; here the acid O (:2) leaves as the second product.
smarts_reaction = "[C:1](=O)[O:2].[O:3][C:4]>>[C:1](=O)[O:3][C:4].[O:2]"

# Applying the function .ReactionFromSmarts() to specify that it is a reaction
rxn = rdChemReactions.ReactionFromSmarts(smarts_reaction)

# Defining the substrates of our reaction (SMILES -> MOL)
acid = Chem.MolFromSmiles("CC(=O)O")
alcohol = Chem.MolFromSmiles("CCO")

# Run the reaction using the function reaction.RunReactants((substrates)).
# The order of the substrates must follow the order of the reactant templates.
products = rxn.RunReactants((acid, alcohol))

# Each entry of products is one tuple of product molecules; the first
# molecule of every tuple is the ester (first product template).
esters = []
for product_set in products:
    ester = product_set[0]
    # Molecules returned by RunReactants() are not sanitized; sanitize them
    # before using them any further (drawing, SMILES, descriptors, ...)
    Chem.SanitizeMol(ester)
    esters.append(ester)

# Display the products
Draw.MolsToGridImage(esters)

#### 2.1. Digital Reactions from a CSV (Automated Reaction Workflow)

In [None]:
# Read the molecules of oxygenated_compounds.csv into a dataframe and show it
df = pd.read_csv("oxygenated_compounds.csv")
print(df)

In [None]:
# New dataframe column mol_object: the Mol built from each entry of the SMILES column
df["mol_object"] = [Chem.MolFromSmiles(smi) for smi in df["SMILES"]]
print(df)

In [None]:
# Display our molecules, using mol_object column
# (8 molecules per row, each one drawn in a 400 x 400 sub-image)
Draw.MolsToGridImage(df["mol_object"], molsPerRow=8, subImgSize=(400, 400))

In [None]:
# Define a simple ketone → enol conversion.
# Atom maps are numbered from 1: RDKit uses map number 0 to mean "no map
# set", so ":0" should not be used as a real label.
smarts_reaction = "[C:1][C:2](=O)[C:3]>>[C:1][C:2](O)(=[C:3])"
rxn = rdChemReactions.ReactionFromSmarts(smarts_reaction)

# creating 2 empty lists: product names and product SMILES
product_names, product_smiles = [], []

# running the reaction on each molecule and saving the products in the 2 lists
for i, mol in enumerate(df["mol_object"]):  # i is just the enumeration (0, 1, 2, 3...)
    if mol is None:  # only continues if the mol is valid
        continue

    ps = rxn.RunReactants((mol,))    # The double parentheses are because it expects a tuple
    if not ps:   # the molecule does not match the reactant template: no product
        continue

    # ps holds one tuple of products per match; keep the first product of the first match
    product = ps[0][0]

    # Molecules returned by RunReactants() are not sanitized. Sanitizing also
    # checks the product: chemically invalid products are skipped instead of
    # being written to the CSV.
    try:
        Chem.SanitizeMol(product)
    except ValueError:
        continue

    product_names.append(f"product_{i+1}")    # defining the product name and adding it to the list
    product_smiles.append(Chem.MolToSmiles(product))    # adding the SMILES to the second list

# Saving the products as a df
product_df = pd.DataFrame({"product_name": product_names,
                           "product_smiles": product_smiles})

# adding a new column of product_mol
product_df['product_mol'] = product_df["product_smiles"].apply(Chem.MolFromSmiles)

# Saving it as a CSV file titled "digital_products.csv"
product_df.to_csv("digital_products.csv", index=False)

# Display products, using product_mol column
Draw.MolsToGridImage(product_df['product_mol'], molsPerRow=5, subImgSize=(200, 200))

#### 2.2. Digital Reactions from an SDF File

In [None]:
# Read the molecules of sample_mols.sdf
sdf_supplier = SDMolSupplier("sample_mols.sdf")

# Reaction: Replace ketone (C=O) with alkene (C=C)
rxn = rdChemReactions.ReactionFromSmarts("[#6:1][C:2](=O)[#6:3]>>[#6:1][C:2]=[C:3]")

alkenes = []
for mol in sdf_supplier:   # Iterate through each molecule in the .sdf file.
    if mol is None:        # Ignore any that are incorrectly formatted or empty.
        continue

    ps = rxn.RunReactants((mol,))   # Apply the digital reaction to each molecule.
    if not ps:                      # No match with the reactant template: no product.
        continue

    product = ps[0][0]              # First product of the first match.

    # Molecules returned by RunReactants() are not sanitized. Sanitize before
    # drawing and skip products that are not chemically valid (for example
    # when the new C=C bond would exceed the valence of the carbon).
    try:
        Chem.SanitizeMol(product)
    except ValueError:
        continue

    alkenes.append(product)

Draw.MolsToGridImage(alkenes, molsPerRow=4)

#### 2.3. It's your turn!

In [None]:
# Task 7: make the ketone → enol reaction (CSV workflow) also work for acetophenone
'''
In the reaction from Section 2.1, why doesn't the acetophenone molecule produce the product if it's a ketone? 
How could you modify the reaction so that it also produces its product?

Hint: Copy and paste the example of section 2.1. and change the smarts_reaction.
'''

# Your code here

### Solutions to the tasks

In [None]:
# Task 1: Convert this SMILES to a Mol and visualize
practice_smiles = "CCN(CC)CC"  # triethylamine
# Your code here

# Parse the SMILES stored in practice_smiles
mol = Chem.MolFromSmiles(practice_smiles)

# MolToImage() draws a single molecule and takes its size through `size`.
# `molsPerRow` and `subImgSize` belong to MolsToGridImage(); passed to
# MolToImage() they are silently ignored.
Draw.MolToImage(mol, size=(200, 200), legend='triethylamine')

In [None]:
# Task 2: From the CSV file 'oxygenated_compounds.csv', convert the SMILES to a Mol and visualize
csv_file = 'oxygenated_compounds.csv'
# Your code here

# Read the CSV into a DataFrame
df_loaded = pd.read_csv(csv_file)

# One Mol per entry of the SMILES column
# (a list comprehension is the compact form of the for/append loop used earlier)
mols_csv = [Chem.MolFromSmiles(smi) for smi in df_loaded["SMILES"]]

# Draw the molecules with the name column as legends
Draw.MolsToGridImage(
    mols_csv,
    molsPerRow=5,
    subImgSize=(200, 200),
    legends=df_loaded["name"].tolist(),
)

In [None]:
# Task 3: Load the molecules stored in the SDF file alkynes.sdf and visualize them
# (an SDF is read directly into Mol objects, so there are no SMILES to convert)
sdf_file = 'alkynes.sdf'
# Your code here
# Load from SDF (it has to be in the same folder as this .ipynb file)
# NOTE: from here on sdf_file no longer holds the file name but the supplier object
sdf_file = SDMolSupplier(sdf_file)

mols = []
for mol in sdf_file:
    if mol is not None: # Filter out None values (invalid mols)
        mols.append(mol)
        
Draw.MolsToGridImage(mols, molsPerRow=7, subImgSize=(200, 200))

In [None]:
# Task 4: Does this molecule contain any oxygen atoms? Solve it by applying SMARTS, obtaining a boolean
molecule = "CCN(CC)CC"
# Your code here

# Build the Mol of THIS molecule. Without this line the test below would run
# on whatever `mol` was left over from a previously executed cell.
mol = Chem.MolFromSmiles(molecule)

# SMARTS pattern for oxygen, converted to a query Mol
pattern = "O"
mol_pattern = Chem.MolFromSmarts(pattern)

# The boolean asked for by the task
has_oxygen = mol.HasSubstructMatch(mol_pattern)
print(has_oxygen)

# The same answer as a sentence
if has_oxygen:
    word = 'contains'
else:
    word = "doesn't contain"

print(f'The molecule {molecule} {word} oxygen atoms')

In [None]:
# Task 5: From CSV file, filter molecules containing carboxylic acid groups using SMARTS and visualize them
"""
SMARTS Filtering from CSV

We've prepared a CSV with some oxygenated compounds. Your task is to:

- Load the CSV into a DataFrame
- Convert the SMILES to Mol objects
- Define a SMARTS pattern for carboxylic acid groups
- Filter and visualize only the molecules containing carboxylic acid groups 

Hint: Beware of aldehydes and ester.
"""
csv_file = 'oxygenated_compounds.csv'
# Your code here

# Load the table and build one Mol per SMILES
df_task = pd.read_csv(csv_file)
mol_list = [Chem.MolFromSmiles(smi) for smi in df_task['SMILES']]

# Carboxylic acid: a carbonyl carbon that also carries an O-H.
# Asking for the H on the oxygen leaves esters out; asking for the second
# oxygen leaves aldehydes out.
pattern_acid = '[CX3](=O)[OH]'
mol_pattern = Chem.MolFromSmarts(pattern_acid)

# Sort every valid molecule into matched / unmatched
matched, unmatched = [], []
for mol in mol_list:
    if mol is None:
        continue

    if mol.HasSubstructMatch(mol_pattern):
        matched.append(mol)
    else:
        unmatched.append(mol)


print(f"Matched: {len(matched)} | Unmatched: {len(unmatched)}")
Draw.MolsToGridImage(matched, molsPerRow=7, subImgSize=(200,200))

In [None]:
# Task 6: From SDF file, filter molecules containing aldehydes groups using SMARTS and visualize them
"""
SMARTS Filtering from SDF

Your task is to:

- Load the SDF
- Define a correct SMARTS pattern for aldehydes groups
- Filter and visualize only the molecules containing aldehydes acid groups

Hint: Take the example of section 1.3. and change the pattern.
"""
sdf_file = 'sample_mols.sdf'
# Your code here

# Open the SDF file named in sdf_file (sample_mols.sdf)
sdf_supplier = SDMolSupplier(sdf_file)

# Aldehyde query built in one line: a carbonyl carbon that carries exactly
# one H and is bonded to another carbon
carbonyl_pattern = Chem.MolFromSmarts('[CX3H1](=O)[#6]')

# Sort every valid molecule into matched / unmatched
matched, unmatched = [], []
for mol in sdf_supplier:
    if mol is None:  # invalid entry of the SDF: skip it
        continue

    if mol.HasSubstructMatch(carbonyl_pattern):
        matched.append(mol)
    else:
        unmatched.append(mol)


print(f"Matched: {len(matched)} | Unmatched: {len(unmatched)}")
Draw.MolsToGridImage(matched, molsPerRow=6, subImgSize=(200,200))

In [None]:
# Task 7: 
'''
In the reaction from Section 2.1, why doesn't the acetophenone molecule produce the product if it's a ketone? 
How could you modify the intelligent reaction so that it also produces its product?

Hint: Copy and paste the example of section 2.1. and change the smarts_reaction.
'''

# Your code here
# Answer: in SMARTS an upper-case "C" only matches ALIPHATIC carbon. In
# acetophenone one neighbour of the carbonyl carbon is an aromatic carbon, so
# the template "[C][C](=O)[C]" does not match. Writing that neighbour as
# "#6" (any carbon, aliphatic or aromatic) makes the template match.
# The other neighbour stays "C": it becomes the C=C carbon of the enol.

# Define a simple ketone → enol conversion.
# Atom maps are numbered from 1: RDKit uses map number 0 to mean "no map
# set", so ":0" should not be used as a real label.
smarts_reaction = "[#6:1][C:2](=O)[C:3]>>[#6:1][C:2](O)(=[C:3])"
rxn = rdChemReactions.ReactionFromSmarts(smarts_reaction)

# creating 2 empty lists: product names and product SMILES
product_names, product_smiles = [], []

# running the reaction on each molecule and saving the products in the 2 lists
for i, mol in enumerate(df["mol_object"]):  # i is just the enumeration (0, 1, 2, 3...)
    if mol is None:  # only continues if the mol is valid
        continue

    ps = rxn.RunReactants((mol,))    # The double parentheses are because it expects a tuple
    if not ps:   # the molecule does not match the reactant template: no product
        continue

    # ps holds one tuple of products per match; keep the first product of the first match
    product = ps[0][0]

    # Molecules returned by RunReactants() are not sanitized. Sanitizing also
    # checks the product: chemically invalid products are skipped instead of
    # being written to the CSV.
    try:
        Chem.SanitizeMol(product)
    except ValueError:
        continue

    product_names.append(f"product_{i+1}")    # defining the product name and adding it to the list
    product_smiles.append(Chem.MolToSmiles(product))    # adding the SMILES to the second list

# Saving the products as a df
product_df = pd.DataFrame({"product_name": product_names,
                           "product_smiles": product_smiles})

# adding a new column of product_mol
product_df['product_mol'] = product_df["product_smiles"].apply(Chem.MolFromSmiles)

# Saving it as a CSV file titled "digital_products.csv"
product_df.to_csv("digital_products.csv", index=False)

# Display products, using product_mol column
Draw.MolsToGridImage(product_df['product_mol'], molsPerRow=5, subImgSize=(200, 200))