In [1]:
import pandas as pd

In [2]:
data  = pd.read_csv("/Users/atabeyunlu/multi_agent_paper/runs_metadata/0611_1648_GO5/unique_ordered_molecules.csv")

In [None]:
data.tail()

In [7]:
database_mols = data[data["agent"] == "Database Agent"]
ai_mols = data[data["agent"] == "AI Expert"]
med_chem_mols = data[data["agent"] == "Medicinal Chemist"]

In [None]:
data.sort_values(by="qed", ascending=False).head(14).sort_values(by="docking_score", ascending=True)["smiles"][37]

In [30]:
from rdkit import Chem

smi = "Cc1cc(NC2=NC=C(F)C(N[C@H]3CC[C@@H](O)CC3)=N2)ccc1c1nnnn1[H]"

In [None]:
from rdkit import Chem

smi = "Cc1cc(NC2=NC=C(F)C(N[C@H]3CC[C@@H](O)CC3)=N2)ccc1c1nnnn1[H]"

Chem.MolToSmiles(Chem.MolFromSmiles(smi), isomericSmiles=False)

In [1]:
import pandas as pd

In [2]:
data = pd.read_csv("/Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet-3-7-llm-only/unique_ordered_molecules.csv")

In [None]:
from rdkit import Chem

# Check validity of SMILES in med_chem dataframe
def check_smiles_validity(smiles):
    """Check if a SMILES string is valid using RDKit"""
    if pd.isna(smiles) or smiles == '':
        return False
    mol = Chem.MolFromSmiles(smiles)
    return mol is not None

# Apply validity check to all SMILES in med_chem dataframe
valid_smiles = data['smiles'].apply(check_smiles_validity)

# Count valid and invalid SMILES
valid_count = valid_smiles.sum()
invalid_count = (~valid_smiles).sum()
total_count = len(data)

print(f"SMILES Validity Check Results:")
print(f"Total molecules: {total_count}")
print(f"Valid SMILES: {valid_count}")
print(f"Invalid SMILES: {invalid_count}")
print(f"Validity rate: {valid_count/total_count*100:.2f}%")

# Show invalid SMILES if any exist
if invalid_count > 0:
    print(f"\nInvalid SMILES found:")
    invalid_smiles = data[~valid_smiles]['smiles'].tolist()
    for i, smiles in enumerate(invalid_smiles[:10]):  # Show first 10 invalid SMILES
        print(f"  {i+1}. {smiles}")
    if len(invalid_smiles) > 10:
        print(f"  ... and {len(invalid_smiles) - 10} more")

# Filter to only valid molecules
med_chem_valid = data[valid_smiles].copy()
print(f"\nFiltered dataframe with valid SMILES: {len(med_chem_valid)} molecules")


In [None]:
data.smiles.nunique()

In [5]:
akt1 = pd.read_csv("/Users/atabeyunlu/multi_agent_paper/activity_data.csv")
akt1_actives = akt1[akt1["pchembl_value"] >= 6]

In [19]:
def novelty(gen, train, n_jobs=1):
    gen_smiles_set = set(gen) - {None}
    train_set = set(train)
    return 0 if len(gen_smiles_set) == 0 else len(gen_smiles_set - train_set) / len(gen_smiles_set)



In [7]:
import torch
import numpy as np
def average_agg_tanimoto(stock_vecs, gen_vecs,
                         batch_size=5000, agg='max',
                         device='cpu', p=1, intdiv=False):
    """
    For each molecule in gen_vecs finds closest molecule in stock_vecs.
    Returns average tanimoto score for between these molecules

    Parameters:
        stock_vecs: numpy array <n_vectors x dim>
        gen_vecs: numpy array <n_vectors' x dim>
        agg: max or mean
        p: power for averaging: (mean x^p)^(1/p)
    """
    assert agg in ['max', 'mean'], "Can aggregate only max or mean"
    agg_tanimoto = np.zeros(len(gen_vecs))
    total = np.zeros(len(gen_vecs))
    for j in range(0, stock_vecs.shape[0], batch_size):
        x_stock = torch.tensor(stock_vecs[j:j + batch_size]).to(device).float()
        for i in range(0, gen_vecs.shape[0], batch_size):
            
            y_gen = torch.tensor(gen_vecs[i:i + batch_size]).to(device).float()
            y_gen = y_gen.transpose(0, 1)
            tp = torch.mm(x_stock, y_gen)
            jac = (tp / (x_stock.sum(1, keepdim=True) +
                         y_gen.sum(0, keepdim=True) - tp)).cpu().numpy()
            jac[np.isnan(jac)] = 1
            if p != 1:
                jac = jac**p
            if agg == 'max':
                agg_tanimoto[i:i + y_gen.shape[1]] = np.maximum(
                    agg_tanimoto[i:i + y_gen.shape[1]], jac.max(0))
            elif agg == 'mean':
                agg_tanimoto[i:i + y_gen.shape[1]] += jac.sum(0)
                total[i:i + y_gen.shape[1]] += jac.shape[0]
    if agg == 'mean':
        agg_tanimoto /= total
    if p != 1:
        agg_tanimoto = (agg_tanimoto)**(1/p)
    if intdiv:
        return agg_tanimoto
    else:
        return np.mean(agg_tanimoto), np.std(agg_tanimoto)

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np

# Calculate Morgan fingerprints for akt1_actives canonical_smiles
akt1_actives_mols = [Chem.MolFromSmiles(smiles) for smiles in akt1_actives['canonical_smiles']]
akt1_actives_morgan_fps = []
for mol in akt1_actives_mols:
    if mol is not None:
        akt1_actives_morgan_fps.append(AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024))
    else:
        akt1_actives_morgan_fps.append(np.zeros(1024))

# Calculate Morgan fingerprints for med_chem smiles
med_chem_mols = [Chem.MolFromSmiles(smiles) for smiles in data['smiles']]
med_chem_morgan_fps = []
for mol in med_chem_mols:
    if mol is not None:
        med_chem_morgan_fps.append(AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024))
    else:
        med_chem_morgan_fps.append(np.zeros(1024))

print(f"AKT1 actives Morgan fingerprints calculated: {len(akt1_actives_morgan_fps)}")
print(f"Med chem Morgan fingerprints calculated: {len(med_chem_morgan_fps)}")


In [None]:
import numpy as np
sim_res = average_agg_tanimoto(np.array(akt1_actives_morgan_fps), np.array(med_chem_morgan_fps))
sim_res

In [None]:
import pandas as pd

In [None]:
import pandas as pd

data = pd.read_csv("/Users/atabeyunlu/multi_agent_paper/chembl_mols/chembl_mols_full.csv", sep=";", usecols=["ChEMBL ID", "Smiles"])


In [None]:
data.Smiles[:10].tolist()

In [22]:
from rdkit import Chem
import pandas as pd


def get_canonical_smiles(smiles_list):
    """
    Convert a list of SMILES strings to their canonical representations using RDKit.
    
    Args:
        smiles_list (list): List of SMILES strings
        
    Returns:
        list: List of canonical SMILES strings, None for invalid SMILES
    """
    canonical_smiles = []
    
    for smiles in smiles_list:
        if pd.isna(smiles) or smiles == '':
            canonical_smiles.append(None)
            continue
            
        mol = Chem.MolFromSmiles(smiles)
        if mol is not None:
            canonical_smiles.append(Chem.MolToSmiles(mol, canonical=True, isomericSmiles= False))
        else:
            canonical_smiles.append(None)
    
    return canonical_smiles


In [None]:
import os
import pandas as pd
from rdkit import Chem

# Paths to the CSV files
paths = [
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/single_multi_llm_runs/sonnet-3-7/unique_ordered_molecules.csv",
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/single_multi_llm_runs/sonnet-3-7-single-agent/unique_ordered_molecules.csv", 
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/single_multi_llm_runs/sonnet-3-7-llm-only/unique_ordered_molecules.csv"
]

# Make the reference data canonical
print("Making reference data canonical...")
data['canonical_smiles'] = get_canonical_smiles(data['Smiles'].tolist())
reference_smiles = set([s for s in data['canonical_smiles'] if s is not None])
print(f"Reference set size: {len(reference_smiles)}")


In [41]:

# Process each path
for i, path in enumerate(paths):
    if os.path.exists(path):
        print(f"\nProcessing path {i+1}: {path}")
        
        # Read the CSV
        df = pd.read_csv(path)
        
        # Make SMILES canonical
        # Filter for AI Expert and Medicinal Chemist agents
        df_filtered = df[df['agent'].isin(['LLM Drug Designer'])]
        df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered['smiles'].tolist())
        
        # Remove None values
        valid_smiles = [s for s in df_filtered['canonical_smiles'] if s is not None]
        
        # Calculate novelty
        novel_smiles = [s for s in valid_smiles if s not in reference_smiles]
        non_novel_smiles = [s for s in valid_smiles if s in reference_smiles]
        novelty_rate = len(novel_smiles) / len(valid_smiles) if valid_smiles else 0
        
        print(f"Total molecules: {len(df)}")
        print(f"Valid molecules: {len(valid_smiles)}")
        print(f"Novel molecules: {len(novel_smiles)}")
        print(f"Non-novel molecules: {non_novel_smiles}")
        print(f"Novelty rate: {novelty_rate:.3f}")
        
    else:
        print(f"Path not found: {path}")



Processing path 1: /Users/atabeyunlu/multi_agent_paper/runs_metadata/single_multi_llm_runs/sonnet-3-7/unique_ordered_molecules.csv
Total molecules: 132
Valid molecules: 0
Novel molecules: 0
Non-novel molecules: []
Novelty rate: 0.000

Processing path 2: /Users/atabeyunlu/multi_agent_paper/runs_metadata/single_multi_llm_runs/sonnet-3-7-single-agent/unique_ordered_molecules.csv
Total molecules: 86
Valid molecules: 0
Novel molecules: 0
Non-novel molecules: []
Novelty rate: 0.000

Processing path 3: /Users/atabeyunlu/multi_agent_paper/runs_metadata/single_multi_llm_runs/sonnet-3-7-llm-only/unique_ordered_molecules.csv
Total molecules: 101
Valid molecules: 101
Novel molecules: 98
Non-novel molecules: ['Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1', 'Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1', 'Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1']
Novelty rate: 0.970


In [42]:
# Additional paths to process
additional_paths = [
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/gemini_runs",
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/gpt4.1_runs", 
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/o3_runs",
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/iteration_runs",
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_3_7_merged",
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged",
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet-3-7",
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet-3.7-genmol",
    "/Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet-4"
]

# Process additional paths
print("\n" + "="*50)
print("PROCESSING ADDITIONAL PATHS")
print("="*50)

for i, base_path in enumerate(additional_paths):
    if os.path.exists(base_path):
        print(f"\nProcessing directory {i+1}: {base_path}")
        
        # Look for CSV files in the directory
        csv_files = []
        for root, dirs, files in os.walk(base_path):
            for file in files:
                if file.endswith('.csv') and 'unique_ordered_molecules' in file:
                    csv_files.append(os.path.join(root, file))
        
        if csv_files:
            print(f"Found {len(csv_files)} CSV files:")
            for csv_file in csv_files:
                print(f"  - {csv_file}")
                
                # Read the CSV
                try:
                    df = pd.read_csv(csv_file)
                    
                    # Check if 'smiles' column exists
                    smiles_col = 'smiles' if 'smiles' in df.columns else 'Smiles'
                    
                    # Make SMILES canonical
                    # Filter for AI Expert and Medicinal Chemist agents
                    df_filtered = df[df['agent'].isin(['AI Expert', 'Medicinal Chemist'])]
                    df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
                    
                    # Remove None values
                    valid_smiles = [s for s in df_filtered['canonical_smiles'] if s is not None]
                    
                    # Calculate novelty
                    novel_smiles = [s for s in valid_smiles if s not in reference_smiles]
                    novelty_rate = len(novel_smiles) / len(valid_smiles) if valid_smiles else 0
                    
                    print(f"    Total molecules: {len(df)}")
                    print(f"    Valid molecules: {len(valid_smiles)}")
                    print(f"    Novel molecules: {len(novel_smiles)}")
                    print(f"    Novelty rate: {novelty_rate:.3f}")
                    
                except Exception as e:
                    print(f"    Error processing {csv_file}: {e}")
        else:
            print("  No CSV files found with 'unique_ordered_molecules' in filename")
            
    else:
        print(f"Directory not found: {base_path}")

print("\n" + "="*50)
print("NOVELTY ANALYSIS COMPLETE")
print("="*50)



PROCESSING ADDITIONAL PATHS

Processing directory 1: /Users/atabeyunlu/multi_agent_paper/runs_metadata/gemini_runs
Found 4 CSV files:
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/gemini_runs/unique_ordered_molecules.csv
    Total molecules: 99
    Valid molecules: 69
    Novel molecules: 63
    Novelty rate: 0.913
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/gemini_runs/0614_1214_DA9_gemini_3/unique_ordered_molecules.csv
    Total molecules: 35
    Valid molecules: 25
    Novel molecules: 19
    Novelty rate: 0.760
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/gemini_runs/0616_1950_IM0_gemini_3/unique_ordered_molecules.csv
    Total molecules: 33
    Valid molecules: 23
    Novel molecules: 23
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/gemini_runs/0615_1806_6S1_gemini_3/unique_ordered_molecules.csv
    Total molecules: 31
    Valid molecules: 21
    Novel molecules: 21
    Novelty rate: 1.000

Processing directory 2: /Users/

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smil

    Total molecules: 40
    Valid molecules: 30
    Novel molecules: 30
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/iteration_runs/0624_1322_IX1_sonnet-3.7_7/unique_ordered_molecules.csv
    Total molecules: 57
    Valid molecules: 46
    Novel molecules: 45
    Novelty rate: 0.978
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/iteration_runs/0624_1330_KP1_sonnet-3.7_7/unique_ordered_molecules.csv
    Total molecules: 73
    Valid molecules: 63
    Novel molecules: 63
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/iteration_runs/0624_1854_OFF_sonnet-3.7_9/unique_ordered_molecules.csv
    Total molecules: 87
    Valid molecules: 77
    Novel molecules: 77
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/iteration_runs/0624_1302_UIL_sonnet-3.7_7/unique_ordered_molecules.csv
    Total molecules: 81
    Valid molecules: 71
    Novel molecules: 71
    Novelty rate: 1.000
  - /Users/atabeyunl

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smil

    Total molecules: 888
    Valid molecules: 683
    Novel molecules: 682
    Novelty rate: 0.999
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_3_7_merged/0618_2215_GV4_sonnet-3.7_3/unique_ordered_molecules.csv
    Total molecules: 34
    Valid molecules: 24
    Novel molecules: 24
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_3_7_merged/0618_1553_SQR_sonnet-3.7_3/unique_ordered_molecules.csv
    Total molecules: 46
    Valid molecules: 36
    Novel molecules: 36
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_3_7_merged/0618_1123_J16_sonnet-3.7_3/unique_ordered_molecules.csv
    Total molecules: 44
    Valid molecules: 33
    Novel molecules: 33
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_3_7_merged/0618_2346_5JY_sonnet-3.7_3/unique_ordered_molecules.csv
    Total molecules: 50
    Valid molecules: 40
    Novel molecules: 40
    Novelty rate: 1.000
  - /

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smil

    Total molecules: 40
    Valid molecules: 30
    Novel molecules: 30
    Novelty rate: 1.000

Processing directory 6: /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged
Found 21 CSV files:
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged/unique_ordered_molecules.csv
    Total molecules: 1133
    Valid molecules: 890
    Novel molecules: 882
    Novelty rate: 0.991
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged/0617_1942_A5F_sonnet-4_3/unique_ordered_molecules.csv
    Total molecules: 41
    Valid molecules: 31
    Novel molecules: 30
    Novelty rate: 0.968
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged/0612_2010_OUU_sonnet-4_3/unique_ordered_molecules.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smil

    Total molecules: 41
    Valid molecules: 31
    Novel molecules: 31
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged/0617_2122_SQ6_sonnet-4_3/unique_ordered_molecules.csv
    Total molecules: 41
    Valid molecules: 29
    Novel molecules: 29
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged/0617_1833_G74_sonnet-4_3/unique_ordered_molecules.csv
    Total molecules: 45
    Valid molecules: 35
    Novel molecules: 35
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged/0617_2312_4GK_sonnet-4_3/unique_ordered_molecules.csv
    Total molecules: 43
    Valid molecules: 33
    Novel molecules: 33
    Novelty rate: 1.000
  - /Users/atabeyunlu/multi_agent_paper/runs_metadata/sonnet_4_merged/0617_2239_FA8_sonnet-4_3/unique_ordered_molecules.csv
    Total molecules: 52
    Valid molecules: 42
    Novel molecules: 41
    Novelty rate: 0.976
  - /Users/atabeyunlu/mu

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smiles'] = get_canonical_smiles(df_filtered[smiles_col].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['canonical_smil

In [3]:
from molbloom import buy
buy('O=CN1C(C2CCCC2)C(=O)N1CC=C3C=C(NC4CC(N5CCC6=CC=C(O)C(F)=C6C5=O)CC4)C=C3O',catalog="zinc20", canonicalize=True)

False