In [3]:
import pandas as pd
import pyrfume
import os
from PIL import Image, ImageOps
import base64
from io import BytesIO

In [4]:
# Get SMILES from master molecule list
molecules = pyrfume.load_data('molecules/molecules.csv')

print(molecules.shape)
molecules.head()

(9984, 4)


Unnamed: 0_level_0,MolecularWeight,IsomericSMILES,IUPACName,name
CID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-955348933095,240.387,CCCCC=COC(=O)CCCCCCCC,,Hexenyl nonanoate
-923209957509,196.29,CC(=O)OCC1C=CC(C(C)C)CC1,,Tetrahydrocuminyl acetate
-874408321546,244.331,CCCCCCCCC(OC(C)=O)C(=O)OC,,Methyl acetoxydecanoate
-873963935677,198.306,CCCCC=COC(=O)C(C)CCC,,Hexenyl methylvalerate
-862841422647,148.271,CCCC(S)COCC,,Ethoxymethylbutanethiol


In [5]:
# Calculate stucture for each unique SMILES
smiles = molecules[~molecules.index.duplicated()]['IsomericSMILES'].to_list()

im_dict = {}
for smi in set(smiles):
    try:
        image = pyrfume.odorants.smiles_to_image(smi, png=False, crop=True, padding=0, size=120)

        # Crop
        bbox = ImageOps.invert(image).getbbox()
        image = image.crop(bbox)

        # Convert to base64
        buffer = BytesIO()
        image.save(buffer, format='PNG')
        image = buffer.getvalue()
        image = base64.b64encode(image).decode("utf8")

        im_dict[smi] = image
    except:
        print(f'Could not create structure for {smi}')

Could not create structure for F[Si-2](F)(F)(F)(F)F.[Na+].[Na+]
Could not create structure for [NH4+].[NH4+].F[Si-2](F)(F)(F)(F)F


In [6]:
# Convert to dataframe
df = pd.DataFrame.from_dict(im_dict, orient='index', columns=['Image_base64'])
df.index.name = 'SMILES'

print(df.shape)
df.head()

(9969, 1)


Unnamed: 0_level_0,Image_base64
SMILES,Unnamed: 1_level_1
C12=C(C(=C(C(=C1Cl)Cl)Cl)Cl)C(=O)OC2=O,iVBORw0KGgoAAAANSUhEUgAAAG4AAABsCAIAAAAE8RCnAA...
CC(=O)C(=O)C1=CC(=C(C=C1)O)OC,iVBORw0KGgoAAAANSUhEUgAAAG4AAABDCAIAAADyC6dEAA...
CCCCCC(=O)OC(C)(CCC=C(C)C)C=C,iVBORw0KGgoAAAANSUhEUgAAAG8AAABECAIAAAAAzPzCAA...
CCC(=O)NC1=CC=CC=C1C(=O)OC,iVBORw0KGgoAAAANSUhEUgAAAG4AAABbCAIAAAAdjieyAA...
CC1=C(C=CC(=C1)Cl)N,iVBORw0KGgoAAAANSUhEUgAAAG4AAAA8CAIAAAAIfxhQAA...


In [7]:
# Write to disk
df.to_csv(os.path.join('../', 'static', 'structures.csv'))