In [5]:
import os

import openpyxl
import pandas as pd
import xlsxwriter
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import Draw
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions

In [6]:
# Load the input spreadsheet; the functions below read its `SMILES` column.
df = pd.read_excel('ja3c08346_si.xlsx', engine='openpyxl')
# Quick sanity check of the first rows.
print(df.head())
def genereation_images(data):
    """Render every SMILES string in ``data.SMILES`` to a PNG file.

    Each molecule is drawn at 150x100 pixels and saved as
    ``./generation/img<SMILES>.png``. The ``./generation`` folder is created
    if it does not exist yet.

    Args:
        data: Object exposing a ``SMILES`` column that supports ``.tolist()``
            (the DataFrame read from the input spreadsheet).

    Entries that are not strings, or that RDKit cannot parse, are reported on
    stdout and skipped so that one bad row does not abort the whole batch.
    """
    output_dir = './generation'
    # Create the output folder once, up front, instead of re-checking on every
    # iteration; exist_ok makes repeated runs a no-op.
    os.makedirs(output_dir, exist_ok=True)

    for smiles in data.SMILES.tolist():
        # Chem.MolFromSmiles returns None for unparsable input, and non-string
        # cells (e.g. NaN from an empty spreadsheet row) cannot be parsed at all.
        mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
        if mol is None:
            print(f'Skipping invalid SMILES: {smiles!r}')
            continue
        # NOTE(review): the raw SMILES is used as the file name. SMILES that
        # contain path separators ('/' or '\\' stereo-bond markers) would not
        # map to a valid file inside output_dir -- confirm the dataset has none.
        Draw.MolToFile(mol, f'{output_dir}/img{smiles}.png', size=(150, 100))

def load_images(data):
    """Write the SMILES strings and their rendered images to an Excel workbook.

    Creates ``dataset_with_images.xlsx`` with a header row (``SMILES`` /
    ``Image``), then one row per entry of ``data.SMILES``: the SMILES text in
    column A and the picture ``./generation/img<SMILES>.png`` in column B.

    Args:
        data: Object exposing a ``SMILES`` column that supports ``.tolist()``.
    """
    # The images are rendered at 150x100 px. With default cell sizes each
    # picture would spill over the following rows and overlap its neighbours,
    # so size the cells to hold one image each.
    image_row_height_pt = 75   # 100 px at 0.75 pt per px
    image_col_width = 21       # character units, roughly 150 px

    # The context manager closes (and therefore writes) the workbook even if
    # an exception is raised while filling it.
    with xlsxwriter.Workbook('dataset_with_images.xlsx') as workbook:
        worksheet = workbook.add_worksheet()
        worksheet.write('A1', 'SMILES')
        worksheet.write('B1', 'Image')
        worksheet.set_column('B:B', image_col_width)

        # Data starts on spreadsheet row 2, directly below the header.
        for row, smiles in enumerate(data.SMILES.tolist(), start=2):
            # set_row takes a 0-indexed row, A1-style references are 1-indexed.
            worksheet.set_row(row - 1, image_row_height_pt)
            worksheet.write(f'A{row}', smiles)
            worksheet.insert_image(f'B{row}', f'./generation/img{smiles}.png')


def insert_descriptor(data):
    """Add molecular formula and molecular weight columns to the workbook.

    Opens ``dataset_with_images.xlsx``, writes the headers ``Molecular
    Formula`` (column C) and ``Molecular Weight`` (column D), fills one row
    per entry of ``data.SMILES`` starting at row 2, prints each result, and
    saves the workbook under the same name.

    Args:
        data: Object exposing a ``SMILES`` column that supports ``.tolist()``.

    Entries that are not strings, or that RDKit cannot parse, are reported on
    stdout and their descriptor cells are left empty.
    """
    # NOTE(review): openpyxl does not read every item of an existing workbook;
    # confirm that the images embedded in column B survive this load/save
    # round trip with the installed openpyxl (and Pillow) versions.
    workbook = openpyxl.load_workbook('dataset_with_images.xlsx')
    worksheet = workbook.active
    worksheet['C1'] = 'Molecular Formula'
    worksheet['D1'] = 'Molecular Weight'

    # Data starts on spreadsheet row 2, directly below the header.
    for row, smiles in enumerate(data.SMILES.tolist(), start=2):
        # Chem.MolFromSmiles returns None for unparsable input, and non-string
        # cells (e.g. NaN from an empty spreadsheet row) cannot be parsed at all.
        mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
        if mol is None:
            print(f'Skipping invalid SMILES: {smiles!r}')
            continue
        # Use the explicitly imported module rather than relying on
        # `Chem.rdMolDescriptors` having been loaded as a side effect of
        # another import.
        molecular_formula = rdMolDescriptors.CalcMolFormula(mol)
        molwt = Descriptors.MolWt(mol)
        worksheet[f'C{row}'] = molecular_formula
        worksheet[f'D{row}'] = molwt
        print(f'{smiles} {molecular_formula} {molwt}')

    workbook.save('dataset_with_images.xlsx')

# Run the stages in order: load_images embeds the PNG files written by
# genereation_images, and insert_descriptor reopens the workbook that
# load_images creates.
for pipeline_step in (genereation_images, load_images, insert_descriptor):
    pipeline_step(df)

    SMILES
0     CCOC
1  COC(C)C
2    CCOCC
3    CCCOC
4    COCOC
CCOC C3H8O 60.096000000000004
COC(C)C C4H10O 74.123
CCOCC C4H10O 74.123
CCCOC C4H10O 74.12299999999999
COCOC C3H8O2 76.095
C1CCOC1 C4H8O 72.107
C1COCO1 C3H6O2 74.079
COC(C)(C)C C5H12O 88.14999999999999
CCOC(C)C C5H12O 88.15
CCC(C)OC C5H12O 88.14999999999999
COC(C)OC C4H10O2 90.122
CCCOCC C5H12O 88.14999999999999
CCOCOC C4H10O2 90.122
COCC(C)C C5H12O 88.15
CCCCOC C5H12O 88.14999999999999
COCCOC C4H10O2 90.122
CC1CCCO1 C5H10O 86.134
CC1COCO1 C4H8O2 88.106
CC1OCCO1 C4H8O2 88.106
CC1CCOC1 C5H10O 86.134
C1CCOCC1 C5H10O 86.134
C1COCCO1 C4H8O2 88.106
C1COCOC1 C4H8O2 88.106
C1OCOCO1 C3H6O3 90.078
CCOC(C)(C)C C6H14O 102.17699999999999
CCC(C)(C)OC C6H14O 102.17699999999998
COC(C)(C)OC C5H12O2 104.14899999999999
CC(C)OC(C)C C6H14O 102.17699999999999
CCOC(C)CC C6H14O 102.177
CCCOC(C)C C6H14O 102.17699999999998
COCOC(C)C C5H12O2 104.14899999999999
COC(C)C(C)C C6H14O 102.17699999999999
CCC(CC)OC C6H14O 102.17699999999999
CCCC(C)OC C6H