In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
# Configure seaborn in a single call. `sns.set(...)` re-applies the whole theme
# and falls back to defaults for any parameter it is not given, so calling
# `sns.set(color_codes=True)` *after* `set_style` / `set_context` silently
# discarded the 'talk' context and the 1.5 font scale.
sns.set(context='talk', style='darkgrid', font_scale=1.5, color_codes=True)

%matplotlib inline

In [2]:
from ysi_utils.data import low
from ysi_utils.tools import descriptors, chemical_conversions

### Optimize 3D geometries in preparation for the Dragon molecular descriptor (MD) library

In [3]:
# One row per unique (SMILES, CAS) pair found in `low`.
smiles = low[['SMILES', 'CAS']].drop_duplicates()

In [4]:
# Extra molecules, given as [name, CAS number], to add to the SMILES/CAS
# table built from `low`.
addl_molecules = [
    ['cyclopentanone',        '120-92-3'],
    ['2-methylbut-3-en-2-ol', '115-18-4'],
    ['2,5-dimethylfuran',     '625-86-5'],
]

molecule_df = pd.DataFrame(addl_molecules, columns=['Name', 'CAS'])

# Pass each CAS number through `get_smiles_from_cas`, then through
# `canonicalize_smiles`, and store the result as the SMILES column.
molecule_df['SMILES'] = (
    molecule_df['CAS']
    .apply(chemical_conversions.get_smiles_from_cas)
    .apply(chemical_conversions.canonicalize_smiles)
)

# `DataFrame.append` was deprecated and then removed in pandas 2.0;
# `pd.concat` is the drop-in replacement (columns are aligned by name and the
# original row labels are kept, exactly as `append` did).
# NOTE: this cell is not idempotent -- re-running it without first re-running
# the cell that defines `smiles` adds these rows a second time.
smiles = pd.concat([smiles, molecule_df.loc[:, ['CAS', 'SMILES']]])

In [5]:
# Write the SMILES and CAS columns of `smiles` to 'dragon_input.sdf' via the
# project helper `descriptors.write_sdf`.
descriptors.write_sdf(
    smiles['SMILES'],
    smiles['CAS'],
    output_file='dragon_input.sdf',
)

100%|██████████| 300/300 [00:02<00:00, 120.65it/s]


### Load resulting molecular descriptors
The descriptor files loaded below were generated by running:
```
dragon7shell -s dragon_script.drt
dragon7shell -s dragon_script_qm.drt
```

In [20]:
# Read the tab-separated Dragon output; the first column becomes the index.
dragon_descriptors = pd.read_csv('dragon_output.tsv', sep='\t', index_col=0)

# Relabel the rows with the SMILES strings. This is a purely positional
# assignment (pandas only checks that the lengths match).
# NOTE(review): assumes the rows of dragon_output.tsv are in the same order
# as `smiles` -- confirm against the Dragon script.
dragon_descriptors.index = smiles.SMILES

# Use the `columns=` keyword: passing the axis positionally (`drop('NAME', 1)`)
# is rejected by pandas >= 2.0. Reassigning instead of `inplace=True` gives the
# same end result.
dragon_descriptors = dragon_descriptors.drop(columns='NAME')

dragon_descriptors.to_pickle('../../ysi_utils/descriptors/dragon.p')

In [21]:
# Read the second tab-separated Dragon output; the first column becomes the index.
dragon_descriptors_qm = pd.read_csv('dragon_output_qm.tsv', sep='\t', index_col=0)

# Relabel the rows with the SMILES column stored in 'qm_input_key.p'
# (positional assignment; pandas only checks that the lengths match).
# NOTE(review): `read_pickle` can execute arbitrary code while loading -- only
# use it on a key file you produced yourself.
dragon_descriptors_qm.index = pd.read_pickle('qm_input_key.p').SMILES

# Use the `columns=` keyword: passing the axis positionally (`drop('NAME', 1)`)
# is rejected by pandas >= 2.0.
dragon_descriptors_qm = dragon_descriptors_qm.drop(columns='NAME')

# Select the rows labelled by `smiles.SMILES`, in that order.
dragon_descriptors_qm = dragon_descriptors_qm.loc[smiles.SMILES]

dragon_descriptors_qm.to_pickle('../../ysi_utils/descriptors/dragon_qm.p')