# Molecular properties in DataFrame

## Define dictionary of molecules

In [1]:
molecules = {
    'Aspirin': 'CC(=O)OC1=CC=CC=C1C(=O)O',
    'Paracetamol': 'CC(=O)NC1=CC=C(O)C=C1',
    'Caffeine': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',
    'Ibuprofen': 'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O',
    'Penicillin G': 'CC1(C(=O)NC(=O)C2=CSC(N3CCOC3)=N12)C(O)=O',
    'Atorvastatin': 'CC(C)C1=CC=C(C=C1)C2=C(C(=O)N(C(=O)N2C)C(C3=CC=CC=C3)O)O',
    'Metformin': 'CNC(=N)NC(=N)N(C)C',
    'Simvastatin': 'CCC(C)C(C(=O)OC1CCC2(C1C3CC(O3)C(=O)O2)C)OC',
    'Losartan': 'CC(C)CC1=CC=C(C=C1)C2=CNC3=CC=CC=C3N2C(=O)O',
    'Amoxicillin': 'CC1(C(=O)NC(=O)C2=CSC(N3CCOC3)=N12)C(O)=O'
}

## Calculate molecular properties

In [2]:
from src.utils import MolecularPropertiesCalculator

calculator = MolecularPropertiesCalculator(molecules)
df = calculator.calculate(as_df=True)
df

[15:25:22] Explicit valence for atom # 16 N, 4, is greater than permitted
[15:25:22] Explicit valence for atom # 16 N, 4, is greater than permitted


Unnamed: 0,MW,LogP,HBD,HBA,RB,AR,TPSA,HAC,FSP3,ASA,smiles
Aspirin,180.159,1.3101,1.0,3.0,2.0,1.0,63.60,13.0,0.111111,74.757053,CC(=O)OC1=CC=CC=C1C(=O)O
Paracetamol,151.165,1.3506,2.0,2.0,1.0,1.0,49.33,11.0,0.125000,64.666905,CC(=O)NC1=CC=C(O)C=C1
...,...,...,...,...,...,...,...,...,...,...,...
Simvastatin,326.389,1.8423,0.0,6.0,5.0,0.0,71.06,23.0,0.882353,136.338057,CCC(C)C(C(=O)OC1CCC2(C1C3CC(O3)C(=O)O2)C)OC
Losartan,308.381,4.7936,2.0,2.0,3.0,2.0,52.57,23.0,0.210526,135.613110,CC(C)CC1=CC=C(C=C1)C2=CNC3=CC=CC=C3N2C(=O)O


## Ranking

### Drop SMILES column

In [3]:
df = df.drop(columns=['smiles'])
df

Unnamed: 0,MW,LogP,HBD,HBA,RB,AR,TPSA,HAC,FSP3,ASA
Aspirin,180.159,1.3101,1.0,3.0,2.0,1.0,63.60,13.0,0.111111,74.757053
Paracetamol,151.165,1.3506,2.0,2.0,1.0,1.0,49.33,11.0,0.125000,64.666905
...,...,...,...,...,...,...,...,...,...,...
Simvastatin,326.389,1.8423,0.0,6.0,5.0,0.0,71.06,23.0,0.882353,136.338057
Losartan,308.381,4.7936,2.0,2.0,3.0,2.0,52.57,23.0,0.210526,135.613110


### Sort values

In [4]:
df = df.sort_values(by='MW', ascending=False)
df

Unnamed: 0,MW,LogP,HBD,HBA,RB,AR,TPSA,HAC,FSP3,ASA
Atorvastatin,366.417,2.58220,2.0,6.0,4.0,3.0,84.46,27.0,0.238095,156.141053
Simvastatin,326.389,1.84230,0.0,6.0,5.0,0.0,71.06,23.0,0.882353,136.338057
...,...,...,...,...,...,...,...,...,...,...
Paracetamol,151.165,1.35060,2.0,2.0,1.0,1.0,49.33,11.0,0.125000,64.666905
Metformin,143.194,-0.77346,4.0,2.0,0.0,0.0,75.00,10.0,0.600000,60.429280


### Styles

In [5]:
dfs = (df
 .style
    .background_gradient(cmap='Purples', axis=0)
    .format(precision=1)
)

dfs

Unnamed: 0,MW,LogP,HBD,HBA,RB,AR,TPSA,HAC,FSP3,ASA
Atorvastatin,366.4,2.6,2.0,6.0,4.0,3.0,84.5,27.0,0.2,156.1
Simvastatin,326.4,1.8,0.0,6.0,5.0,0.0,71.1,23.0,0.9,136.3
Losartan,308.4,4.8,2.0,2.0,3.0,2.0,52.6,23.0,0.2,135.6
Ibuprofen,206.3,3.1,1.0,1.0,4.0,1.0,37.3,15.0,0.5,90.9
Caffeine,194.2,-1.0,0.0,6.0,0.0,2.0,61.8,14.0,0.4,79.0
Aspirin,180.2,1.3,1.0,3.0,2.0,1.0,63.6,13.0,0.1,74.8
Paracetamol,151.2,1.4,2.0,2.0,1.0,1.0,49.3,11.0,0.1,64.7
Metformin,143.2,-0.8,4.0,2.0,0.0,0.0,75.0,10.0,0.6,60.4


### Output

In [6]:
import dataframe_image as dfi

dfi.export(dfs, 'molecular_properties.png')