# Molecular Property Analyzer – Visualizations

This notebook:
- Uses RDKit to calculate molecular properties and drug-likeness
- Evaluates molecules against Lipinski's Rule of Five
- Calculates QED (Quantitative Estimate of Drug-likeness)
- Visualizes molecular property distributions and comparisons


In [None]:
from rdkit import Chem
from rdkit.Chem import Descriptors, Crippen, Lipinski, QED, rdMolDescriptors
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

class MolecularPropertyAnalyzer:
    """Compute RDKit descriptors and Lipinski Rule-of-Five results from SMILES.

    Single molecules are analysed with ``analyze_molecule``; ``analyze_batch``
    collects the per-molecule results into a pandas DataFrame.
    """

    # (property key, upper limit, label reported when the limit is exceeded)
    _LIPINSKI_LIMITS = (
        ('MolecularWeight', 500, 'MW>500'),
        ('LogP', 5, 'LogP>5'),
        ('NumHDonors', 5, 'HDonors>5'),
        ('NumHAcceptors', 10, 'HAcceptors>10'),
    )

    def __init__(self):
        # Descriptor names, listed in the same order as the keys returned by
        # calculate_basic_properties; used for the empty-batch column layout.
        self.properties = [
            'MolecularWeight', 'LogP', 'NumHDonors', 'NumHAcceptors',
            'TPSA', 'NumRotatableBonds', 'NumHeteroatoms',
            'NumAliphaticRings', 'NumAromaticRings', 'QED'
        ]

    def smiles_to_mol(self, smiles):
        """Parse a SMILES string into an RDKit molecule.

        Args:
            smiles: SMILES representation of the molecule.

        Returns:
            The molecule returned by ``Chem.MolFromSmiles``.

        Raises:
            ValueError: If ``smiles`` is not a non-blank string or RDKit
                cannot parse it.
        """
        # Reject non-string and blank input up front so it surfaces as the
        # same ValueError that analyze_batch already handles, instead of an
        # RDKit argument error or an atom-less molecule.
        if not isinstance(smiles, str) or not smiles.strip():
            raise ValueError(f"Invalid SMILES string: {smiles}")
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise ValueError(f"Invalid SMILES string: {smiles}")
        return mol

    def calculate_basic_properties(self, mol):
        """Return a dict of descriptor values for ``mol``.

        The keys match the names listed in ``self.properties``.
        """
        return {
            'MolecularWeight': Descriptors.MolWt(mol),
            'LogP': Crippen.MolLogP(mol),
            'NumHDonors': Lipinski.NumHDonors(mol),
            'NumHAcceptors': Lipinski.NumHAcceptors(mol),
            'TPSA': rdMolDescriptors.CalcTPSA(mol),
            'NumRotatableBonds': Lipinski.NumRotatableBonds(mol),
            'NumHeteroatoms': rdMolDescriptors.CalcNumHeteroatoms(mol),
            'NumAliphaticRings': rdMolDescriptors.CalcNumAliphaticRings(mol),
            'NumAromaticRings': rdMolDescriptors.CalcNumAromaticRings(mol),
            'QED': QED.qed(mol)
        }

    def evaluate_lipinski(self, props):
        """Check ``props`` against the four Rule-of-Five limits.

        Args:
            props: Mapping containing 'MolecularWeight', 'LogP',
                'NumHDonors' and 'NumHAcceptors'.

        Returns:
            Dict with 'LipinskiPass' (True only when no limit is exceeded)
            and 'LipinskiViolations' (labels of the exceeded limits, in
            the order MW, LogP, donors, acceptors).
        """
        violations = [
            label
            for key, limit, label in self._LIPINSKI_LIMITS
            if props[key] > limit
        ]
        return {'LipinskiPass': not violations, 'LipinskiViolations': violations}

    def analyze_molecule(self, smiles):
        """Return one result row: SMILES, descriptors and Lipinski outcome.

        Raises:
            ValueError: If ``smiles`` cannot be parsed (see smiles_to_mol).
        """
        mol = self.smiles_to_mol(smiles)
        basic_props = self.calculate_basic_properties(mol)
        return {'SMILES': smiles, **basic_props, **self.evaluate_lipinski(basic_props)}

    def analyze_batch(self, smiles_list):
        """Analyse every SMILES in ``smiles_list`` and return a DataFrame.

        Entries that raise ValueError are reported with ``print`` and
        skipped, so the result may have fewer rows than the input. When no
        entry can be analysed, the returned DataFrame is empty but still
        carries the usual column names.
        """
        rows = []
        for smiles in smiles_list:
            try:
                rows.append(self.analyze_molecule(smiles))
            except ValueError as err:
                print(f"Error: {err}")
        if not rows:
            # Keep the schema stable so downstream column access
            # (e.g. df['QED']) does not fail with KeyError on an empty batch.
            columns = ['SMILES', *self.properties, 'LipinskiPass', 'LipinskiViolations']
            return pd.DataFrame(columns=columns)
        return pd.DataFrame(rows)


## Demo Analysis of Example Drugs

We'll use a small set of example drugs to demonstrate calculations and visualizations.

In [None]:
# Example molecules: display name -> SMILES
examples = {
    'Aspirin': 'CC(=O)Oc1ccccc1C(=O)O',
    'Ibuprofen': 'CC(C)Cc1ccc(cc1)C(C)C(=O)O',
    'Caffeine': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',
    'Penicillin G': 'CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O'
}

analyzer = MolecularPropertyAnalyzer()
df = analyzer.analyze_batch(list(examples.values()))
# analyze_batch skips SMILES it cannot parse, so look names up through the
# SMILES column instead of assigning them by position; a positional
# assignment would fail with a length mismatch once any row is dropped.
name_by_smiles = {smiles: name for name, smiles in examples.items()}
df['Name'] = df['SMILES'].map(name_by_smiles)
df

## Visualizations

We can now plot molecular properties, Lipinski compliance, and QED scores.

In [None]:
# Scatter plot: MolecularWeight vs LogP, coloured by Lipinski pass/fail
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='MolecularWeight',
    y='LogP',
    hue='LipinskiPass',
    s=150,
    ax=ax,
)
ax.set_title('Molecular Weight vs LogP with Lipinski Pass')
ax.set_xlabel('Molecular Weight')
ax.set_ylabel('LogP')
plt.show()

# Histogram of QED scores (10 bins, with a KDE overlay)
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(data=df, x='QED', bins=10, kde=True, color='skyblue', ax=ax)
ax.set_title('QED Score Distribution')
ax.set_xlabel('QED')
ax.set_ylabel('Count')
plt.show()

# Bar plot: number of Lipinski violations per drug
df['NumViolations'] = df['LipinskiViolations'].apply(len)
plt.figure(figsize=(8,6))
# seaborn 0.13 deprecates passing `palette` without `hue`; assigning the x
# variable to hue keeps the per-bar pastel colours. dodge=False keeps each
# bar centred on its tick.
ax = sns.barplot(
    data=df, x='Name', y='NumViolations',
    hue='Name', palette='pastel', dodge=False,
)
# With hue set, some seaborn versions add a legend that only repeats the
# x tick labels; remove it when present.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.title('Number of Lipinski Violations per Drug')
plt.ylabel('Number of Violations')
plt.show()