# Network Pharmacology - Molecular Docking

This notebook performs automated molecular docking using AutoDock Vina.

**Requirements:**
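- Upload your `compounds.csv`; it must contain a `name` column and a `smiles` column (one SMILES string per compound)
- Specify the target protein PDB IDs in the Configuration cell (`TARGET_PROTEINS`)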

**Output:**
- Docking scores for each compound-target pair
- Binding pose visualizations

## 1. Install Dependencies

In [None]:
# Install required packages
!pip install -q vina meeko rdkit-pypi biopython py3Dmol pandas numpy
!apt-get install -qq openbabel
print("✅ Dependencies installed successfully!")

## 2. Configuration

In [None]:
# ============================================
# CONFIGURATION - EDIT THIS SECTION
# ============================================

# Label used as the prefix of the results CSV and the zip archive
PROJECT_NAME = "mahkota_dewa_dn"

# Docking targets, written as  gene symbol -> PDB ID.
# Pick the hub genes / key targets identified by the network analysis.
TARGET_PROTEINS = {
    "AKT1": "3O96",      # Example: AKT1 kinase
    "TNF": "2AZ5",       # Example: TNF-alpha
    "VEGFA": "4KZN",     # Example: VEGF
    # Add more targets as needed
}

# Search effort passed to Vina (8-32; higher = more thorough)
EXHAUSTIVENESS = 8
# Number of binding modes (poses) requested per docking run
NUM_MODES = 9

# Edge lengths of the docking search box in Angstroms (x, y, z)
BOX_SIZE = (25, 25, 25)

# Echo the active settings so the run is self-documenting
print(f"Project: {PROJECT_NAME}")
print(f"Targets: {list(TARGET_PROTEINS)}")

## 3. Upload Data Files

In [None]:
from google.colab import files
import pandas as pd
import os

# Create working directories (exist_ok keeps the cell safe to re-run)
for work_dir in ("input", "proteins", "ligands", "results"):
    os.makedirs(work_dir, exist_ok=True)

print("Upload your compounds.csv file:")
uploaded = files.upload()

# Without at least one uploaded file `compounds_df` would never be defined and
# later cells would fail with a confusing NameError, so stop here instead.
if not uploaded:
    raise ValueError("No file was uploaded - re-run this cell and select your compounds.csv")

# Only one compound table is used. As before, the last uploaded file wins,
# but this is now reported instead of happening silently.
uploaded_names = list(uploaded.keys())
filename = uploaded_names[-1]
if len(uploaded_names) > 1:
    print(f"⚠️ {len(uploaded_names)} files uploaded; using '{filename}'")

# Load compounds
compounds_df = pd.read_csv(filename)

# Every later cell relies on these two columns; fail early with a clear message.
missing_columns = [col for col in ("name", "smiles") if col not in compounds_df.columns]
if missing_columns:
    raise ValueError(
        f"'{filename}' is missing required column(s): {missing_columns}. "
        f"Found columns: {list(compounds_df.columns)}"
    )

print(f"\n✅ Loaded {len(compounds_df)} compounds")
print(compounds_df[["name", "smiles"]].head())

## 4. Download Protein Structures from PDB

In [None]:
from Bio.PDB import PDBList
import requests

def download_pdb(pdb_id, output_dir="proteins", timeout=30):
    """Download a PDB-format structure file from RCSB.

    Args:
        pdb_id: PDB identifier, inserted verbatim into the download URL.
        output_dir: Directory that receives ``<pdb_id>.pdb``; created if absent.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Path of the written file, or None when the request fails (network
        error, timeout, or a non-200 HTTP status).
    """
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    # A connection problem must not abort the whole download loop: report it
    # and signal failure the same way a bad HTTP status does.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to download {pdb_id}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download {pdb_id}")
        return None

    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, f"{pdb_id}.pdb")
    with open(filepath, "w") as f:
        f.write(response.text)
    return filepath

# Fetch each configured target; only successful downloads are recorded
protein_files = {}
for gene, pdb_id in TARGET_PROTEINS.items():
    print(f"Downloading {gene} ({pdb_id})...", end=" ")
    downloaded_path = download_pdb(pdb_id)
    if not downloaded_path:
        print("❌")
        continue
    protein_files[gene] = downloaded_path
    print("✅")

print(f"\n✅ Downloaded {len(protein_files)} protein structures")

## 5. Prepare Proteins (Remove Water, Add Hydrogens)

In [None]:
def prepare_protein(pdb_file, output_pdbqt):
    """Prepare a receptor for docking: strip heteroatoms, add hydrogens, write PDBQT.

    Waters, co-crystallized ligands and ions (HETATM records) are removed by
    filtering the PDB text before conversion, because the Open Babel flags used
    here (``-xr`` rigid receptor output, ``-h`` add hydrogens) do not remove them.
    Note that modified residues stored as HETATM are dropped as well.

    Args:
        pdb_file: Path to the input PDB file.
        output_pdbqt: Path of the PDBQT file to create. An existing file at
            this path is deleted first so an old result cannot be mistaken
            for a successful conversion.

    Returns:
        ``output_pdbqt`` if a non-empty file was produced, otherwise None
        (unreadable input, no ATOM records, obabel missing, or conversion failed).
    """
    import subprocess
    import tempfile

    # Remove any stale output so the existence check below reflects this run only.
    if os.path.exists(output_pdbqt):
        os.remove(output_pdbqt)

    # Keep only polymer coordinate records; HETATM and CONECT lines are dropped.
    kept_records = ("ATOM", "TER", "MODEL", "ENDMDL", "END")
    try:
        with open(pdb_file, "r") as src:
            protein_lines = [line for line in src if line.startswith(kept_records)]
    except OSError as exc:
        print(f"Could not read {pdb_file}: {exc}")
        return None

    if not any(line.startswith("ATOM") for line in protein_lines):
        print(f"No ATOM records found in {pdb_file}")
        return None

    # obabel infers the input format from the extension, hence the .pdb suffix.
    fd, cleaned_pdb = tempfile.mkstemp(suffix=".pdb")
    try:
        with os.fdopen(fd, "w") as dst:
            dst.writelines(protein_lines)

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through untouched.
        try:
            proc = subprocess.run(
                ["obabel", cleaned_pdb, "-O", output_pdbqt, "-xr", "-h"],
                capture_output=True,
                text=True,
            )
        except OSError as exc:  # e.g. obabel is not installed
            print(f"Could not run obabel: {exc}")
            return None
    finally:
        os.remove(cleaned_pdb)

    # A zero-byte file is not a usable receptor, so it does not count as success.
    if os.path.exists(output_pdbqt) and os.path.getsize(output_pdbqt) > 0:
        return output_pdbqt

    if proc.stderr:
        print(proc.stderr.strip())
    return None

# Convert every downloaded structure; keep only the ones that converted
prepared_proteins = {}
for gene, pdb_file in protein_files.items():
    print(f"Preparing {gene}...", end=" ")
    pdbqt_file = pdb_file.replace(".pdb", ".pdbqt")
    result = prepare_protein(pdb_file, pdbqt_file)
    if not result:
        print("❌")
        continue
    prepared_proteins[gene] = pdbqt_file
    print("✅")

print(f"\n✅ Prepared {len(prepared_proteins)} proteins")

## 6. Prepare Ligands (Compounds)

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
import subprocess

def smiles_to_pdbqt(smiles, name, output_dir="ligands"):
    """Convert a SMILES string to a 3D ligand in PDBQT format.

    The molecule is parsed with RDKit, hydrogens are added, a 3D conformer is
    embedded (fixed seed 42) and MMFF-optimized, written to
    ``<output_dir>/<name>.sdf`` and converted to ``<output_dir>/<name>.pdbqt``
    with Open Babel.

    Args:
        smiles: SMILES string of the compound.
        name: Base file name (without extension) for the SDF/PDBQT files.
        output_dir: Directory that receives the generated files.

    Returns:
        Path of the PDBQT file, or None if the SMILES cannot be parsed, no 3D
        coordinates can be generated, or the conversion yields no output.
        Errors are printed, never raised.
    """
    sdf_file = os.path.join(output_dir, f"{name}.sdf")
    pdbqt_file = os.path.join(output_dir, f"{name}.pdbqt")

    try:
        # Generate 3D structure
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None

        mol = Chem.AddHs(mol)

        # EmbedMolecule returns -1 when it could not produce a conformer.
        # Retry once from random coordinates before giving up.
        if AllChem.EmbedMolecule(mol, randomSeed=42) == -1:
            if AllChem.EmbedMolecule(mol, randomSeed=42, useRandomCoords=True) == -1:
                print(f"Error processing {name}: could not generate 3D coordinates")
                return None
        AllChem.MMFFOptimizeMolecule(mol)

        # Save as SDF; close the writer even if the write fails
        writer = Chem.SDWriter(sdf_file)
        try:
            writer.write(mol)
        finally:
            writer.close()

        # Drop any earlier output so a failed conversion cannot return an old file.
        if os.path.exists(pdbqt_file):
            os.remove(pdbqt_file)

        # Convert to PDBQT. An argument list (no shell) keeps compound names with
        # parentheses, apostrophes or spaces from breaking or altering the command.
        subprocess.run(["obabel", sdf_file, "-O", pdbqt_file, "-h"], capture_output=True)

        if os.path.exists(pdbqt_file) and os.path.getsize(pdbqt_file) > 0:
            return pdbqt_file

    except Exception as e:
        print(f"Error processing {name}: {e}")

    return None

# Prepare all ligands
ligand_files = {}

# A row is usable only if it has both a SMILES string and a name: a missing
# name would otherwise raise AttributeError on .replace() and abort the loop.
compounds_with_smiles = compounds_df[
    compounds_df["smiles"].notna() & compounds_df["name"].notna()
]

print(f"Preparing {len(compounds_with_smiles)} ligands...")
for _, row in compounds_with_smiles.iterrows():
    # Names may be parsed as numbers (e.g. numeric compound IDs); normalise to
    # str so string operations here and in later cells are safe.
    compound = str(row["name"])
    name = compound.replace(" ", "_")
    smiles = row["smiles"]

    result = smiles_to_pdbqt(smiles, name)
    if result:
        ligand_files[compound] = result
        print(f"  ✅ {name}")
    else:
        print(f"  ❌ {name}")

print(f"\n✅ Prepared {len(ligand_files)} ligands")

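## 7. Calculate Docking Box Center (Protein Geometric Center)

The docking box is centered on the geometric center of all atoms in each structure. This is an approximation of the search region, not a detected binding pocket.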

In [None]:
from Bio.PDB import PDBParser
import numpy as np

def get_protein_center(pdb_file):
    """Return the mean atomic position of a PDB structure.

    Every atom of every residue, chain and model in the file contributes
    equally (unweighted). The result is the centroid of the whole structure,
    not a detected binding pocket.

    Args:
        pdb_file: Path to the PDB file to parse.

    Returns:
        NumPy array holding the per-axis mean of all atom coordinates.
    """
    structure = PDBParser(QUIET=True).get_structure("protein", pdb_file)

    # Flatten the model -> chain -> residue -> atom hierarchy into one array
    atom_positions = np.array([
        atom.get_coord()
        for model in structure
        for chain in model
        for residue in chain
        for atom in residue
    ])

    return atom_positions.mean(axis=0)

# Map each gene to the centroid of its downloaded structure
protein_centers = {}
for gene in protein_files:
    pdb_file = protein_files[gene]
    center = get_protein_center(pdb_file)
    protein_centers[gene] = center
    print(f"{gene}: Center = ({center[0]:.2f}, {center[1]:.2f}, {center[2]:.2f})")

## 8. Run Molecular Docking

In [None]:
from vina import Vina
from tqdm import tqdm

def run_docking(receptor_pdbqt, ligand_pdbqt, center, box_size, output_pdbqt):
    """Dock one ligand into one receptor with AutoDock Vina.

    Uses the notebook-level EXHAUSTIVENESS and NUM_MODES settings.

    Args:
        receptor_pdbqt: Path to the prepared receptor PDBQT file.
        ligand_pdbqt: Path to the prepared ligand PDBQT file.
        center: Search-box center as three numbers (NumPy array, list or tuple).
        box_size: Search-box edge lengths as three numbers.
        output_pdbqt: Path where the docked poses are written (overwritten
            if it already exists).

    Returns:
        The energy of the top-ranked pose as a float (first column of the
        first row of ``Vina.energies()``), or None if docking failed or
        produced no poses. Errors are printed, never raised.
    """
    try:
        v = Vina(sf_name='vina')
        v.set_receptor(receptor_pdbqt)
        v.set_ligand_from_file(ligand_pdbqt)

        v.compute_vina_maps(
            center=[float(coord) for coord in center],  # accepts arrays and plain sequences
            box_size=list(box_size)
        )

        v.dock(exhaustiveness=EXHAUSTIVENESS, n_poses=NUM_MODES)

        # energies() returns a NumPy array; testing it with `if energies` raises
        # "truth value of an array is ambiguous", which the except below used to
        # swallow - turning every successful run into a failure. Check the
        # number of rows instead.
        energies = v.energies(n_poses=NUM_MODES)
        if len(energies) == 0:
            return None
        best_score = float(energies[0][0])

        # overwrite=True so re-running the notebook does not fail on existing files
        v.write_poses(output_pdbqt, n_poses=NUM_MODES, overwrite=True)

        return best_score

    except Exception as e:
        print(f"Docking error: {e}")
        return None

# Run docking for all compound-target pairs
results = []

total_pairs = len(prepared_proteins) * len(ligand_files)
print(f"Running {total_pairs} docking simulations...\n")

os.makedirs("results", exist_ok=True)  # pose files are written here

# The context manager closes the progress bar even if a run raises.
with tqdm(total=total_pairs) as progress:
    for gene, receptor in prepared_proteins.items():
        center = protein_centers[gene]

        for compound, ligand in ligand_files.items():
            compound_clean = str(compound).replace(" ", "_")
            output = f"results/{gene}_{compound_clean}_docked.pdbqt"

            score = run_docking(receptor, ligand, center, BOX_SIZE, output)

            results.append({
                "compound": compound,
                "target": gene,
                "pdb_id": TARGET_PROTEINS[gene],
                "binding_affinity": score,
                # `is not None` rather than truthiness: a score of exactly 0.0
                # is a valid result and must keep its pose file reference.
                "output_file": output if score is not None else None
            })

            progress.update(1)

print("\n✅ Docking complete!")

## 9. Analyze Results

In [None]:
# Create results DataFrame.
# Explicit columns keep the frame well-formed even when `results` is empty, and
# to_numeric turns failed runs (None) into NaN so sorting and formatting are safe.
result_columns = ["compound", "target", "pdb_id", "binding_affinity", "output_file"]
results_df = (
    pd.DataFrame(results, columns=result_columns)
    .assign(binding_affinity=lambda df: pd.to_numeric(df["binding_affinity"], errors="coerce"))
    .sort_values("binding_affinity")  # more negative = better; NaN (failed) rows sort last
)

# Rankings only make sense for runs that produced a score
docked_df = results_df.dropna(subset=["binding_affinity"])
n_failed = len(results_df) - len(docked_df)

print("\n" + "="*60)
print("DOCKING RESULTS SUMMARY")
print("="*60)

if n_failed:
    print(f"\n⚠️ {n_failed} of {len(results_df)} docking runs failed and are excluded from the rankings")

# Best compounds for each target
print("\nTop 3 compounds per target:")
for gene in TARGET_PROTEINS.keys():
    print(f"\n{gene}:")
    top = docked_df[docked_df["target"] == gene].head(3)
    if top.empty:
        print("  (no successful docking runs)")
        continue
    for _, row in top.iterrows():
        print(f"  {row['compound']}: {row['binding_affinity']:.2f} kcal/mol")

# Overall best
print("\n" + "-"*60)
print("Overall Top 10 Compound-Target Pairs:")
if docked_df.empty:
    print("(no successful docking runs)")
else:
    print(docked_df.head(10).to_string(index=False))

# Save results (all pairs, including failed runs, so nothing is silently lost)
os.makedirs("results", exist_ok=True)
results_df.to_csv(f"results/{PROJECT_NAME}_docking_results.csv", index=False)
print(f"\n✅ Results saved to results/{PROJECT_NAME}_docking_results.csv")

## 10. Visualize Best Binding Poses

In [None]:
import py3Dmol

def visualize_docking(protein_pdb, docked_pdbqt, title=""):
    """Render a receptor together with a docked ligand in a py3Dmol viewer.

    Args:
        protein_pdb: Path to the receptor PDB file (drawn as a light-blue cartoon).
        docked_pdbqt: Path to the docked-pose PDBQT file (drawn as sticks).
        title: Optional caption printed before the viewer is shown.

    Returns:
        Whatever ``viewer.show()`` returns.
    """
    viewer = py3Dmol.view(width=800, height=600)

    # (path, format, style) for each model, in the order they are added;
    # the position in this tuple is the model index used for styling.
    model_specs = (
        (protein_pdb, "pdb", {"cartoon": {"color": "lightblue"}}),
        (docked_pdbqt, "pdbqt", {"stick": {"colorscheme": "greenCarbon"}}),
    )
    for model_index, (path, fmt, style) in enumerate(model_specs):
        with open(path, "r") as handle:
            contents = handle.read()
        viewer.addModel(contents, fmt)
        viewer.setStyle({"model": model_index}, style)

    viewer.zoomTo()

    if title:
        print(f"\n{title}")

    return viewer.show()

# Show the three best-ranked pairs whose pose file and receptor are available
print("\nTop 3 Binding Poses:")
for _, row in results_df.head(3).iterrows():
    pose_file = row["output_file"]
    if not (pose_file and os.path.exists(pose_file)):
        continue

    gene = row["target"]
    protein = protein_files.get(gene)
    if not protein:
        continue

    title = f"{row['compound']} + {gene} (Affinity: {row['binding_affinity']:.2f} kcal/mol)"
    visualize_docking(protein, pose_file, title)

## 11. Download Results

In [None]:
import shutil

# Bundle the whole results/ directory into <PROJECT_NAME>_docking_results.zip
archive_base = f"{PROJECT_NAME}_docking_results"
shutil.make_archive(archive_base, "zip", "results")

# Hand the archive to the browser for download
files.download(f"{PROJECT_NAME}_docking_results.zip")
print("✅ Results downloaded!")