# naturaDock Example Workflow

This notebook demonstrates a complete workflow using the `naturaDock` pipeline, from preparing input files to running the docking simulation and analyzing the results.

## 1. Setup and Dependencies

First, ensure you have all the necessary dependencies installed. You can install them using pip:

```bash
pip install -r requirements.txt
```

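Also, make sure AutoDock Vina is installed and accessible. The `vina` executable (`vina.exe` on Windows) should be on your system's PATH or explicitly specified in the `vina_dock.py` module. This notebook additionally checks a few common install locations (Section 4) and passes the resolved path to the pipeline through the `VINA_EXECUTABLE` environment variable (Section 7).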

## 2. Import Required Libraries and Check Dependencies


In [None]:
import sys
import os
import subprocess
import shutil
from pathlib import Path

# Check and install required packages.
# Installing from inside the notebook is only a convenience fallback; the
# `pip install -r requirements.txt` step from Section 1 is the preferred route.
import importlib

required_packages = ['toml', 'pandas', 'matplotlib']
for package in required_packages:
    try:
        importlib.import_module(package)
        print(f"✓ {package} is available")
    except ImportError:
        print(f"⚠ Installing {package}...")
        # Use the kernel's own interpreter so the package lands in the
        # environment this notebook is actually running in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # A package installed while the interpreter is running may stay
        # invisible to the import system until its finder caches are cleared;
        # without this the imports that follow can still fail.
        importlib.invalidate_caches()

# Now import the packages
import toml
import pandas as pd
from IPython.display import Image, display

print("All dependencies loaded successfully!")

## 3. Setup Project Structure and Paths


In [None]:
# Working directory of the running kernel. This is usually the folder that
# contains the notebook, but that depends on how Jupyter was launched —
# check the printed value if the project root below is not detected correctly.
current_dir = Path.cwd()
print(f"Current directory: {current_dir}")

# Try to find project root by looking for common project files
def find_project_root(start_path):
    """Walk upward from ``start_path`` and return the first project-like directory.

    A directory counts as project-like when it directly contains at least one
    of the marker entries (``setup.py``, ``pyproject.toml``,
    ``requirements.txt``, ``src`` or ``.git``). ``start_path`` itself is
    checked first, then each of its parents from nearest to farthest.

    Args:
        start_path: Directory (str or Path) at which the search begins.

    Returns:
        Path: The first matching directory, or ``Path(start_path)`` when no
        directory on the way up contains any marker.
    """
    markers = ('setup.py', 'pyproject.toml', 'requirements.txt', 'src', '.git')
    origin = Path(start_path)

    def has_marker(directory):
        # True as soon as one marker entry is present in `directory`.
        for marker in markers:
            if (directory / marker).exists():
                return True
        return False

    candidates = (origin, *origin.parents)
    # Fall back to the starting directory when nothing matches.
    return next((folder for folder in candidates if has_marker(folder)), origin)

# Resolve the project root starting from the notebook's working directory.
project_root = find_project_root(current_dir)
print(f"Project root: {project_root}")

# The pipeline sources are expected under <project_root>/src/naturaDock.
naturarock_src = project_root.joinpath("src", "naturaDock")
if not naturarock_src.exists():
    print(f"⚠ naturaDock source not found at expected location: {naturarock_src}")
    print("Please adjust the project_root path or ensure the notebook is in the correct location")
else:
    print(f"✓ Found naturaDock source at: {naturarock_src}")

## 4. Check for AutoDock Vina Installation


In [None]:
def find_vina_executable():
    """Locate the AutoDock Vina executable.

    The search order is:

    1. ``vina`` and ``vina.exe`` on the system PATH (via ``shutil.which``).
    2. A short list of conventional install locations, followed by the
       current working directory.

    Returns:
        str | None: Path of the first match, or ``None`` when nothing is found.
    """
    possible_names = ['vina', 'vina.exe']
    possible_paths = [
        # Common installation paths
        "C:\\Program Files\\AutoDockVina\\vina.exe",
        "C:\\Program Files (x86)\\AutoDockVina\\vina.exe",
        "/usr/local/bin/vina",
        "/usr/bin/vina",
        "./vina",
        "./vina.exe"
    ]

    # Check if vina is in PATH (look each name up once and reuse the result)
    for name in possible_names:
        resolved = shutil.which(name)
        if resolved:
            return resolved

    # Check specific paths. is_file() rather than exists(): a *directory*
    # called "vina" (e.g. an unpacked release folder) is not an executable.
    for path in possible_paths:
        if Path(path).is_file():
            return path

    return None

vina_path = find_vina_executable()
if vina_path:
    print(f"✓ Found AutoDock Vina at: {vina_path}")
else:
    print("⚠ AutoDock Vina not found. Please install it or specify the path manually.")
    print("Download from: https://autodock.scripps.edu/downloads/")
    # Prompt user to specify path. When the notebook is executed without an
    # interactive frontend (nbconvert, papermill, CI), stdin is unavailable and
    # input() raises instead of returning; treat that like an empty answer so
    # "Run All" still completes.
    try:
        vina_path = input("Enter path to Vina executable (or press Enter to continue anyway): ").strip()
    except (EOFError, NotImplementedError):
        # ipykernel signals "no stdin" with StdinNotImplementedError, a
        # NotImplementedError subclass; a plain interpreter raises EOFError.
        vina_path = ""
    if not vina_path:
        vina_path = "vina"  # fallback: rely on PATH lookup at execution time

## 5. Create Example Data (if not present)


In [None]:
# Create example data directory (inside the project tree, under docs/tutorial).
# mkdir is idempotent here: parents are created as needed and an existing
# directory is not an error, so the cell can be re-run safely.
example_data_dir = project_root / "docs" / "tutorial" / "example_data"
example_data_dir.mkdir(parents=True, exist_ok=True)

# Create a simple example protein PDB file if it doesn't exist.
# The placeholder holds a single alanine residue (five ATOM records). The
# literal is fixed-column PDB text, so its spacing must not be reformatted.
# NOTE(review): whether the pipeline can dock against such a tiny structure
# is not verifiable from this notebook — replace with a real receptor.
protein_file = example_data_dir / "protein.pdb"
if not protein_file.exists():
    protein_content = """HEADER    EXAMPLE PROTEIN                         01-JAN-00   XXXX              
ATOM      1  N   ALA A   1      20.154  18.110  16.717  1.00 20.00           N  
ATOM      2  CA  ALA A   1      19.030  17.295  17.206  1.00 20.00           C  
ATOM      3  C   ALA A   1      18.454  17.922  18.471  1.00 20.00           C  
ATOM      4  O   ALA A   1      18.778  18.981  18.879  1.00 20.00           O  
ATOM      5  CB  ALA A   1      17.958  17.081  16.139  1.00 20.00           C  
END
"""
    with open(protein_file, 'w') as f:
        f.write(protein_content)
    print(f"✓ Created example protein file: {protein_file}")
else:
    # An existing file is never overwritten, so user-supplied data is kept.
    print(f"✓ Protein file already exists: {protein_file}")

# Create a simple example compounds SDF file if it doesn't exist.
# Two records separated by "$$$$": Compound_001 (one carbon atom) and
# Compound_002 (a carbon and an oxygen joined by one bond), each tagged with
# an <ID> data field. As with the PDB text, the column layout is significant.
compounds_file = example_data_dir / "compounds.sdf"
if not compounds_file.exists():
    compounds_content = """Compound_001
  
  
  1  0  0  0  0  0  0  0  0  0  1 V2000
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
M  END
> <ID>
Compound_001

$$$$
Compound_002
  
  
  2  1  0  0  0  0  0  0  0  0  1 V2000
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  1  0  0  0  0
M  END
> <ID>
Compound_002

$$$$
"""
    with open(compounds_file, 'w') as f:
        f.write(compounds_content)
    print(f"✓ Created example compounds file: {compounds_file}")
else:
    # An existing file is never overwritten, so user-supplied data is kept.
    print(f"✓ Compounds file already exists: {compounds_file}")

## 6. Define Configuration


In [None]:
# Resolve the output directory to an absolute path up front. The pipeline is
# launched later with the project root as its working directory, which can
# differ from the notebook's working directory; relative paths would then make
# the subprocess miss the config file and write results where the analysis
# cells do not look.
output_dir = Path("./output_example").resolve()
output_dir.mkdir(parents=True, exist_ok=True)

config_content = {
    # Input structures (created in the previous section if missing)
    "protein": str(protein_file),
    "ligands": str(compounds_file),
    # Everything the pipeline produces goes under output_dir
    "output": str(output_dir),
    # Search box dimensions
    "size_x": 20.0,
    "size_y": 20.0,
    "size_z": 20.0,
    # Ligand pre-filter thresholds
    "max_mol_weight": 500.0,
    "max_rotatable_bonds": 10,
    "min_logp": -5.0,
    "max_logp": 5.0,
    "export_format": "csv",
    "num_workers": 2,
    "log_file": str(output_dir / "naturaDock_example.log"),
    "verbose": True
}

# Persist the configuration as TOML next to the results it will produce.
config_path = output_dir / "config.toml"
with open(config_path, "w") as f:
    toml.dump(config_content, f)

print(f"✓ Configuration file created at: {config_path}")
print("Configuration content:")
for key, value in config_content.items():
    print(f"  {key}: {value}")

## 7. Run the naturaDock Pipeline


In [None]:
# Setup environment for subprocess
env = os.environ.copy()

# Prepend the project's src directory instead of overwriting PYTHONPATH, so
# any entries the user already relies on stay importable in the subprocess.
src_dir = str(project_root / "src")
inherited_pythonpath = env.get("PYTHONPATH")
env["PYTHONPATH"] = (
    os.pathsep.join([src_dir, inherited_pythonpath]) if inherited_pythonpath else src_dir
)
env["VINA_EXECUTABLE"] = str(vina_path)

# Construct command
main_script = naturarock_src / "main.py"
if not main_script.exists():
    print(f"⚠ Main script not found at: {main_script}")
    print("Please check the naturaDock installation and adjust paths accordingly.")
else:
    command = [
        sys.executable,  # Use the same Python interpreter as the notebook
        str(main_script),
        "--config",
        # Pass an absolute path: the subprocess runs with cwd=project_root,
        # so a path relative to the notebook's directory would not resolve.
        str(Path(config_path).resolve())
    ]

    print(f"Running command: {' '.join(command)}")
    print("Environment variables:")
    print(f"  PYTHONPATH: {env.get('PYTHONPATH')}")
    print(f"  VINA_EXECUTABLE: {env.get('VINA_EXECUTABLE')}")
    print("\nExecuting pipeline...\n")

    try:
        # Output is captured and echoed below so it appears in the notebook.
        result = subprocess.run(command, capture_output=True, text=True, env=env, cwd=str(project_root))

        print("=== STDOUT ===")
        print(result.stdout)
        print("\n=== STDERR ===")
        print(result.stderr)

        if result.returncode == 0:
            print("\n✓ Pipeline executed successfully!")
        else:
            print(f"\n⚠ Pipeline failed with return code: {result.returncode}")
    except Exception as e:
        # A launch failure is reported rather than raised so the remaining
        # cells can still run and show what is missing.
        print(f"Error running pipeline: {e}")

## 8. Analyze Results


In [None]:
# List everything the pipeline left in the output directory.
print("Generated files:")
if not output_dir.exists():
    print("  No output directory found")
else:
    for entry in output_dir.iterdir():
        if entry.is_file():
            size_bytes = entry.stat().st_size
            print(f"  📄 {entry.name} ({size_bytes} bytes)")
        elif entry.is_dir():
            print(f"  📁 {entry.name}/")

# Load the ranked results table and show its head.
ranked_results_path = output_dir / "ranked_results.csv"
if not ranked_results_path.exists():
    print(f"\n⚠ Ranked results file not found at {ranked_results_path}")
else:
    try:
        df_results = pd.read_csv(ranked_results_path)
        print(f"\n=== Ranked Results ({len(df_results)} compounds) ===")
        print(df_results.head(10))  # Show top 10

        if len(df_results) > 0:
            # First and last rows are reported as best / worst.
            # NOTE(review): assumes the pipeline writes the CSV already sorted
            # by affinity — confirm against the pipeline's export code.
            top_row, bottom_row = df_results.iloc[0], df_results.iloc[-1]
            print(f"\nBest binding affinity: {top_row['binding_affinity']} kcal/mol")
            print(f"Worst binding affinity: {bottom_row['binding_affinity']} kcal/mol")
    except Exception as err:
        print(f"Error reading results: {err}")

## 9. Display Statistical Summary and Plots


In [None]:
# Print the text summary written by the pipeline, if there is one.
statistical_summary_path = output_dir / "statistical_summary.txt"
if not statistical_summary_path.exists():
    print(f"⚠ Statistical summary file not found at {statistical_summary_path}")
else:
    print("=== Statistical Summary ===")
    print(statistical_summary_path.read_text())

# Render the score distribution figure inline, if it was generated.
plot_path = output_dir / "docking_scores_distribution.png"
if not plot_path.exists():
    print(f"\n⚠ Distribution plot not found at {plot_path}")
else:
    print("\n=== Docking Scores Distribution Plot ===")
    try:
        plot_image = Image(filename=str(plot_path))
        display(plot_image)
    except Exception as err:
        print(f"Error displaying plot: {err}")

## 10. Log File Analysis


In [None]:
# Display log file contents for debugging
MAX_LOG_LINES = 50  # only the tail is shown, to avoid overwhelming output

log_path = output_dir / "naturaDock_example.log"
if log_path.exists():
    print("=== Log File Contents ===")
    with open(log_path, "r") as f:
        log_contents = f.read()

    # splitlines() does not produce a trailing empty element for a file that
    # ends with a newline (split('\n') does), so the reported total is the
    # real line count and the tail holds MAX_LOG_LINES real lines.
    lines = log_contents.splitlines()
    if len(lines) > MAX_LOG_LINES:
        print(f"... (showing last {MAX_LOG_LINES} lines of {len(lines)} total lines) ...")
        print('\n'.join(lines[-MAX_LOG_LINES:]))
    else:
        print(log_contents)
else:
    print(f"⚠ Log file not found at {log_path}")

## 11. Clean Up (Optional)


In [None]:
# Uncomment the lines below to clean up the generated files.
# WARNING: this removes the whole output directory, i.e. the generated
# config.toml, the log file and all result files — it cannot be undone.
# (shutil is already imported in the first code cell; the import below is
# kept only so the snippet also works when copied elsewhere.)
# import shutil
# if output_dir.exists():
#     shutil.rmtree(output_dir)
#     print(f"🧹 Cleaned up output directory: {output_dir}")

# Closing banner; output_dir is reported so the results are easy to find.
print("\n=== Workflow Complete ===")
print(f"Results saved in: {output_dir}")
print("You can now analyze the docking results and proceed with your research!")