# Part 1 - Initial Setup

In [16]:
#@title Drug Discovery Platform Setup
"""
AI-Assisted Drug Design Platform
-------------------------------
This notebook implements a web-based drug discovery platform that integrates:
- Schrodinger Software Suite integration
- Chemical structure analysis
- PDB database access
- ADME property prediction
- Neural network-based structural analysis
- Compound screening capabilities
"""

# Install required packages
!pip install openai>=1.0.0 transformers gradio pandas numpy torch scikit-learn python-dotenv rdkit py3Dmol matplotlib biopython requests Bio

# Import required libraries
import os
import json
from openai import OpenAI
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem, Draw, Descriptors, rdMolDescriptors
from Bio import PDB
import requests
import gradio as gr
from typing import List, Dict, Any, Tuple
from datetime import datetime
import py3Dmol
import matplotlib.pyplot as plt
import io
import base64

#-------------------------------------------------------------------------------
# Configuration Settings
#-------------------------------------------------------------------------------

class Config:
    """Platform-wide constants read by the agent, the cache and the PDB downloader."""

    # --- Chat-completion request settings ---
    MODEL_NAME = "gpt-4"
    TEMPERATURE = 0.7
    MAX_TOKENS = 2000

    # --- Local processing settings ---
    CACHE_SIZE = 1000            # default capacity of MemoryManager
    COMPOUNDS_PER_MINUTE = 2000  # Target processing speed

    # --- External resources ---
    PDB_BASE_URL = "https://files.rcsb.org/download/"

#-------------------------------------------------------------------------------
# Memory Management
#-------------------------------------------------------------------------------

class MemoryManager:
    """Bounded in-memory cache with oldest-first eviction.

    Entries live in ``self.cache`` as ``{'value': <object>, 'timestamp': <datetime>}``,
    keyed by the string passed to :meth:`add_to_memory`.
    """

    def __init__(self, cache_size: int = Config.CACHE_SIZE):
        """Create an empty cache.

        Args:
            cache_size: Maximum number of entries to keep. A value of zero or
                less disables caching entirely.
        """
        self.cache = {}
        self.cache_size = cache_size

    def add_to_memory(self, key: str, value: Any):
        """Store ``value`` under ``key``, evicting the oldest entries if needed.

        Overwriting an existing key replaces it in place and never evicts a
        different entry.
        """
        if self.cache_size <= 0:
            # Nothing can be stored; previously this path crashed on min() of
            # an empty dict.
            return

        # Drop any previous entry first so a refresh of an existing key does
        # not count against capacity (and does not push out another entry).
        self.cache.pop(key, None)

        # `while` rather than `if` so the cache also shrinks correctly when
        # cache_size was lowered after entries were added.
        while len(self.cache) >= self.cache_size:
            oldest_key = min(self.cache,
                             key=lambda k: self.cache[k]['timestamp'])
            del self.cache[oldest_key]

        self.cache[key] = {
            'value': value,
            'timestamp': datetime.now()
        }

    def get_from_memory(self, key: str) -> Any:
        """Return the cached value for ``key``, or ``None`` when it is absent."""
        entry = self.cache.get(key)
        if entry is None:
            return None
        return entry['value']

#-------------------------------------------------------------------------------
# Error Handling
#-------------------------------------------------------------------------------

class DrugDesignError(Exception):
    """Root of the platform's exception hierarchy.

    Catching this type also catches StructureError and DockingError.
    """


class StructureError(DrugDesignError):
    """Raised when a molecular structure is unusable (e.g. an unparsable SMILES)."""


class DockingError(DrugDesignError):
    """Raised when a docking step fails."""

# Part 2 - Core Drug Design Components

In [17]:
#-------------------------------------------------------------------------------
# Protein Structure Management
#-------------------------------------------------------------------------------

class PDBManager:
    """Handles downloading of protein structures from the RCSB PDB."""

    # Seconds to wait for the server before giving up; without a timeout a
    # stalled connection would block the calling request indefinitely.
    REQUEST_TIMEOUT = 30

    @staticmethod
    def _is_valid_pdb_id(pdb_id) -> bool:
        """Return True when ``pdb_id`` is a plain identifier safe to embed in a URL/path.

        Accepts 4-12 characters from ``[A-Za-z0-9_]``, which covers classic
        four-character IDs and longer ``pdb_``-prefixed IDs while rejecting path
        separators, dots and whitespace.
        """
        import re  # local import keeps this block self-contained

        return isinstance(pdb_id, str) and re.fullmatch(r"[A-Za-z0-9_]{4,12}", pdb_id) is not None

    @staticmethod
    def download_pdb_structure(pdb_id: str) -> str:
        """Download a PDB file and save it in the working directory.

        Args:
            pdb_id: PDB identifier (surrounding whitespace is ignored).

        Returns:
            Path of the written file, ``temp_<pdb_id>.pdb``. The caller owns the
            file and is responsible for deleting it.

        Raises:
            DrugDesignError: If the ID is malformed, the server does not answer
                with HTTP 200, or the request/file write fails.
        """
        candidate = pdb_id.strip() if isinstance(pdb_id, str) else pdb_id
        # The ID ends up in both a URL and a file name and originates from user
        # input in the UI, so reject anything that could escape either.
        if not PDBManager._is_valid_pdb_id(candidate):
            raise DrugDesignError(f"Invalid PDB ID: {pdb_id!r}")

        url = f"{Config.PDB_BASE_URL}{candidate}.pdb"
        try:
            response = requests.get(url, timeout=PDBManager.REQUEST_TIMEOUT)
            if response.status_code != 200:
                raise DrugDesignError(f"Error downloading PDB: {response.status_code}")
            temp_file = f"temp_{candidate}.pdb"
            with open(temp_file, 'w') as f:
                f.write(response.text)
            return temp_file
        except DrugDesignError:
            # Already a domain error with a clear message; do not wrap it again.
            raise
        except Exception as e:
            raise DrugDesignError(f"PDB download error: {str(e)}") from e

#-------------------------------------------------------------------------------
# Molecular Visualization
#-------------------------------------------------------------------------------

class MoleculeVisualizer:
    """Handles 2D and 3D visualization of molecular structures"""

    @staticmethod
    def generate_2d_image(smiles: str) -> str:
        """Render a SMILES string as a 2D depiction.

        Args:
            smiles: SMILES string of the molecule.

        Returns:
            Base64-encoded PNG image data (text, no data-URI prefix).

        Raises:
            StructureError: If the SMILES string cannot be parsed.
            DrugDesignError: For any other failure while drawing or encoding.
        """
        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                raise StructureError("Invalid SMILES string")
            img = Draw.MolToImage(mol)
            buffered = io.BytesIO()
            img.save(buffered, format="PNG")
            return base64.b64encode(buffered.getvalue()).decode()
        except DrugDesignError:
            # Keep the specific subclass (StructureError) visible to callers
            # instead of flattening it into the generic base error.
            raise
        except Exception as e:
            raise DrugDesignError(f"2D visualization error: {str(e)}") from e

    @staticmethod
    def generate_3d_structure(smiles: str) -> str:
        """Build a 3D conformer for a SMILES string.

        Hydrogens are added, one conformer is embedded with a fixed random seed
        (reproducible output) and then MMFF-optimized.

        Args:
            smiles: SMILES string of the molecule.

        Returns:
            The conformer as an XYZ-format text block.

        Raises:
            StructureError: If the SMILES string cannot be parsed or no 3D
                conformer could be embedded.
            DrugDesignError: For any other failure during optimization/export.
        """
        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                raise StructureError("Invalid SMILES string")
            mol = Chem.AddHs(mol)
            # EmbedMolecule returns the new conformer id, or -1 when embedding
            # fails; without this check the optimizer below fails with an
            # unrelated "bad conformer" error.
            if AllChem.EmbedMolecule(mol, randomSeed=42) == -1:
                raise StructureError("Could not generate 3D coordinates for this structure")
            AllChem.MMFFOptimizeMolecule(mol)
            return Chem.MolToXYZBlock(mol)
        except DrugDesignError:
            raise
        except Exception as e:
            raise DrugDesignError(f"3D visualization error: {str(e)}") from e

#-------------------------------------------------------------------------------
# Property Calculation
#-------------------------------------------------------------------------------

class PropertyCalculator:
    """Descriptor calculations and coarse, rule-based ADME estimates for RDKit molecules."""

    @staticmethod
    def calculate_properties(mol) -> Dict[str, float]:
        """Return a dict of basic descriptors for ``mol``.

        Keys: ``MW``, ``LogP``, ``TPSA``, ``HBA``, ``HBD``, ``RotBonds``, ``QED``.
        Any failure is re-raised as DrugDesignError.
        """
        try:
            return {
                'MW': Descriptors.ExactMolWt(mol),
                'LogP': Descriptors.MolLogP(mol),
                'TPSA': Descriptors.TPSA(mol),
                'HBA': rdMolDescriptors.CalcNumHBA(mol),
                'HBD': rdMolDescriptors.CalcNumHBD(mol),
                'RotBonds': rdMolDescriptors.CalcNumRotatableBonds(mol),
                'QED': Descriptors.qed(mol)
            }
        except Exception as e:
            raise DrugDesignError(f"Property calculation error: {str(e)}")

    @staticmethod
    def predict_adme(mol) -> Dict[str, float]:
        """Run every ADME estimator on ``mol`` and collect the scores by name.

        Any failure is re-raised as DrugDesignError.
        """
        # (result key, estimator) pairs, evaluated in this order.
        estimators = (
            ('BBB_Permeability', PropertyCalculator._predict_bbb_permeability),
            ('Oral_Bioavailability', PropertyCalculator._predict_oral_bioavailability),
            ('CYP_Inhibition', PropertyCalculator._predict_cyp_inhibition),
        )
        try:
            return {label: estimate(mol) for label, estimate in estimators}
        except Exception as e:
            raise DrugDesignError(f"ADME prediction error: {str(e)}")

    @staticmethod
    def _predict_bbb_permeability(mol) -> float:
        """Binary blood-brain-barrier estimate: 1.0 when all thresholds hold, else 0.0."""
        weight, lipophilicity, polar_area = (
            Descriptors.ExactMolWt(mol),
            Descriptors.MolLogP(mol),
            Descriptors.TPSA(mol),
        )

        # Thresholds: MW <= 400, LogP <= 5, TPSA <= 90
        passes_all = weight <= 400 and lipophilicity <= 5 and polar_area <= 90
        return 1.0 if passes_all else 0.0

    @staticmethod
    def _predict_oral_bioavailability(mol) -> float:
        """Score oral bioavailability from Lipinski's Rule of 5.

        Starts at 1.0 and loses 0.25 for each violated rule.
        """
        # One boolean per rule; True marks a violation.
        rule_violations = (
            Descriptors.ExactMolWt(mol) > 500,
            Descriptors.MolLogP(mol) > 5,
            rdMolDescriptors.CalcNumHBD(mol) > 5,
            rdMolDescriptors.CalcNumHBA(mol) > 10,
        )
        return 1.0 - (sum(rule_violations) * 0.25)

    @staticmethod
    def _predict_cyp_inhibition(mol) -> float:
        """Placeholder CYP-inhibition score: always 0.5, ``mol`` is not inspected."""
        neutral_score = 0.5  # stands in for a real model
        return neutral_score

# Part 3 - Drug Design Agent and Integration

In [18]:
#-------------------------------------------------------------------------------
# Core Drug Design Agent
#-------------------------------------------------------------------------------

class DrugDesignAgent:
    """Main agent for drug design tasks and integration with external services.

    Combines an OpenAI chat client (protein analysis, compound optimization)
    with local RDKit-based property screening and a result cache.
    """

    def __init__(self, api_key: str, memory_manager: MemoryManager):
        """Create the agent.

        Args:
            api_key: OpenAI API key used to build the chat client.
            memory_manager: Cache used to store protein analyses.
        """
        self.client = OpenAI(api_key=api_key)
        self.memory_manager = memory_manager
        self.visualizer = MoleculeVisualizer()
        self.property_calculator = PropertyCalculator()

    def analyze_protein_target(self, pdb_id: str, target_description: str) -> Dict[str, Any]:
        """Ask the language model for a binding-site analysis of a protein target.

        Args:
            pdb_id: PDB identifier of the target structure.
            target_description: Free-text description of the target and goals;
                it is part of the prompt.

        Returns:
            Dict with keys ``pdb_id``, ``analysis`` (model text) and
            ``timestamp`` (ISO-8601 string).

        Raises:
            DrugDesignError: If the download or the model request fails.
        """
        # The description is part of the prompt, so it must be part of the cache
        # key too; keying on the PDB ID alone returned a stale analysis when the
        # same structure was queried with a different description.
        cache_key = f"protein_analysis_{pdb_id}::{target_description}"
        cached_result = self.memory_manager.get_from_memory(cache_key)
        if cached_result:
            return cached_result

        try:
            # Download PDB structure. The file contents are not used below; the
            # download only confirms the entry can be fetched, so remove the
            # temporary file right away instead of leaving it on disk.
            pdb_file = PDBManager.download_pdb_structure(pdb_id)
            try:
                os.remove(pdb_file)
            except OSError:
                # Best-effort cleanup: a missing/locked temp file must not fail
                # the analysis.
                pass

            prompt = f"""Analyze this protein target for drug discovery:
            PDB ID: {pdb_id}
            Description: {target_description}

            Provide a detailed analysis including:
            1. Main binding pockets
            2. Key residues for drug interactions
            3. Type of interactions possible
            4. Special considerations for drug design
            5. Recommended chemical features for ligands"""

            response = self.client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": "You are an expert in structure-based drug design."},
                    {"role": "user", "content": prompt}
                ],
                temperature=Config.TEMPERATURE,
                max_tokens=Config.MAX_TOKENS
            )

            analysis = {
                'pdb_id': pdb_id,
                'analysis': response.choices[0].message.content,
                'timestamp': datetime.now().isoformat()
            }

            self.memory_manager.add_to_memory(cache_key, analysis)
            return analysis

        except Exception as e:
            raise DrugDesignError(f"Protein analysis error: {str(e)}") from e

    def screen_compounds(self, protein_target: str, compound_library: List[str],
                        screening_criteria: Dict[str, Any]) -> Dict[str, Any]:
        """Filter a SMILES library by property thresholds.

        Args:
            protein_target: Currently unused; kept for interface compatibility.
            compound_library: SMILES strings to evaluate. Entries RDKit cannot
                parse are skipped.
            screening_criteria: Mapping of property name to a dict with optional
                ``min`` / ``max`` bounds, e.g. ``{"MW": {"max": 500}}``.

        Returns:
            Dict with ``screened_compounds`` (size of the input library,
            including unparsable entries), ``hits`` (list of
            ``{'SMILES', 'properties'}`` dicts) and ``timestamp``.

        Raises:
            DrugDesignError: If property calculation or criteria checking fails.
        """
        try:
            results = []

            # A single pass is enough: compounds are evaluated independently,
            # so splitting the library into batches changed nothing.
            for smiles in compound_library:
                mol = Chem.MolFromSmiles(smiles)
                if mol is None:
                    continue

                # Calculate properties
                properties = self.property_calculator.calculate_properties(mol)

                # Check if compound meets screening criteria
                if self._meets_criteria(properties, screening_criteria):
                    results.append({
                        'SMILES': smiles,
                        'properties': properties
                    })

            return {
                'screened_compounds': len(compound_library),
                'hits': results,
                'timestamp': datetime.now().isoformat()
            }

        except Exception as e:
            raise DrugDesignError(f"Compound screening error: {str(e)}") from e

    def optimize_compound(self, smiles: str, optimization_goals: List[str]) -> Dict[str, Any]:
        """Ask the language model for structural modifications toward given goals.

        Args:
            smiles: SMILES string of the starting compound.
            optimization_goals: Goals, joined with commas into the prompt.

        Returns:
            Dict with ``original_smiles``, ``optimization_goals``,
            ``suggestions`` (model text) and ``timestamp``.

        Raises:
            DrugDesignError: If the model request fails.
        """
        try:
            prompt = f"""Optimize this chemical structure (SMILES: {smiles}) for the following goals:
            {', '.join(optimization_goals)}

            Provide:
            1. Suggested modifications
            2. Expected impact on properties
            3. Modified SMILES structures
            4. Reasoning for each modification"""

            response = self.client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": "You are an expert medicinal chemist."},
                    {"role": "user", "content": prompt}
                ],
                temperature=Config.TEMPERATURE,
                max_tokens=Config.MAX_TOKENS
            )

            return {
                'original_smiles': smiles,
                'optimization_goals': optimization_goals,
                'suggestions': response.choices[0].message.content,
                'timestamp': datetime.now().isoformat()
            }

        except Exception as e:
            raise DrugDesignError(f"Compound optimization error: {str(e)}") from e

    def _meets_criteria(self, properties: Dict[str, float], criteria: Dict[str, Any]) -> bool:
        """Return True when every bounded property lies within its limits.

        Criteria for properties that were not calculated are ignored. A bound
        that is missing or ``None`` is treated as "no limit".

        Raises:
            DrugDesignError: If a criterion is not a dict of bounds.
        """
        for prop, bounds in criteria.items():
            if prop not in properties:
                continue
            if not isinstance(bounds, dict):
                # e.g. {"MW": 500}: fail with a clear message instead of an
                # opaque "argument of type 'int' is not iterable".
                raise DrugDesignError(
                    f"Criteria for '{prop}' must be an object with 'min' and/or 'max'"
                )
            lower = bounds.get('min')
            upper = bounds.get('max')
            if lower is not None and properties[prop] < lower:
                return False
            if upper is not None and properties[prop] > upper:
                return False
        return True

# Part 4 - User Interface and Visualization Components

In [19]:
#-------------------------------------------------------------------------------
# Enhanced Drug Design Interface
#-------------------------------------------------------------------------------

class DrugDesignInterface:
    """Web interface for drug design platform using Gradio"""

    def __init__(self, drug_design_agent: DrugDesignAgent):
        """Keep a reference to the agent that performs the actual work."""
        self.agent = drug_design_agent

    def create_interface(self):
        """Build the Gradio Blocks app and wire its event handlers.

        Returns:
            The ``gr.Blocks`` object, ready for ``.launch()``.

        All handlers report failures as an ``Error: ...`` message in their
        output component rather than raising.
        """
        with gr.Blocks(title="AI-Assisted Drug Design Platform") as interface:
            gr.Markdown("""
            # AI-Assisted Drug Design Platform
            ## Integrating AI, Molecular Modeling, and Structure-Based Drug Design
            """)

            # Setup Tab
            with gr.Tab("Setup & Configuration"):
                api_key_input = gr.Textbox(
                    label="OpenAI API Key",
                    placeholder="Enter your OpenAI API key...",
                    type="password"
                )

            # Protein Analysis Tab
            with gr.Tab("Protein Analysis"):
                with gr.Row():
                    with gr.Column():
                        pdb_id_input = gr.Textbox(
                            label="PDB ID",
                            placeholder="Enter PDB ID (e.g., 1AZ5)"
                        )
                        target_description_input = gr.Textbox(
                            label="Target Description",
                            placeholder="Describe the target and goals...",
                            lines=3
                        )
                        analyze_button = gr.Button("Analyze Protein Target")
                    with gr.Column():
                        protein_analysis_output = gr.Textbox(
                            label="Analysis Results",
                            lines=10
                        )

            # Compound Design Tab
            with gr.Tab("Compound Design & Analysis"):
                with gr.Row():
                    with gr.Column(scale=1):
                        smiles_input = gr.Textbox(
                            label="Compound SMILES",
                            placeholder="Enter SMILES string (e.g., CC(=O)OC1=CC=CC=C1C(=O)O for Aspirin)"
                        )
                        optimization_goals_input = gr.Textbox(
                            label="Optimization Goals",
                            placeholder="Enter goals separated by commas...",
                            lines=2
                        )
                        optimize_button = gr.Button("Optimize Compound")
                    with gr.Column(scale=2):
                        mol_2d = gr.Image(label="2D Structure")
                        mol_properties = gr.JSON(label="Molecular Properties")
                        optimization_output = gr.Textbox(label="Optimization Results", lines=6)

            # Compound Screening Tab
            with gr.Tab("Compound Screening"):
                with gr.Row():
                    compound_input = gr.File(
                        label="Compound Library (CSV/TXT file with SMILES)"
                    )
                    screening_criteria_input = gr.Textbox(
                        label="Screening Criteria (JSON)",
                        placeholder="""{"MW": {"max": 500}, "LogP": {"min": -0.4, "max": 5.6}}""",
                        lines=3
                    )
                screen_button = gr.Button("Screen Compounds")
                screening_output = gr.Textbox(
                    label="Screening Results",
                    lines=10
                )

            # Helper Functions
            def process_molecule(smiles):
                """Return (2D image, properties) for a SMILES string.

                On failure the image is None and the second item carries an
                ``error`` message.
                """
                # This fires on every edit of the textbox, including clearing
                # it: show nothing for blank input instead of an error.
                if not smiles or not smiles.strip():
                    return None, {}
                try:
                    mol = Chem.MolFromSmiles(smiles)
                    if mol is None:
                        return None, {"error": "Invalid SMILES string"}

                    # Generate 2D image
                    img = Draw.MolToImage(mol)

                    # Calculate properties
                    properties = self.agent.property_calculator.calculate_properties(mol)

                    return img, properties
                except Exception as e:
                    return None, {"error": str(e)}

            def analyze_protein(api_key, pdb_id, description):
                """Run the protein analysis and return the analysis text."""
                try:
                    self.agent.client = OpenAI(api_key=api_key)
                    result = self.agent.analyze_protein_target(pdb_id, description)
                    return result.get('analysis', str(result))
                except Exception as e:
                    # Same reporting style as screening: show the message in the
                    # output box rather than a generic Gradio error.
                    return f"Error: {str(e)}"

            def screen_compounds(api_key, file, criteria_json):
                """Screen an uploaded SMILES file and return the result as JSON text.

                ``api_key`` is accepted because the button passes it, but the
                screening itself is computed locally and does not use it.
                """
                try:
                    # Blank criteria means "no filtering" rather than a JSON
                    # parse error.
                    if criteria_json and criteria_json.strip():
                        criteria = json.loads(criteria_json)
                    else:
                        criteria = {}

                    if file is None:
                        return "Error: No compound library file provided"

                    # Depending on the Gradio version the upload arrives either
                    # as a path string or as a file-like object with `.name`.
                    path = getattr(file, "name", file)
                    with open(path, 'r') as f:
                        compounds = [line.strip() for line in f if line.strip()]

                    result = self.agent.screen_compounds(None, compounds, criteria)
                    return json.dumps(result, indent=2)
                except Exception as e:
                    return f"Error: {str(e)}"

            def optimize_compound(api_key, smiles, goals):
                """Request optimization suggestions and return the suggestion text."""
                try:
                    self.agent.client = OpenAI(api_key=api_key)
                    # Drop empty items so "a,,b" or a trailing comma does not
                    # put blank goals into the prompt.
                    goals_list = [g.strip() for g in (goals or "").split(',') if g.strip()]
                    result = self.agent.optimize_compound(smiles, goals_list)
                    return result.get('suggestions', str(result))
                except Exception as e:
                    return f"Error: {str(e)}"

            # Connect Event Handlers
            smiles_input.change(
                fn=process_molecule,
                inputs=smiles_input,
                outputs=[mol_2d, mol_properties]
            )

            analyze_button.click(
                fn=analyze_protein,
                inputs=[api_key_input, pdb_id_input, target_description_input],
                outputs=protein_analysis_output
            )

            screen_button.click(
                fn=screen_compounds,
                inputs=[api_key_input, compound_input, screening_criteria_input],
                outputs=screening_output
            )

            optimize_button.click(
                fn=optimize_compound,
                inputs=[api_key_input, smiles_input, optimization_goals_input],
                outputs=optimization_output
            )

        return interface

# Part 5 - Main Execution and Example Usage

In [20]:
#-------------------------------------------------------------------------------
# Main Execution and Example Usage
#-------------------------------------------------------------------------------

def main():
    """Assemble the platform components and serve the Gradio app.

    Any start-up failure is printed and then re-raised.
    """
    try:
        # Wire cache -> agent -> UI. The agent starts with an empty key;
        # API key will be provided through interface
        ui = DrugDesignInterface(DrugDesignAgent("", MemoryManager()))

        # Build the Blocks app and serve it with a public share link.
        ui.create_interface().launch(share=True)

    except Exception as e:
        print(f"Error starting application: {str(e)}")
        raise

#-------------------------------------------------------------------------------
# Example Data and Usage Guide
#-------------------------------------------------------------------------------

# Example compounds and targets for testing
EXAMPLE_DATA = {
    # Example compounds with their SMILES representations
    "compounds": {
        "aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O",
        "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
        "ibuprofen": "CC(C)Cc1ccc(cc1)[C@H](C)C(=O)O",
        "paracetamol": "CC(=O)NC1=CC=C(O)C=C1"
    },

    # Example protein targets
    "proteins": {
        "BACE1": {
            # 1FKN is a beta-secretase (BACE1) crystal structure. The ID used
            # here before (1AZ5) does not correspond to BACE1.
            "pdb_id": "1FKN",
            "description": "Beta-secretase 1 (BACE1) is a key enzyme in Alzheimer's disease pathology. Looking for potential binding sites for inhibitor development."
        },
        "COX2": {
            "pdb_id": "5KIR",
            "description": "Cyclooxygenase-2 (COX-2) is a key enzyme in inflammation and pain. Target for anti-inflammatory drug development."
        }
    },

    # Example screening criteria; each property maps to optional min/max bounds
    "screening_criteria": {
        "drug_like": {
            "MW": {"max": 500},
            "LogP": {"min": -0.4, "max": 5.6},
            "TPSA": {"max": 140},
            "HBD": {"max": 5},
            "HBA": {"max": 10}
        },
        "cns_drugs": {
            "MW": {"max": 400},
            "LogP": {"min": 0, "max": 5},
            "TPSA": {"max": 90},
            "HBD": {"max": 3},
            "HBA": {"max": 7}
        }
    }
}

# Usage Guide (printed when the notebook is run as a script / main cell)
USAGE_GUIDE = """
AI-Assisted Drug Design Platform - Quick Start Guide
--------------------------------------------------

1. Setup:
   - Enter your OpenAI API key in the Setup tab
   - The key is required for AI-assisted analysis and optimization

2. Analyzing Proteins:
   - Enter a PDB ID (e.g., 1FKN for BACE1)
   - Add a description of your target and goals
   - Click "Analyze Protein Target"

3. Designing Compounds:
   - Enter a SMILES string in the Compound Design tab
   - The 2D structure and properties will update automatically
   - Add optimization goals and click "Optimize Compound"

4. Screening Compounds:
   - Prepare a text file with SMILES strings (one per line)
   - Set screening criteria using the JSON format
   - Click "Screen Compounds"

Example SMILES to try:
- Aspirin:     CC(=O)OC1=CC=CC=C1C(=O)O
- Caffeine:    CN1C=NC2=C1C(=O)N(C(=O)N2C)C
- Ibuprofen:   CC(C)Cc1ccc(cc1)[C@H](C)C(=O)O
- Paracetamol: CC(=O)NC1=CC=C(O)C=C1

Example Screening Criteria:
{
    "MW": {"max": 500},
    "LogP": {"min": -0.4, "max": 5.6},
    "TPSA": {"max": 140}
}
"""

if __name__ == "__main__":
    # Print the quick-start instructions first so they appear above the
    # server's start-up output, then start the app.
    print(USAGE_GUIDE)
    main()


AI-Assisted Drug Design Platform - Quick Start Guide
--------------------------------------------------

1. Setup:
   - Enter your OpenAI API key in the Setup tab
   - The key is required for AI-assisted analysis and optimization

2. Analyzing Proteins:
   - Enter a PDB ID (e.g., 1AZ5 for BACE1)
   - Add a description of your target and goals
   - Click "Analyze Protein Target"

3. Designing Compounds:
   - Enter a SMILES string in the Compound Design tab
   - The 2D structure and properties will update automatically
   - Add optimization goals and click "Optimize Compound"

4. Screening Compounds:
   - Prepare a text file with SMILES strings (one per line)
   - Set screening criteria using the JSON format
   - Click "Screen Compounds"

Example SMILES to try:
- Aspirin:     CC(=O)OC1=CC=CC=C1C(=O)O
- Caffeine:    CN1C=NC2=C1C(=O)N(C(=O)N2C)C
- Ibuprofen:   CC(C)Cc1ccc(cc1)[C@H](C)C(=O)O
- Paracetamol: CC(=O)NC1=CC=C(O)C=C1

Example Screening Criteria:
{
    "MW": {"max": 500},
    "Lo