In [9]:
import gradio as gr
import requests
from pathlib import Path
import base64
import tempfile
import subprocess
import os
import csv
import re
import time
import sys

# Disease-to-protein mapping.
#
# Outer keys are broad disease categories; each inner dict maps a lowercase
# condition name or target alias to the full name of its protein target.
# Insertion order is significant: map_disease_to_protein() returns the FIRST
# value of a category when the query equals the category name, and its
# substring fallback returns the first matching key while scanning in this
# order.
DISEASE_PROTEIN_MAP = {
    "inflammation": {
        "anti-inflammatory": "Cyclooxygenase-2",
        "cox-2": "Cyclooxygenase-2",
        "pro-inflammatory": "Tumor necrosis factor-alpha",
        "rheumatoid arthritis": "Tumor necrosis factor-alpha",
        "tnf-alpha": "Tumor necrosis factor-alpha"
    },
    "oncology": {
        "neuro tumor": "Vascular endothelial growth factor receptor 2",
        "glioblastoma": "Vascular endothelial growth factor receptor 2",
        "vegfr-2": "Vascular endothelial growth factor receptor 2"
    },
    "metabolic": {
        "pre-diabetes": "Dipeptidyl peptidase 4",
        "diabetes": "Dipeptidyl peptidase 4",
        "dpp-4": "Dipeptidyl peptidase 4",
        "sglt2": "Sodium/glucose cotransporter 2",
        "obesity": "Glucagon-like peptide 1 receptor",
        "glp-1r": "Glucagon-like peptide 1 receptor"
    },
    "neurodegenerative": {
        # Both apostrophe and apostrophe-free spellings are listed so either
        # form is an exact match.
        "parkinson's disease": "Leucine-rich repeat kinase 2",
        "parkinsons disease": "Leucine-rich repeat kinase 2",
        "lrrk2": "Leucine-rich repeat kinase 2",
        "alzheimer's disease": "Beta-secretase 1",
        "alzheimers disease": "Beta-secretase 1",
        "bace1": "Beta-secretase 1"
    }
}

# SWISS-MODEL configuration.
#
# SECURITY: an API token is a credential and should not live in source
# control. Supply it through the SWISSMODEL_API_TOKEN environment variable;
# the literal below is kept only as a backward-compatible fallback and should
# be rotated and then removed.
API_TOKEN = (
    os.environ.get("SWISSMODEL_API_TOKEN")
    or "9e8b3ac03b851bb3834cdb311045c78021087d1d"
)
BASE_URL = "https://swissmodel.expasy.org"
# Authorization header sent with every SWISS-MODEL request.
HEADERS = {"Authorization": f"Token {API_TOKEN}"}


def map_disease_to_protein(disease_input: str) -> "str | None":
    """Map a disease name, condition or target alias to its protein target.

    Matching is case-insensitive and ignores surrounding whitespace. Lookup
    proceeds in two passes over ``DISEASE_PROTEIN_MAP``:

    1. Exact match against a condition key, or against a category name (in
       which case the first protein listed for that category is returned).
    2. Substring match in either direction between the query and a
       condition key; the first hit in mapping order wins.

    Args:
        disease_input: Free-text disease/condition entered by the user.

    Returns:
        The protein name, or ``None`` when the input is empty or nothing
        matches.
    """
    query = (disease_input or "").lower().strip()

    # An empty string is a substring of every key, so without this guard the
    # fuzzy pass would "match" blank input to the first protein in the map.
    if not query:
        return None

    for category, conditions in DISEASE_PROTEIN_MAP.items():
        if query in conditions:
            return conditions[query]
        if query == category:
            # First listed protein represents the category; None if the
            # category happens to be empty.
            return next(iter(conditions.values()), None)

    for conditions in DISEASE_PROTEIN_MAP.values():
        for condition_key, protein_name in conditions.items():
            if query in condition_key or condition_key in query:
                return protein_name

    return None


def search_pdb_for_first_hit(protein_name: str) -> "str | None":
    """Search RCSB PDB by structure title and return the top-scoring entry ID.

    Sends a full-text ``contains_phrase`` query on ``struct.title`` to the
    RCSB search service, restricted to experimental structures and sorted by
    descending score.

    Args:
        protein_name: Phrase to look for in the structure title.

    Returns:
        The identifier of the first result, or ``None`` when there are no
        hits, the request fails, or the response cannot be interpreted.
    """
    query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "struct.title",
                "operator": "contains_phrase",
                "value": protein_name
            }
        },
        "return_type": "entry",
        "request_options": {
            "return_all_hits": False,
            "results_content_type": ["experimental"],
            "sort": [{"sort_by": "score", "direction": "desc"}]
        }
    }

    url = "https://search.rcsb.org/rcsbsearch/v2/query"

    try:
        response = requests.post(url, json=query, timeout=30)
        response.raise_for_status()

        # A successful search with zero hits is answered with an empty body
        # (204 No Content); there is no JSON to decode in that case.
        if response.status_code == 204 or not response.content:
            return None

        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        return None

    result_set = data.get('result_set') if isinstance(data, dict) else None
    if not result_set:
        return None

    first_hit = result_set[0]
    if not isinstance(first_hit, dict):
        return None
    return first_hit.get('identifier')


def remove_ligands_from_pdb(pdb_content: str, keep_chain: str = 'A') -> tuple[str, dict]:
    """Strip hetero atoms and every chain except ``keep_chain`` from PDB text.

    Rules applied per record type:

    * ``ATOM``   - kept only when the chain ID (column 22) equals
      ``keep_chain``; truncated records with no chain column are dropped.
    * ``HETATM`` - always removed.
    * ``CONECT`` - kept only when every referenced serial belongs to a kept
      ``ATOM`` record.
    * ``TER``    - kept unless it names a different, non-blank chain.
    * Header/annotation records (HEADER, TITLE, REMARK, SEQRES, ...) - kept.
    * Anything else - dropped.

    Args:
        pdb_content: Full text of a PDB file.
        keep_chain: Chain identifier to retain.

    Returns:
        ``(cleaned_text, stats)`` where ``stats`` has the keys
        ``hetatm_removed``, ``atoms_kept``, ``chains_removed`` (a set) and
        ``conect_removed``.
    """
    # Record prefixes copied through untouched. Prefix matching means 'END'
    # also covers 'ENDMDL'.
    passthrough = ('HEADER', 'TITLE', 'COMPND', 'SOURCE', 'KEYWDS',
                   'EXPDTA', 'AUTHOR', 'REVDAT', 'JRNL', 'REMARK',
                   'SEQRES', 'MODRES', 'HELIX', 'SHEET', 'CRYST1',
                   'ORIGX', 'SCALE', 'MTRIX', 'MODEL', 'ENDMDL',
                   'MASTER', 'END')

    def chain_of(record):
        # Chain ID lives in column 22 (index 21); None marks a record too
        # short to carry one.
        return record[21].strip() if len(record) > 21 else None

    def conect_serials(record):
        # CONECT serials are fixed-width 5-character fields starting at
        # column 7. Whitespace splitting merges neighbours once serials
        # reach five digits, so read by column first and only fall back to
        # splitting for files that are not column-aligned.
        body = record[6:].rstrip()
        fields = [body[i:i + 5].strip() for i in range(0, len(body), 5)]
        fields = [field for field in fields if field]
        if all(field.isdigit() for field in fields):
            return [int(field) for field in fields]
        return [int(token) for token in body.split() if token.isdigit()]

    lines = pdb_content.split('\n')
    cleaned_lines = []
    stats = {
        'hetatm_removed': 0,
        'atoms_kept': 0,
        'chains_removed': set(),
        'conect_removed': 0
    }

    # Pass 1: collect serials of the atoms that will survive, so CONECT
    # records can be validated regardless of where they appear in the file.
    atom_serials = set()
    for line in lines:
        if line.startswith('ATOM') and chain_of(line) == keep_chain:
            try:
                atom_serials.add(int(line[6:11]))
            except ValueError:
                pass  # non-numeric serial: atom is kept, just not indexed

    # Pass 2: filter records.
    for line in lines:
        if line.startswith('ATOM'):
            chain_id = chain_of(line)
            if chain_id is None:
                continue  # truncated record, nothing usable in it
            if chain_id == keep_chain:
                cleaned_lines.append(line)
                stats['atoms_kept'] += 1
            else:
                stats['chains_removed'].add(chain_id)

        elif line.startswith('HETATM'):
            stats['hetatm_removed'] += 1

        elif line.startswith('CONECT'):
            serials = conect_serials(line)
            if serials and atom_serials.issuperset(serials):
                cleaned_lines.append(line)
            else:
                stats['conect_removed'] += 1

        elif line.startswith('TER'):
            # A TER that terminates a removed chain would dangle in the
            # output; keep only ours, plus bare TERs that name no chain.
            chain_id = chain_of(line)
            if not chain_id or chain_id == keep_chain:
                cleaned_lines.append(line)

        elif line.startswith(passthrough):
            cleaned_lines.append(line)

    return '\n'.join(cleaned_lines), stats


def show_structure(pdb_text: str, pdb_id: str, protein_name: str) -> str:
    """Build an ``<iframe>`` that renders a PDB structure with 3Dmol.js.

    A standalone HTML page is generated with the PDB text embedded in a
    JavaScript template literal, an info panel showing ``pdb_id`` and
    ``protein_name``, and buttons that switch between cartoon, stick, sphere
    and line styles. The page is base64-encoded into a ``data:`` URI so it
    can be used directly as the iframe source.

    Args:
        pdb_text: PDB file contents to display.
        pdb_id: Identifier shown as the panel heading.
        protein_name: Name shown as the panel subtitle.

    Returns:
        HTML markup for the iframe.
    """
    import html  # local import: `html` is not among this file's top-level imports

    # Escape for a JS template literal (backslash first, then backtick and
    # `$`), drop carriage returns, and break up any "</" so that text such as
    # "</script>" inside the data cannot terminate the script element.
    pdb_escaped = (
        pdb_text.replace('\\', '\\\\')
        .replace('`', '\\`')
        .replace('$', '\\$')
        .replace('\r', '')
        .replace('</', '<\\/')
    )

    # These values are placed in HTML text nodes; escape them so markup
    # characters are displayed rather than interpreted.
    safe_pdb_id = html.escape(pdb_id)
    safe_protein_name = html.escape(protein_name)

    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <script src="https://3Dmol.org/build/3Dmol-min.js"></script>
        <style>
            * {{
                margin: 0;
                padding: 0;
                box-sizing: border-box;
            }}
            body {{
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
                overflow: hidden;
            }}
            #viewer {{
                width: 100vw;
                height: 100vh;
                background: #0a0e27;
            }}
            .info-panel {{
                position: absolute;
                top: 20px;
                left: 20px;
                background: rgba(255, 255, 255, 0.95);
                padding: 16px 20px;
                border-radius: 12px;
                box-shadow: 0 8px 32px rgba(0,0,0,0.3);
                z-index: 100;
                max-width: 280px;
            }}
            .info-panel h2 {{
                margin: 0 0 2px 0;
                font-size: 20px;
                color: #1e3c72;
                font-weight: 700;
            }}
            .info-panel .subtitle {{
                font-size: 12px;
                color: #666;
                margin-bottom: 0;
                font-weight: 500;
            }}
            .controls {{
                position: absolute;
                bottom: 20px;
                left: 50%;
                transform: translateX(-50%);
                background: rgba(255, 255, 255, 0.95);
                padding: 12px 20px;
                border-radius: 12px;
                box-shadow: 0 8px 32px rgba(0,0,0,0.3);
                z-index: 100;
                display: flex;
                gap: 10px;
                align-items: center;
            }}
            .controls h3 {{
                margin: 0 12px 0 0;
                font-size: 14px;
                color: #1e3c72;
                font-weight: 700;
                white-space: nowrap;
            }}
            .controls button {{
                margin: 0;
                padding: 8px 16px;
                border: none;
                border-radius: 8px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                cursor: pointer;
                font-size: 12px;
                font-weight: 600;
                transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
                box-shadow: 0 3px 10px rgba(102, 126, 234, 0.3);
                white-space: nowrap;
            }}
            .controls button:hover {{
                transform: translateY(-2px);
                box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5);
            }}
            .controls button:active {{
                transform: translateY(0);
            }}
            .loading {{
                position: absolute;
                top: 50%;
                left: 50%;
                transform: translate(-50%, -50%);
                color: white;
                font-size: 18px;
                font-weight: 600;
                z-index: 50;
            }}
        </style>
    </head>
    <body>
        <div id="viewer"></div>
        <div class="loading" id="loading">Loading structure...</div>
        <div class="info-panel">
            <h2>{safe_pdb_id}</h2>
            <div class="subtitle">{safe_protein_name}</div>
        </div>
        <div class="controls">
            <h3>Visualization Style</h3>
            <button onclick="setCartoon()">Cartoon</button>
            <button onclick="setStick()">Stick</button>
            <button onclick="setSphere()">Sphere</button>
            <button onclick="setLine()">Line</button>
        </div>
        <script>
            let viewer;
            const pdbData = `{pdb_escaped}`;
            
            window.onload = function() {{
                try {{
                    const element = document.getElementById('viewer');
                    viewer = $3Dmol.createViewer(element, {{
                        backgroundColor: '#0a0e27'
                    }});
                    
                    viewer.addModel(pdbData, "pdb");
                    viewer.setStyle({{}}, {{'cartoon': {{'color': 'spectrum'}}}});
                    viewer.zoomTo();
                    viewer.render();
                    
                    document.getElementById('loading').style.display = 'none';
                }} catch(e) {{
                    console.error('Error loading structure:', e);
                    document.getElementById('loading').textContent = 'Error loading structure';
                }}
            }};
            
            function setCartoon() {{
                viewer.setStyle({{}}, {{'cartoon': {{'color': 'spectrum'}}}});
                viewer.render();
            }}
            
            function setStick() {{
                viewer.setStyle({{}}, {{'stick': {{'colorscheme': 'Jmol'}}}});
                viewer.render();
            }}
            
            function setSphere() {{
                viewer.setStyle({{}}, {{'sphere': {{'colorscheme': 'Jmol'}}}});
                viewer.render();
            }}
            
            function setLine() {{
                viewer.setStyle({{}}, {{'line': {{'colorscheme': 'chainHetatm'}}}});
                viewer.render();
            }}
        </script>
    </body>
    </html>
    """

    # Ship the page as a data: URI so no separate file has to be served.
    b64 = base64.b64encode(html_content.encode()).decode()
    iframe = f'<iframe src="data:text/html;base64,{b64}" width="100%" height="600" frameborder="0" style="border-radius: 12px; box-shadow: 0 8px 32px rgba(0,0,0,0.1);"></iframe>'

    return iframe


def run_ramplot(pdb_id: str, input_folder: str = "proteins/", output_folder: str = "my_analysis_folder"):
    """Run the ``ramplot pdb`` command-line tool over a folder of PDB files.

    Both folders are created if missing. The tool is invoked on the whole
    ``input_folder`` with the options ``-m 0 -r 600 -p png``.

    Args:
        pdb_id: Identifier of the structure being analysed. Not used by the
            command itself; kept so existing callers continue to work.
        input_folder: Folder containing the PDB files to analyse.
        output_folder: Folder that receives the tool's output.

    Returns:
        ``(True, plot_files, stdout)`` on success, where ``plot_files`` maps
        ``map2d``/``map3d``/``std2d``/``std3d`` to the expected PNG paths
        under ``<output_folder>/Plots`` (their existence is not checked).
        ``(False, None, message)`` when the command exits non-zero or cannot
        be started at all.
    """
    os.makedirs(input_folder, exist_ok=True)
    os.makedirs(output_folder, exist_ok=True)

    cmd = [
        "ramplot", "pdb",
        "-i", input_folder,
        "-o", output_folder,
        "-m", "0",
        "-r", "600",
        "-p", "png"
    ]

    try:
        result = subprocess.run(cmd, check=True, text=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        return False, None, e.stderr
    except OSError as e:
        # Raised when the executable is missing or not runnable; report it
        # through the normal failure tuple instead of crashing the caller.
        return False, None, f"Could not run ramplot: {e}"

    plots_dir = os.path.join(output_folder, "Plots")
    plot_files = {
        'map2d': os.path.join(plots_dir, "MapType2DAll.png"),
        'map3d': os.path.join(plots_dir, "MapType3DAll.png"),
        'std2d': os.path.join(plots_dir, "StdMapType2DGeneralGly.png"),
        'std3d': os.path.join(plots_dir, "StdMapType3DGeneral.png")
    }

    return True, plot_files, result.stdout


def extract_favoured_percentage(csv_path: str):
    """Read the "Favoured" percentage out of a RAMPlot result CSV.

    The file is scanned as plain text for a cell sequence of the form
    ``Favoured: ,<count>,(<number>%)``.

    Returns:
        The percentage as a float, or ``None`` if the pattern is absent or
        the file cannot be read.
    """
    try:
        with open(csv_path, 'r') as handle:
            found = re.search(r'Favoured:\s*,\d+,\((\d+\.?\d*)%\)', handle.read())
    except Exception:
        # Best effort: any failure to read the file means "no value".
        return None

    return float(found.group(1)) if found else None


def run_swissmodel(pdb_id: str, fasta_path: str, csv_path: str):
    """Build a SWISS-MODEL homology model when the structure scores poorly.

    The "Favoured" percentage is read from ``csv_path``. If it is 90% or
    higher nothing is submitted. Otherwise the sequences in ``fasta_path``
    are posted to the SWISS-MODEL automodel endpoint, the project is polled
    every 10 seconds (up to 60 times), and the first model is downloaded to
    ``<project_id>_<model_id>.pdb`` in the current working directory.

    Args:
        pdb_id: Used only to build the project title.
        fasta_path: FASTA file with one or more target sequences.
        csv_path: RAMPlot CSV containing the Favoured percentage.

    Returns:
        ``(success, output_filename_or_None, message)``. Failures are
        reported through the tuple rather than raised.
    """
    # Same per-request timeout the RCSB calls in this file use; without one a
    # stalled connection would block forever.
    request_timeout = 30

    # Check favoured percentage
    favoured_percent = extract_favoured_percentage(csv_path)

    if favoured_percent is None:
        return False, None, "Could not extract Favoured percentage from CSV"

    if favoured_percent >= 90.0:
        return True, None, f"Favoured percentage ({favoured_percent}%) is >= 90%. SWISS-MODEL not needed."

    # Read FASTA file: header lines ('>') delimit records, the lines between
    # them are concatenated into one sequence each.
    try:
        sequences = []
        current_sequence = []

        with open(fasta_path, 'r') as f:
            for line in f:
                line = line.strip()
                if line.startswith('>'):
                    if current_sequence:
                        sequences.append("".join(current_sequence))
                        current_sequence = []
                elif line:
                    current_sequence.append(line)

        if current_sequence:
            sequences.append("".join(current_sequence))

    except Exception as e:
        return False, None, f"Error reading FASTA file: {e}"

    if not sequences:
        return False, None, "No valid sequences found in FASTA file"

    # A single sequence is sent as a string, several as a list.
    fasta_input = sequences[0] if len(sequences) == 1 else sequences

    # Submit to SWISS-MODEL
    try:
        payload = {
            "target_sequences": fasta_input,
            "project_title": f"{pdb_id}_Homology_Model"
        }

        submit_response = requests.post(
            f"{BASE_URL}/automodel/",
            headers=HEADERS,
            json=payload,
            timeout=request_timeout
        )
        submit_response.raise_for_status()

        project_id = submit_response.json().get("project_id")
        if not project_id:
            # Without an id every follow-up URL would be malformed.
            return False, None, "SWISS-MODEL error: no project_id in response"

        # Poll for results
        max_attempts = 60
        for _ in range(max_attempts):
            status_response = requests.get(
                f"{BASE_URL}/project/{project_id}/models/summary/",
                headers=HEADERS,
                timeout=request_timeout
            )
            status_response.raise_for_status()

            status_data = status_response.json()
            job_status = status_data.get("status")

            if job_status == "COMPLETED":
                models = status_data.get("models")
                if not models:
                    return False, None, "No models generated"

                model_id = models[0].get("model_id")
                if not model_id:
                    return False, None, "No models generated"

                output_filename = f"{project_id}_{model_id}.pdb"

                pdb_response = requests.get(
                    f"{BASE_URL}/project/{project_id}/models/{model_id}.pdb",
                    headers=HEADERS,
                    timeout=request_timeout
                )
                pdb_response.raise_for_status()

                with open(output_filename, "w") as f:
                    f.write(pdb_response.text)

                return True, output_filename, f"SWISS-MODEL completed. Saved to {output_filename}"

            elif job_status == "FAILED":
                return False, None, "SWISS-MODEL job failed"

            time.sleep(10)

        return False, None, "SWISS-MODEL timeout"

    except Exception as e:
        # Boundary handler: network, HTTP and decoding errors are all
        # surfaced to the caller as a failure message.
        return False, None, f"SWISS-MODEL error: {e}"


def _failure_outputs(message: str) -> dict:
    """Return component updates that clear all results and show ``message``."""
    return {
        info_box: gr.update(visible=False),
        structure_viewer: gr.update(value=""),
        download_file: gr.update(value=None),
        analysis_status: gr.update(value=message, visible=True),
        plot1: gr.update(value=None, visible=False),
        plot2: gr.update(value=None, visible=False),
        plot3: gr.update(value=None, visible=False),
        plot4: gr.update(value=None, visible=False)
    }


def process_disease(disease_name: str):
    """Resolve a disease to a structure, display it and run the plot analysis.

    Steps: map the disease to a protein, search RCSB PDB for the first
    matching entry, download it, keep only chain A without hetero atoms, save
    it under ``proteins/``, build the info card and 3D viewer, write a
    temporary copy for download, then run ``run_ramplot``.

    Args:
        disease_name: Free text entered by the user.

    Returns:
        A dict keyed by the Gradio components (``info_box``,
        ``structure_viewer``, ``download_file``, ``analysis_status``,
        ``plot1``..``plot4``) with the update for each. Every failure path
        returns the same shape with a status message instead of raising.
    """
    import html  # local import: `html` is not among this file's top-level imports

    if not disease_name or not disease_name.strip():
        return _failure_outputs("‚ö†Ô∏è Please enter a disease or condition")

    # Map disease to protein
    protein_name = map_disease_to_protein(disease_name)
    if not protein_name:
        return _failure_outputs("‚ùå No protein mapping found")

    # Search PDB
    pdb_id = search_pdb_for_first_hit(protein_name)
    if not pdb_id:
        return _failure_outputs("‚ùå No PDB structure found")

    # Download PDB file
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # Clean structure (cleaning statistics are not surfaced in the UI)
        pdb_content, _stats = remove_ligands_from_pdb(response.text, 'A')

        # Save to proteins folder, which is the folder run_ramplot analyses.
        # NOTE(review): files from earlier searches are never removed, so
        # ramplot is presumably re-run over all of them - confirm intent.
        proteins_folder = "proteins"
        os.makedirs(proteins_folder, exist_ok=True)
        pdb_path = os.path.join(proteins_folder, f"{pdb_id}.pdb")

        with open(pdb_path, 'w') as f:
            f.write(pdb_content)

        # The card is rendered as raw HTML; escape everything interpolated
        # into it, in particular the user-supplied disease name.
        safe_disease = html.escape(disease_name)
        safe_protein = html.escape(protein_name)
        safe_pdb_id = html.escape(pdb_id)

        # Build info display
        info_html = f"""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 24px; border-radius: 16px; color: white; box-shadow: 0 8px 32px rgba(0,0,0,0.1);">
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px;">
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Disease/Condition</div>
                    <div style="font-size: 20px; font-weight: 700;">{safe_disease}</div>
                </div>
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Target Protein</div>
                    <div style="font-size: 20px; font-weight: 700;">{safe_protein}</div>
                </div>
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">PDB Structure ID</div>
                    <div style="font-size: 20px; font-weight: 700;">{safe_pdb_id}</div>
                </div>
            </div>
        </div>
        """

        # Create 3D visualization
        structure_html = show_structure(pdb_content, pdb_id, protein_name)

        # Create download file; delete=False so it outlives this function and
        # the file component can serve it.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False) as temp_file:
            temp_file.write(pdb_content)

        # Run RAMPlot analysis
        success, plot_files, output = run_ramplot(pdb_id)

        if success and plot_files:
            analysis_msg = "‚úÖ Analysis completed! Check plots below."
            plot_updates = {
                plot1: gr.update(value=plot_files['map2d'], visible=True),
                plot2: gr.update(value=plot_files['map3d'], visible=True),
                plot3: gr.update(value=plot_files['std2d'], visible=True),
                plot4: gr.update(value=plot_files['std3d'], visible=True)
            }
        else:
            # The structure is still shown; only the plots are withheld.
            analysis_msg = f"‚ö†Ô∏è Analysis failed: {output}"
            plot_updates = {
                plot1: gr.update(value=None, visible=False),
                plot2: gr.update(value=None, visible=False),
                plot3: gr.update(value=None, visible=False),
                plot4: gr.update(value=None, visible=False)
            }

        return {
            info_box: gr.update(value=info_html, visible=True),
            structure_viewer: gr.update(value=structure_html),
            download_file: gr.update(value=temp_file.name),
            analysis_status: gr.update(value=analysis_msg, visible=True),
            **plot_updates
        }

    except Exception as e:
        # UI boundary: report any failure in the status area instead of
        # letting it propagate.
        return _failure_outputs(f"‚ùå Error: {str(e)}")


# Create Gradio Interface
#
# Layout: a header banner, then a row with the search controls on the left
# (scale 1) and the 3D viewer on the right (scale 2), followed by a 2x2 grid
# of Ramachandran plot images. The inline CSS styles the container, the
# banner and a plot grid class.
with gr.Blocks(theme=gr.themes.Soft(), css="""
    .gradio-container {
        max-width: 1600px !important;
    }
    .main-header {
        text-align: center;
        padding: 40px 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 20px;
        color: white;
        margin-bottom: 30px;
    }
    .main-header h1 {
        font-size: 42px;
        font-weight: 800;
        margin: 0 0 10px 0;
    }
    .main-header p {
        font-size: 18px;
        opacity: 0.95;
        margin: 0;
        font-weight: 500;
    }
    .plot-grid {
        display: grid;
        grid-template-columns: repeat(2, 1fr);
        gap: 20px;
        margin-top: 20px;
    }
""", title="Protein Structure Finder & Analyzer") as demo:
    
    # Page banner (styled by .main-header above).
    gr.HTML("""
        <div class="main-header">
            <h1>üß¨ Protein Structure Finder & Analyzer</h1>
            <p>Discover, visualize and analyze protein structures related to diseases</p>
        </div>
    """)
    
    with gr.Row():
        # Left column: query input, trigger button and textual results.
        with gr.Column(scale=1):
            disease_input = gr.Textbox(
                label="üîç Enter Disease or Condition",
                placeholder="e.g., Alzheimer's Disease, diabetes, inflammation...",
                lines=1
            )
            
            search_btn = gr.Button("üöÄ Search & Analyze", variant="primary", size="lg")
            
            # Both start hidden; process_disease makes them visible as needed.
            info_box = gr.HTML(visible=False)
            analysis_status = gr.Markdown(visible=False)
            download_file = gr.File(label="üíæ Download PDB File", visible=True)
        
        # Right column: receives the iframe markup built by show_structure.
        with gr.Column(scale=2):
            structure_viewer = gr.HTML(label="üî¨ 3D Structure Viewer")
    
    with gr.Row():
        gr.Markdown("## üìä Ramachandran Plot Analysis")
    
    # Plot images start hidden and are shown by process_disease once the
    # analysis has succeeded.
    with gr.Row():
        with gr.Column():
            plot1 = gr.Image(label="Map Type 2D All", visible=False)
        with gr.Column():
            plot2 = gr.Image(label="Map Type 3D All", visible=False)
    
    with gr.Row():
        with gr.Column():
            plot3 = gr.Image(label="Std Map Type 2D General Gly", visible=False)
        with gr.Column():
            plot4 = gr.Image(label="Std Map Type 3D General", visible=False)
    
    # Event handlers
    # `outputs` is given as a set of components, matching process_disease,
    # which returns a dict keyed by component rather than a positional tuple.
    search_btn.click(
        fn=process_disease,
        inputs=[disease_input],
        outputs={info_box, structure_viewer, download_file, analysis_status, plot1, plot2, plot3, plot4}
    )

# Start the app only when this code runs as the main module; share=False
# requests no public link.
if __name__ == "__main__":
    demo.launch(share=False)

* Running on local URL:  http://127.0.0.1:7875

To create a public link, set `share=True` in `launch()`.


In [10]:
import gradio as gr
import requests
from pathlib import Path
import base64
import tempfile
import subprocess
import os
import csv
import re
import time
import sys
import zipfile
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Disease-to-protein mapping.
#
# Outer keys are broad disease categories; each inner dict maps a lowercase
# condition name or target alias to the full name of its protein target.
# Insertion order is significant: map_disease_to_protein() returns the FIRST
# value of a category when the query equals the category name, and its
# substring fallback returns the first matching key while scanning in this
# order.
DISEASE_PROTEIN_MAP = {
    "inflammation": {
        "anti-inflammatory": "Cyclooxygenase-2",
        "cox-2": "Cyclooxygenase-2",
        "pro-inflammatory": "Tumor necrosis factor-alpha",
        "rheumatoid arthritis": "Tumor necrosis factor-alpha",
        "tnf-alpha": "Tumor necrosis factor-alpha"
    },
    "oncology": {
        "neuro tumor": "Vascular endothelial growth factor receptor 2",
        "glioblastoma": "Vascular endothelial growth factor receptor 2",
        "vegfr-2": "Vascular endothelial growth factor receptor 2"
    },
    "metabolic": {
        "pre-diabetes": "Dipeptidyl peptidase 4",
        "diabetes": "Dipeptidyl peptidase 4",
        "dpp-4": "Dipeptidyl peptidase 4",
        "sglt2": "Sodium/glucose cotransporter 2",
        "obesity": "Glucagon-like peptide 1 receptor",
        "glp-1r": "Glucagon-like peptide 1 receptor"
    },
    "neurodegenerative": {
        # Both apostrophe and apostrophe-free spellings are listed so either
        # form is an exact match.
        "parkinson's disease": "Leucine-rich repeat kinase 2",
        "parkinsons disease": "Leucine-rich repeat kinase 2",
        "lrrk2": "Leucine-rich repeat kinase 2",
        "alzheimer's disease": "Beta-secretase 1",
        "alzheimers disease": "Beta-secretase 1",
        "bace1": "Beta-secretase 1"
    }
}

# SWISS-MODEL configuration.
#
# SECURITY: an API token is a credential and should not live in source
# control. Supply it through the SWISSMODEL_API_TOKEN environment variable;
# the literal below is kept only as a backward-compatible fallback and should
# be rotated and then removed.
API_TOKEN = (
    os.environ.get("SWISSMODEL_API_TOKEN")
    or "9e8b3ac03b851bb3834cdb311045c78021087d1d"
)
BASE_URL = "https://swissmodel.expasy.org"
# Authorization header sent with every SWISS-MODEL request.
HEADERS = {"Authorization": f"Token {API_TOKEN}"}

# Global variable to store current PDB info (both fields start unset).
current_pdb_info = {"pdb_id": None, "pdb_path": None}


def map_disease_to_protein(disease_input: str) -> "str | None":
    """Map a disease name, condition or target alias to its protein target.

    Matching is case-insensitive and ignores surrounding whitespace. Lookup
    proceeds in two passes over ``DISEASE_PROTEIN_MAP``:

    1. Exact match against a condition key, or against a category name (in
       which case the first protein listed for that category is returned).
    2. Substring match in either direction between the query and a
       condition key; the first hit in mapping order wins.

    Args:
        disease_input: Free-text disease/condition entered by the user.

    Returns:
        The protein name, or ``None`` when the input is empty or nothing
        matches.
    """
    query = (disease_input or "").lower().strip()

    # An empty string is a substring of every key, so without this guard the
    # fuzzy pass would "match" blank input to the first protein in the map.
    if not query:
        return None

    for category, conditions in DISEASE_PROTEIN_MAP.items():
        if query in conditions:
            return conditions[query]
        if query == category:
            # First listed protein represents the category; None if the
            # category happens to be empty.
            return next(iter(conditions.values()), None)

    for conditions in DISEASE_PROTEIN_MAP.values():
        for condition_key, protein_name in conditions.items():
            if query in condition_key or condition_key in query:
                return protein_name

    return None


def search_pdb_for_first_hit(protein_name: str) -> "str | None":
    """Search RCSB PDB by structure title and return the top-scoring entry ID.

    Sends a full-text ``contains_phrase`` query on ``struct.title`` to the
    RCSB search service, restricted to experimental structures and sorted by
    descending score.

    Args:
        protein_name: Phrase to look for in the structure title.

    Returns:
        The identifier of the first result, or ``None`` when there are no
        hits, the request fails, or the response cannot be interpreted.
    """
    query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "struct.title",
                "operator": "contains_phrase",
                "value": protein_name
            }
        },
        "return_type": "entry",
        "request_options": {
            "return_all_hits": False,
            "results_content_type": ["experimental"],
            "sort": [{"sort_by": "score", "direction": "desc"}]
        }
    }

    url = "https://search.rcsb.org/rcsbsearch/v2/query"

    try:
        response = requests.post(url, json=query, timeout=30)
        response.raise_for_status()

        # A successful search with zero hits is answered with an empty body
        # (204 No Content); there is no JSON to decode in that case.
        if response.status_code == 204 or not response.content:
            return None

        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        return None

    result_set = data.get('result_set') if isinstance(data, dict) else None
    if not result_set:
        return None

    first_hit = result_set[0]
    if not isinstance(first_hit, dict):
        return None
    return first_hit.get('identifier')


def remove_ligands_from_pdb(pdb_content: str, keep_chain: str = 'A') -> tuple[str, dict]:
    """Strip hetero atoms and every chain except ``keep_chain`` from PDB text.

    Rules applied per record type:

    * ``ATOM``   - kept only when the chain ID (column 22) equals
      ``keep_chain``; truncated records with no chain column are dropped.
    * ``HETATM`` - always removed.
    * ``CONECT`` - kept only when every referenced serial belongs to a kept
      ``ATOM`` record.
    * ``TER``    - kept unless it names a different, non-blank chain.
    * Header/annotation records (HEADER, TITLE, REMARK, SEQRES, ...) - kept.
    * Anything else - dropped.

    Args:
        pdb_content: Full text of a PDB file.
        keep_chain: Chain identifier to retain.

    Returns:
        ``(cleaned_text, stats)`` where ``stats`` has the keys
        ``hetatm_removed``, ``atoms_kept``, ``chains_removed`` (a set) and
        ``conect_removed``.
    """
    # Record prefixes copied through untouched. Prefix matching means 'END'
    # also covers 'ENDMDL'.
    passthrough = ('HEADER', 'TITLE', 'COMPND', 'SOURCE', 'KEYWDS',
                   'EXPDTA', 'AUTHOR', 'REVDAT', 'JRNL', 'REMARK',
                   'SEQRES', 'MODRES', 'HELIX', 'SHEET', 'CRYST1',
                   'ORIGX', 'SCALE', 'MTRIX', 'MODEL', 'ENDMDL',
                   'MASTER', 'END')

    def chain_of(record):
        # Chain ID lives in column 22 (index 21); None marks a record too
        # short to carry one.
        return record[21].strip() if len(record) > 21 else None

    def conect_serials(record):
        # CONECT serials are fixed-width 5-character fields starting at
        # column 7. Whitespace splitting merges neighbours once serials
        # reach five digits, so read by column first and only fall back to
        # splitting for files that are not column-aligned.
        body = record[6:].rstrip()
        fields = [body[i:i + 5].strip() for i in range(0, len(body), 5)]
        fields = [field for field in fields if field]
        if all(field.isdigit() for field in fields):
            return [int(field) for field in fields]
        return [int(token) for token in body.split() if token.isdigit()]

    lines = pdb_content.split('\n')
    cleaned_lines = []
    stats = {
        'hetatm_removed': 0,
        'atoms_kept': 0,
        'chains_removed': set(),
        'conect_removed': 0
    }

    # Pass 1: collect serials of the atoms that will survive, so CONECT
    # records can be validated regardless of where they appear in the file.
    atom_serials = set()
    for line in lines:
        if line.startswith('ATOM') and chain_of(line) == keep_chain:
            try:
                atom_serials.add(int(line[6:11]))
            except ValueError:
                pass  # non-numeric serial: atom is kept, just not indexed

    # Pass 2: filter records.
    for line in lines:
        if line.startswith('ATOM'):
            chain_id = chain_of(line)
            if chain_id is None:
                continue  # truncated record, nothing usable in it
            if chain_id == keep_chain:
                cleaned_lines.append(line)
                stats['atoms_kept'] += 1
            else:
                stats['chains_removed'].add(chain_id)

        elif line.startswith('HETATM'):
            stats['hetatm_removed'] += 1

        elif line.startswith('CONECT'):
            serials = conect_serials(line)
            if serials and atom_serials.issuperset(serials):
                cleaned_lines.append(line)
            else:
                stats['conect_removed'] += 1

        elif line.startswith('TER'):
            # A TER that terminates a removed chain would dangle in the
            # output; keep only ours, plus bare TERs that name no chain.
            chain_id = chain_of(line)
            if not chain_id or chain_id == keep_chain:
                cleaned_lines.append(line)

        elif line.startswith(passthrough):
            cleaned_lines.append(line)

    return '\n'.join(cleaned_lines), stats


def show_structure(pdb_text: str, pdb_id: str, protein_name: str) -> str:
    """Build an ``<iframe>`` embedding a 3Dmol.js viewer for a PDB structure.

    Args:
        pdb_text: PDB file contents; embedded in the page as a JavaScript
            template literal.
        pdb_id: Identifier shown as the info-panel heading.
        protein_name: Name shown as the info-panel subtitle.

    Returns:
        HTML for an iframe whose ``src`` is a base64 ``data:`` URL holding the
        complete viewer page.
    """
    from html import escape

    # Escape for a JS template literal (backslash, backtick, "$"), drop
    # carriage returns, and break up "</" so PDB text containing "</script>"
    # cannot end the inline script early. "\/" evaluates to "/" in JavaScript.
    pdb_escaped = (
        pdb_text.replace('\\', '\\\\')
        .replace('`', '\\`')
        .replace('$', '\\$')
        .replace('\r', '')
        .replace('</', '<\\/')
    )
    # The labels land in HTML text nodes, so markup characters must be escaped.
    safe_id = escape(str(pdb_id))
    safe_name = escape(str(protein_name))

    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <script src="https://3Dmol.org/build/3Dmol-min.js"></script>
        <style>
            * {{
                margin: 0;
                padding: 0;
                box-sizing: border-box;
            }}
            body {{
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
                overflow: hidden;
            }}
            #viewer {{
                width: 100vw;
                height: 100vh;
                background: #0a0e27;
            }}
            .info-panel {{
                position: absolute;
                top: 20px;
                left: 20px;
                background: rgba(255, 255, 255, 0.95);
                padding: 16px 20px;
                border-radius: 12px;
                box-shadow: 0 8px 32px rgba(0,0,0,0.3);
                z-index: 100;
                max-width: 280px;
            }}
            .info-panel h2 {{
                margin: 0 0 2px 0;
                font-size: 20px;
                color: #1e3c72;
                font-weight: 700;
            }}
            .info-panel .subtitle {{
                font-size: 12px;
                color: #666;
                margin-bottom: 0;
                font-weight: 500;
            }}
            .controls {{
                position: absolute;
                bottom: 20px;
                left: 50%;
                transform: translateX(-50%);
                background: rgba(255, 255, 255, 0.95);
                padding: 12px 20px;
                border-radius: 12px;
                box-shadow: 0 8px 32px rgba(0,0,0,0.3);
                z-index: 100;
                display: flex;
                gap: 10px;
                align-items: center;
            }}
            .controls h3 {{
                margin: 0 12px 0 0;
                font-size: 14px;
                color: #1e3c72;
                font-weight: 700;
                white-space: nowrap;
            }}
            .controls button {{
                margin: 0;
                padding: 8px 16px;
                border: none;
                border-radius: 8px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                cursor: pointer;
                font-size: 12px;
                font-weight: 600;
                transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
                box-shadow: 0 3px 10px rgba(102, 126, 234, 0.3);
                white-space: nowrap;
            }}
            .controls button:hover {{
                transform: translateY(-2px);
                box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5);
            }}
            .controls button:active {{
                transform: translateY(0);
            }}
            .loading {{
                position: absolute;
                top: 50%;
                left: 50%;
                transform: translate(-50%, -50%);
                color: white;
                font-size: 18px;
                font-weight: 600;
                z-index: 50;
            }}
        </style>
    </head>
    <body>
        <div id="viewer"></div>
        <div class="loading" id="loading">Loading structure...</div>
        <div class="info-panel">
            <h2>{safe_id}</h2>
            <div class="subtitle">{safe_name}</div>
        </div>
        <div class="controls">
            <h3>Visualization Style</h3>
            <button onclick="setCartoon()">Cartoon</button>
            <button onclick="setStick()">Stick</button>
            <button onclick="setSphere()">Sphere</button>
            <button onclick="setLine()">Line</button>
        </div>
        <script>
            let viewer;
            const pdbData = `{pdb_escaped}`;
            
            window.onload = function() {{
                try {{
                    const element = document.getElementById('viewer');
                    viewer = $3Dmol.createViewer(element, {{
                        backgroundColor: '#0a0e27'
                    }});
                    
                    viewer.addModel(pdbData, "pdb");
                    viewer.setStyle({{}}, {{'cartoon': {{'color': 'spectrum'}}}});
                    viewer.zoomTo();
                    viewer.render();
                    
                    document.getElementById('loading').style.display = 'none';
                }} catch(e) {{
                    console.error('Error loading structure:', e);
                    document.getElementById('loading').textContent = 'Error loading structure';
                }}
            }};
            
            function setCartoon() {{
                viewer.setStyle({{}}, {{'cartoon': {{'color': 'spectrum'}}}});
                viewer.render();
            }}
            
            function setStick() {{
                viewer.setStyle({{}}, {{'stick': {{'colorscheme': 'Jmol'}}}});
                viewer.render();
            }}
            
            function setSphere() {{
                viewer.setStyle({{}}, {{'sphere': {{'colorscheme': 'Jmol'}}}});
                viewer.render();
            }}
            
            function setLine() {{
                viewer.setStyle({{}}, {{'line': {{'colorscheme': 'chainHetatm'}}}});
                viewer.render();
            }}
        </script>
    </body>
    </html>
    """

    # A data: URL keeps the page self-contained inside the iframe.
    b64 = base64.b64encode(html_content.encode()).decode()
    iframe = f'<iframe src="data:text/html;base64,{b64}" width="100%" height="600" frameborder="0" style="border-radius: 12px; box-shadow: 0 8px 32px rgba(0,0,0,0.1);"></iframe>'

    return iframe


def run_ramplot():
    """Run RAMPlot on the currently loaded structure and report the result.

    Reads the module-level ``current_pdb_info``. Only the current PDB file is
    handed to ``ramplot`` (via a temporary staging folder), so structures left
    in ``proteins/`` by earlier searches do not end up in the analysis.

    Returns:
        A 5-tuple of ``gr.update`` objects: the status message followed by the
        four plot images. Image slots with no file on disk are hidden.
    """
    import shutil

    def _response(message, images=(None, None, None, None)):
        # One status update plus one update per plot slot.
        return (
            gr.update(value=message, visible=True),
            *(gr.update(value=image, visible=image is not None) for image in images),
        )

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        return _response("‚ùå No structure loaded. Please search for a disease first.")

    output_folder = "my_analysis_folder"
    os.makedirs(output_folder, exist_ok=True)

    try:
        with tempfile.TemporaryDirectory() as staging_dir:
            shutil.copy2(current_pdb_info["pdb_path"], staging_dir)
            cmd = [
                "ramplot", "pdb",
                # Trailing separator kept, matching the original "proteins/".
                "-i", os.path.join(staging_dir, ""),
                "-o", output_folder,
                "-m", "0",
                "-r", "600",
                "-p", "png"
            ]
            subprocess.run(cmd, check=True, text=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        return _response(f"‚ö†Ô∏è Analysis failed: {e.stderr}")
    except Exception as e:
        return _response(f"‚ùå Error: {str(e)}")

    # Order matches the image outputs: 2D map, 3D map, std 2D, std 3D.
    plots_dir = os.path.join(output_folder, "Plots")
    expected = [
        os.path.join(plots_dir, name)
        for name in (
            "MapType2DAll.png",
            "MapType3DAll.png",
            "StdMapType2DGeneralGly.png",
            "StdMapType3DGeneral.png",
        )
    ]
    # Hand the UI only files that exist; a missing path would break the image.
    available = [path if os.path.exists(path) else None for path in expected]
    if not any(available):
        return _response(f"‚ö†Ô∏è Analysis failed: no plot images were produced in {plots_dir}")

    return _response("‚úÖ Ramachandran plot analysis completed!", available)


def run_prankweb_prediction():
    """Submit the current PDB file to PrankWeb and return the prediction table.

    Drives https://prankweb.cz/ with headless Chrome: uploads the file named
    in the module-level ``current_pdb_info``, waits for the result page,
    downloads the prediction archive into ``prankweb_results`` and reads
    ``structure.pdb_predictions.csv`` from it.

    Returns:
        A 2-tuple of ``gr.update`` objects: the status message and the results
        dataframe (hidden on failure).
    """
    def _response(message, table=None):
        return (
            gr.update(value=message, visible=True),
            gr.update(value=table, visible=table is not None),
        )

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        return _response("‚ùå No structure loaded. Please search for a disease first.")

    output_dir = "prankweb_results"
    os.makedirs(output_dir, exist_ok=True)
    absolute_path = os.path.abspath(current_pdb_info["pdb_path"])

    def _zip_mtimes():
        # name -> mtime of every archive in the download folder
        return {
            name: os.path.getmtime(os.path.join(output_dir, name))
            for name in os.listdir(output_dir)
            if name.endswith('.zip')
        }

    try:
        # Setup Chrome driver with download preferences and HEADLESS mode
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless=new')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--window-size=1920,1080')
        prefs = {
            "download.default_directory": os.path.abspath(output_dir),
            "download.prompt_for_download": False,
        }
        chrome_options.add_experimental_option("prefs", prefs)

        # Snapshot existing archives so a leftover from an earlier run is
        # never mistaken for this run's download.
        zips_before = _zip_mtimes()
        new_zip = None

        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.get("https://prankweb.cz/")
            time.sleep(3)

            wait = WebDriverWait(driver, 30)
            custom_structure = wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Custom structure')]")))
            driver.execute_script("arguments[0].click();", custom_structure)
            time.sleep(1)

            file_input = driver.find_element(By.CSS_SELECTOR, "input[type='file']")
            file_input.send_keys(absolute_path)
            time.sleep(2)

            submit_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button[type='submit']")))
            driver.execute_script("arguments[0].click();", submit_btn)

            # The prediction can take minutes; wait up to 10 for the result page.
            wait_long = WebDriverWait(driver, 600)
            info_tab = wait_long.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Info')]")))

            driver.execute_script("arguments[0].click();", info_tab)
            time.sleep(2)

            download_btn = wait_long.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Download prediction data')]")))
            driver.execute_script("arguments[0].click();", download_btn)

            # Poll for a new or rewritten archive instead of sleeping a fixed
            # time. Chrome keeps a *.crdownload file while still writing.
            deadline = time.monotonic() + 60
            while time.monotonic() < deadline:
                current = _zip_mtimes()
                fresh = [name for name, mtime in current.items() if zips_before.get(name) != mtime]
                still_writing = any(name.endswith('.crdownload') for name in os.listdir(output_dir))
                if fresh and not still_writing:
                    new_zip = max(fresh, key=current.get)
                    break
                time.sleep(0.5)
        finally:
            # Always release the browser, whether automation succeeded or not.
            driver.quit()

        if new_zip is None:
            return _response("‚ùå Download failed - no zip file found")

        zip_path = os.path.join(output_dir, new_zip)
        extract_path = os.path.join(output_dir, new_zip[:-len('.zip')])

        os.makedirs(extract_path, exist_ok=True)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)

        # Find the CSV file
        csv_path = os.path.join(extract_path, "structure.pdb_predictions.csv")
        if not os.path.exists(csv_path):
            return _response("‚ùå CSV file not found in extracted data")

        # Read the CSV and drop the two ID-list columns before display
        df = pd.read_csv(csv_path)
        columns_to_drop = ['residue_ids', 'surf_atom_ids']
        df = df.drop(columns=[col for col in columns_to_drop if col in df.columns], errors='ignore')

        return _response("‚úÖ PrankWeb prediction completed! Results shown below.", df)

    except Exception as e:
        return _response(f"‚ùå Error: {str(e)}")


def process_disease(disease_name: str):
    """Resolve a disease to a PDB structure and build the UI updates for it.

    Steps: map the disease to a target protein, take the first RCSB hit for
    that protein, download the PDB file, keep only chain A without ligands,
    save it under ``proteins/`` and render the 3D viewer.

    Side effects:
        Rebinds the module-level ``current_pdb_info`` to the loaded structure
        on success, or to empty values on any failure.

    Returns:
        A dict mapping the ``info_box``, ``structure_viewer``,
        ``download_file`` and ``search_status`` components to ``gr.update``
        objects.
    """
    global current_pdb_info
    from html import escape

    def _failure(message):
        # Hide the info card, clear viewer and download, show only the message.
        return {
            info_box: gr.update(visible=False),
            structure_viewer: gr.update(value=""),
            download_file: gr.update(value=None),
            search_status: gr.update(value=message, visible=True)
        }

    # Every failure path must leave no stale structure selected, so reset
    # first and publish the new structure only once everything succeeded.
    current_pdb_info = {"pdb_id": None, "pdb_path": None}

    if not disease_name or not disease_name.strip():
        return _failure("‚ö†Ô∏è Please enter a disease or condition")

    # Map disease to protein
    protein_name = map_disease_to_protein(disease_name)
    if not protein_name:
        return _failure("‚ùå No protein mapping found")

    # Search PDB
    pdb_id = search_pdb_for_first_hit(protein_name)
    if not pdb_id:
        return _failure("‚ùå No PDB structure found")

    # Download PDB file
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # Clean structure
        pdb_content, stats = remove_ligands_from_pdb(response.text, 'A')
        if stats['atoms_kept'] == 0:
            # No chain A: do not report an empty model as a loaded structure.
            return _failure(f"‚ùå No chain A atoms found in {pdb_id}")

        # Save to proteins folder
        proteins_folder = "proteins"
        os.makedirs(proteins_folder, exist_ok=True)
        pdb_path = os.path.join(proteins_folder, f"{pdb_id}.pdb")

        with open(pdb_path, 'w', encoding='utf-8') as f:
            f.write(pdb_content)

        # Build info display. The disease name is user input, so every
        # interpolated value is HTML-escaped.
        info_html = f"""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 24px; border-radius: 16px; color: white; box-shadow: 0 8px 32px rgba(0,0,0,0.1);">
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px;">
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Disease/Condition</div>
                    <div style="font-size: 20px; font-weight: 700;">{escape(disease_name)}</div>
                </div>
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Target Protein</div>
                    <div style="font-size: 20px; font-weight: 700;">{escape(protein_name)}</div>
                </div>
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">PDB Structure ID</div>
                    <div style="font-size: 20px; font-weight: 700;">{escape(pdb_id)}</div>
                </div>
            </div>
        </div>
        """

        # Create 3D visualization
        structure_html = show_structure(pdb_content, pdb_id, protein_name)

        # Create download file; delete=False keeps it on disk after closing.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False, encoding='utf-8') as temp_file:
            temp_file.write(pdb_content)

        # Update global variable
        current_pdb_info = {"pdb_id": pdb_id, "pdb_path": pdb_path}

        return {
            info_box: gr.update(value=info_html, visible=True),
            structure_viewer: gr.update(value=structure_html),
            download_file: gr.update(value=temp_file.name),
            search_status: gr.update(value="‚úÖ Structure loaded successfully!", visible=True)
        }

    except Exception as e:
        current_pdb_info = {"pdb_id": None, "pdb_path": None}
        return _failure(f"‚ùå Error: {str(e)}")


# Create Gradio Interface
# Layout, top to bottom: header banner, search row (input column + 3D viewer
# column), Ramachandran plot section, PrankWeb section. The click handlers at
# the end of the block refer to components created earlier in it.
with gr.Blocks(theme=gr.themes.Soft(), css="""
    .gradio-container {
        max-width: 1600px !important;
    }
    .main-header {
        text-align: center;
        padding: 40px 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 20px;
        color: white;
        margin-bottom: 30px;
    }
    .main-header h1 {
        font-size: 42px;
        font-weight: 800;
        margin: 0 0 10px 0;
    }
    .main-header p {
        font-size: 18px;
        opacity: 0.95;
        margin: 0;
        font-weight: 500;
    }
""", title="Protein Structure Finder & Analyzer") as demo:
    
    # Page banner, styled by the .main-header rules in the css above
    gr.HTML("""
        <div class="main-header">
            <h1>üß¨ Protein Structure Finder & Analyzer</h1>
            <p>Discover, visualize and analyze protein structures related to diseases</p>
        </div>
    """)
    
    # Search row: narrow input/status column beside a wider viewer column
    with gr.Row():
        with gr.Column(scale=1):
            disease_input = gr.Textbox(
                label="üîç Enter Disease or Condition",
                placeholder="e.g., Alzheimer's Disease, diabetes, inflammation...",
                lines=1
            )
            
            search_btn = gr.Button("üöÄ Search Structure", variant="primary", size="lg")
            
            # Both start hidden; process_disease makes them visible as needed
            info_box = gr.HTML(visible=False)
            search_status = gr.Markdown(visible=False)
            download_file = gr.File(label="üíæ Download PDB File", visible=True)
        
        with gr.Column(scale=2):
            structure_viewer = gr.HTML(label="üî¨ 3D Structure Viewer")
    
    # Ramachandran Plot Section
    with gr.Row():
        gr.Markdown("## üìä Ramachandran Plot Analysis")
    
    with gr.Row():
        ramplot_btn = gr.Button("üî¨ Run Ramachandran Analysis", variant="secondary", size="lg")
    
    ramplot_status = gr.Markdown(visible=False)
    
    # Four plot slots in a 2x2 grid; hidden until run_ramplot fills them
    with gr.Row():
        with gr.Column():
            plot1 = gr.Image(label="Map Type 2D All", visible=False)
        with gr.Column():
            plot2 = gr.Image(label="Map Type 3D All", visible=False)
    
    with gr.Row():
        with gr.Column():
            plot3 = gr.Image(label="Std Map Type 2D General Gly", visible=False)
        with gr.Column():
            plot4 = gr.Image(label="Std Map Type 3D General", visible=False)
    
    # PrankWeb Section
    with gr.Row():
        gr.Markdown("## üéØ PrankWeb Binding Site Prediction")
    
    with gr.Row():
        prankweb_btn = gr.Button("üîÆ Run PrankWeb Prediction", variant="secondary", size="lg")
    
    prankweb_status = gr.Markdown(visible=False)
    prankweb_results = gr.Dataframe(label="Prediction Results", visible=False)
    
    # Event handlers
    # process_disease returns a dict keyed by component, so its outputs are
    # given as a set rather than an ordered list.
    search_btn.click(
        fn=process_disease,
        inputs=[disease_input],
        outputs={info_box, structure_viewer, download_file, search_status}
    )
    
    # run_ramplot returns a tuple: status first, then the four plots in order.
    ramplot_btn.click(
        fn=run_ramplot,
        inputs=[],
        outputs=[ramplot_status, plot1, plot2, plot3, plot4]
    )
    
    # run_prankweb_prediction returns (status, results table).
    prankweb_btn.click(
        fn=run_prankweb_prediction,
        inputs=[],
        outputs=[prankweb_status, prankweb_results]
    )

# Launch the app only when this module is run directly, not on import.
# share=False is passed explicitly, so no public link is requested.
if __name__ == "__main__":
    demo.launch(share=False)

* Running on local URL:  http://127.0.0.1:7876

To create a public link, set `share=True` in `launch()`.


In [None]:
import gradio as gr
import requests
from pathlib import Path
import base64
import tempfile
import subprocess
import os
import csv
import re
import time
import sys
import zipfile
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Disease to Protein mapping
# Two levels: category -> {condition or target alias -> protein name}.
# Aliases that share a protein are grouped with dict.fromkeys. Key order is
# significant: a category-name lookup returns that category's first protein,
# and the fuzzy match walks the keys in order.
DISEASE_PROTEIN_MAP = {
    "inflammation": {
        **dict.fromkeys(
            ("anti-inflammatory", "cox-2"),
            "Cyclooxygenase-2",
        ),
        **dict.fromkeys(
            ("pro-inflammatory", "rheumatoid arthritis", "tnf-alpha"),
            "Tumor necrosis factor-alpha",
        ),
    },
    "oncology": dict.fromkeys(
        ("neuro tumor", "glioblastoma", "vegfr-2"),
        "Vascular endothelial growth factor receptor 2",
    ),
    "metabolic": {
        **dict.fromkeys(
            ("pre-diabetes", "diabetes", "dpp-4"),
            "Dipeptidyl peptidase 4",
        ),
        "sglt2": "Sodium/glucose cotransporter 2",
        **dict.fromkeys(
            ("obesity", "glp-1r"),
            "Glucagon-like peptide 1 receptor",
        ),
    },
    "neurodegenerative": {
        **dict.fromkeys(
            ("parkinson's disease", "parkinsons disease", "lrrk2"),
            "Leucine-rich repeat kinase 2",
        ),
        **dict.fromkeys(
            ("alzheimer's disease", "alzheimers disease", "bace1"),
            "Beta-secretase 1",
        ),
    },
}

# SWISS-MODEL Configuration
# SECURITY(review): a credential is committed in source. Set the
# SWISSMODEL_API_TOKEN environment variable to override it; the literal stays
# only as a backward-compatible fallback and should be rotated and removed.
API_TOKEN = os.environ.get("SWISSMODEL_API_TOKEN") or "9e8b3ac03b851bb3834cdb311045c78021087d1d"
BASE_URL = "https://swissmodel.expasy.org"
# Token-style Authorization header built from the token above.
HEADERS = {"Authorization": f"Token {API_TOKEN}"}

# Global variable to store current PDB info
# Rebound by process_disease; read by run_ramplot and run_prankweb_prediction.
# Both values are None while no structure is loaded.
current_pdb_info = {"pdb_id": None, "pdb_path": None}


def map_disease_to_protein(disease_input: str) -> "str | None":
    """Map a disease name or condition to its protein target.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    match on a condition key or a category name is tried first (a category
    name resolves to that category's first protein); otherwise the first
    condition key that contains the input, or is contained in it, wins.

    Args:
        disease_input: Free-text disease, condition or target name.

    Returns:
        The protein name, or ``None`` when the input is empty or nothing
        matches.
    """
    query = (disease_input or "").lower().strip()
    if not query:
        # "" is a substring of every key, so the fuzzy pass below would
        # otherwise return the first protein in the table for empty input.
        return None

    # Pass 1: exact condition key or category name.
    for category, conditions in DISEASE_PROTEIN_MAP.items():
        if query in conditions:
            return conditions[query]
        if query == category:
            return next(iter(conditions.values()), None)

    # Pass 2: substring match in either direction, in table order.
    for conditions in DISEASE_PROTEIN_MAP.values():
        for condition_key, protein_name in conditions.items():
            if query in condition_key or condition_key in query:
                return protein_name

    return None


def search_pdb_for_first_hit(protein_name: str) -> "str | None":
    """Search RCSB PDB by structure title and return the top-scoring entry ID.

    Args:
        protein_name: Phrase that must appear in the entry's ``struct.title``.

    Returns:
        The identifier of the first experimental entry, sorted by score
        descending, or ``None`` when there are no hits, the request fails, or
        the response is not in the expected shape.
    """
    query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "struct.title",
                "operator": "contains_phrase",
                "value": protein_name
            }
        },
        "return_type": "entry",
        "request_options": {
            "return_all_hits": False,
            "results_content_type": ["experimental"],
            "sort": [{"sort_by": "score", "direction": "desc"}]
        }
    }

    url = "https://search.rcsb.org/rcsbsearch/v2/query"

    try:
        response = requests.post(url, json=query, timeout=30)
        response.raise_for_status()

        # A "no hits" reply can be 204 No Content with an empty body, which
        # is not valid JSON; treat it as an empty result.
        if response.status_code == 204 or not response.content:
            return None

        result_set = response.json().get('result_set') or []
        if not result_set:
            return None

        return result_set[0]['identifier']

    except requests.exceptions.RequestException:
        return None
    except (ValueError, LookupError, TypeError, AttributeError):
        # Body was not JSON, or the JSON lacked the expected structure.
        return None


def remove_ligands_from_pdb(pdb_content: str, keep_chain: str = 'A') -> tuple[str, dict]:
    """Strip hetero atoms and every chain except ``keep_chain`` from PDB text.

    Args:
        pdb_content: Full text of a PDB-format file.
        keep_chain: Chain identifier (column 22 of ATOM records) to retain.

    Returns:
        A ``(cleaned_text, stats)`` tuple. ``stats`` holds the counters
        ``hetatm_removed``, ``atoms_kept`` and ``conect_removed`` plus
        ``chains_removed``, the set of chain IDs whose ATOM records were
        dropped.

    ATOM records too short to carry a chain column are skipped. Record types
    that are neither ATOM/HETATM/CONECT nor in the pass-through list below
    (for example ANISOU) are dropped without being counted.
    """
    passthrough_records = (
        'HEADER', 'TITLE', 'COMPND', 'SOURCE', 'KEYWDS',
        'EXPDTA', 'AUTHOR', 'REVDAT', 'JRNL', 'REMARK',
        'SEQRES', 'MODRES', 'HELIX', 'SHEET', 'CRYST1',
        'ORIGX', 'SCALE', 'MTRIX', 'MODEL', 'ENDMDL',
        'MASTER', 'END', 'TER',
    )

    def chain_of(record):
        # The chain ID sits at index 21; None marks a truncated record.
        return record[21].strip() if len(record) > 21 else None

    def conect_serials(record):
        # CONECT serials are fixed 5-character fields starting at index 6.
        # Splitting on whitespace breaks once serials reach five digits and
        # the fields run together (e.g. "CONECT1000010001").
        payload = record[6:].rstrip()
        fields = [payload[i:i + 5] for i in range(0, len(payload), 5)]
        try:
            return [int(field) for field in fields if field.strip()]
        except ValueError:
            # Tolerate non-standard, whitespace-delimited files.
            return [int(token) for token in payload.split()]

    lines = pdb_content.split('\n')
    stats = {
        'hetatm_removed': 0,
        'atoms_kept': 0,
        'chains_removed': set(),
        'conect_removed': 0
    }

    # First pass: serial numbers of the atoms that will survive, so CONECT
    # records can be validated regardless of where they appear in the file.
    kept_serials = set()
    for line in lines:
        if line.startswith('ATOM') and chain_of(line) == keep_chain:
            try:
                kept_serials.add(int(line[6:11]))
            except ValueError:
                pass  # atom is still kept below; it just cannot anchor a CONECT

    cleaned_lines = []
    for line in lines:
        if line.startswith('ATOM'):
            chain_id = chain_of(line)
            if chain_id is None:
                continue  # truncated record: no chain column to test
            if chain_id == keep_chain:
                cleaned_lines.append(line)
                stats['atoms_kept'] += 1
            else:
                stats['chains_removed'].add(chain_id)

        elif line.startswith('HETATM'):
            stats['hetatm_removed'] += 1

        elif line.startswith('CONECT'):
            try:
                serials = conect_serials(line)
            except ValueError:
                serials = []
            # Keep a bond record only when every atom it names was kept.
            if serials and all(serial in kept_serials for serial in serials):
                cleaned_lines.append(line)
            else:
                stats['conect_removed'] += 1

        elif line.startswith(passthrough_records):
            cleaned_lines.append(line)

    return '\n'.join(cleaned_lines), stats


def show_structure(pdb_text: str, pdb_id: str, protein_name: str) -> str:
    """Build an ``<iframe>`` embedding a 3Dmol.js viewer for a PDB structure.

    Args:
        pdb_text: PDB file contents; embedded in the page as a JavaScript
            template literal.
        pdb_id: Identifier shown as the info-panel heading.
        protein_name: Name shown as the info-panel subtitle.

    Returns:
        HTML for an iframe whose ``src`` is a base64 ``data:`` URL holding the
        complete viewer page.
    """
    from html import escape

    # Escape for a JS template literal (backslash, backtick, "$"), drop
    # carriage returns, and break up "</" so PDB text containing "</script>"
    # cannot end the inline script early. "\/" evaluates to "/" in JavaScript.
    pdb_escaped = (
        pdb_text.replace('\\', '\\\\')
        .replace('`', '\\`')
        .replace('$', '\\$')
        .replace('\r', '')
        .replace('</', '<\\/')
    )
    # The labels land in HTML text nodes, so markup characters must be escaped.
    safe_id = escape(str(pdb_id))
    safe_name = escape(str(protein_name))

    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <script src="https://3Dmol.org/build/3Dmol-min.js"></script>
        <style>
            * {{
                margin: 0;
                padding: 0;
                box-sizing: border-box;
            }}
            body {{
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
                overflow: hidden;
            }}
            #viewer {{
                width: 100vw;
                height: 100vh;
                background: #0a0e27;
            }}
            .info-panel {{
                position: absolute;
                top: 20px;
                left: 20px;
                background: rgba(255, 255, 255, 0.95);
                padding: 16px 20px;
                border-radius: 12px;
                box-shadow: 0 8px 32px rgba(0,0,0,0.3);
                z-index: 100;
                max-width: 280px;
            }}
            .info-panel h2 {{
                margin: 0 0 2px 0;
                font-size: 20px;
                color: #1e3c72;
                font-weight: 700;
            }}
            .info-panel .subtitle {{
                font-size: 12px;
                color: #666;
                margin-bottom: 0;
                font-weight: 500;
            }}
            .controls {{
                position: absolute;
                bottom: 20px;
                left: 50%;
                transform: translateX(-50%);
                background: rgba(255, 255, 255, 0.95);
                padding: 12px 20px;
                border-radius: 12px;
                box-shadow: 0 8px 32px rgba(0,0,0,0.3);
                z-index: 100;
                display: flex;
                gap: 10px;
                align-items: center;
            }}
            .controls h3 {{
                margin: 0 12px 0 0;
                font-size: 14px;
                color: #1e3c72;
                font-weight: 700;
                white-space: nowrap;
            }}
            .controls button {{
                margin: 0;
                padding: 8px 16px;
                border: none;
                border-radius: 8px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                cursor: pointer;
                font-size: 12px;
                font-weight: 600;
                transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
                box-shadow: 0 3px 10px rgba(102, 126, 234, 0.3);
                white-space: nowrap;
            }}
            .controls button:hover {{
                transform: translateY(-2px);
                box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5);
            }}
            .controls button:active {{
                transform: translateY(0);
            }}
            .loading {{
                position: absolute;
                top: 50%;
                left: 50%;
                transform: translate(-50%, -50%);
                color: white;
                font-size: 18px;
                font-weight: 600;
                z-index: 50;
            }}
        </style>
    </head>
    <body>
        <div id="viewer"></div>
        <div class="loading" id="loading">Loading structure...</div>
        <div class="info-panel">
            <h2>{safe_id}</h2>
            <div class="subtitle">{safe_name}</div>
        </div>
        <div class="controls">
            <h3>Visualization Style</h3>
            <button onclick="setCartoon()">Cartoon</button>
            <button onclick="setStick()">Stick</button>
            <button onclick="setSphere()">Sphere</button>
            <button onclick="setLine()">Line</button>
        </div>
        <script>
            let viewer;
            const pdbData = `{pdb_escaped}`;
            
            window.onload = function() {{
                try {{
                    const element = document.getElementById('viewer');
                    viewer = $3Dmol.createViewer(element, {{
                        backgroundColor: '#0a0e27'
                    }});
                    
                    viewer.addModel(pdbData, "pdb");
                    viewer.setStyle({{}}, {{'cartoon': {{'color': 'spectrum'}}}});
                    viewer.zoomTo();
                    viewer.render();
                    
                    document.getElementById('loading').style.display = 'none';
                }} catch(e) {{
                    console.error('Error loading structure:', e);
                    document.getElementById('loading').textContent = 'Error loading structure';
                }}
            }};
            
            function setCartoon() {{
                viewer.setStyle({{}}, {{'cartoon': {{'color': 'spectrum'}}}});
                viewer.render();
            }}
            
            function setStick() {{
                viewer.setStyle({{}}, {{'stick': {{'colorscheme': 'Jmol'}}}});
                viewer.render();
            }}
            
            function setSphere() {{
                viewer.setStyle({{}}, {{'sphere': {{'colorscheme': 'Jmol'}}}});
                viewer.render();
            }}
            
            function setLine() {{
                viewer.setStyle({{}}, {{'line': {{'colorscheme': 'chainHetatm'}}}});
                viewer.render();
            }}
        </script>
    </body>
    </html>
    """

    # A data: URL keeps the page self-contained inside the iframe.
    b64 = base64.b64encode(html_content.encode()).decode()
    iframe = f'<iframe src="data:text/html;base64,{b64}" width="100%" height="600" frameborder="0" style="border-radius: 12px; box-shadow: 0 8px 32px rgba(0,0,0,0.1);"></iframe>'

    return iframe


def run_ramplot():
    """Run RAMPlot analysis and stream status/plot updates to the UI.

    This is a generator used as a Gradio event handler. Every yielded value is
    a 5-tuple of ``gr.update`` objects: the status banner followed by the four
    plot images (MapType2DAll, MapType3DAll, StdMapType2DGeneralGly,
    StdMapType3DGeneral), in that order.

    Reads the module-level ``current_pdb_info``. When no structure is loaded
    an error banner is yielded and the generator stops. Failures of the
    ``ramplot`` command (or any other exception) are reported in the banner
    instead of being raised.
    """
    import html  # local import: escapes tool output before embedding it in HTML

    def banner(background, color, message):
        # Visible status banner; the colours select error/progress/success.
        return gr.update(
            value=(
                f"<div style='padding: 20px; background: {background}; "
                f"border-radius: 8px; color: {color};'>{message}</div>"
            ),
            visible=True,
        )

    def hidden_plots():
        # Four cleared, hidden image slots.
        return tuple(gr.update(value=None, visible=False) for _ in range(4))

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        # A ``return <value>`` inside a generator is never delivered to the
        # consumer, so the message has to be yielded before stopping.
        yield (
            banner("#fee", "#c33", "❌ No structure loaded. Please search for a disease first."),
            *hidden_plots(),
        )
        return

    # Show processing message
    yield (
        banner("#fff3cd", "#856404", "🔬 Processing Ramachandran plot analysis..."),
        *hidden_plots(),
    )

    input_folder = "proteins/"
    output_folder = "my_analysis_folder"

    os.makedirs(input_folder, exist_ok=True)
    os.makedirs(output_folder, exist_ok=True)

    # NOTE(review): ramplot is pointed at the whole proteins/ folder, so the
    # plots presumably aggregate every PDB saved there, not only the current
    # structure - confirm this is intended.
    cmd = [
        "ramplot", "pdb",
        "-i", input_folder,
        "-o", output_folder,
        "-m", "0",
        "-r", "600",
        "-p", "png"
    ]

    try:
        subprocess.run(cmd, check=True, text=True, capture_output=True)

        # Look for the generated plot files
        plots_dir = os.path.join(output_folder, "Plots")
        plot_paths = [
            os.path.join(plots_dir, file_name)
            for file_name in (
                "MapType2DAll.png",
                "MapType3DAll.png",
                "StdMapType2DGeneralGly.png",
                "StdMapType3DGeneral.png",
            )
        ]
        found = [os.path.exists(path) for path in plot_paths]

        # Only hand existing files to the image components; a missing path
        # would otherwise fail while the update is being rendered.
        plot_updates = tuple(
            gr.update(value=path, visible=True) if exists
            else gr.update(value=None, visible=False)
            for path, exists in zip(plot_paths, found)
        )

        if not any(found):
            yield (
                banner("#fee", "#c33", "❌ Analysis finished but no plot files were generated"),
                *plot_updates,
            )
            return

        yield (
            banner("#d4edda", "#155724", "✅ Ramachandran plot analysis completed!"),
            *plot_updates,
        )

    except subprocess.CalledProcessError as e:
        # stderr may contain '<' / '>' (e.g. Python tracebacks); escape it so
        # the text is shown rather than interpreted as markup.
        yield (
            banner("#fee", "#c33", f"⚠️ Analysis failed: {html.escape(e.stderr or '')}"),
            *hidden_plots(),
        )
    except Exception as e:
        yield (
            banner("#fee", "#c33", f"❌ Error: {html.escape(str(e))}"),
            *hidden_plots(),
        )


def run_prankweb_prediction():
    """Run a PrankWeb prediction for the current PDB file via headless Chrome.

    This is a generator used as a Gradio event handler. Every yielded value is
    a 2-tuple of ``gr.update`` objects: (status message, results dataframe).

    The function uploads ``current_pdb_info["pdb_path"]`` to prankweb.cz,
    waits for the prediction, downloads the result archive into
    ``prankweb_results/``, extracts it and shows
    ``structure.pdb_predictions.csv`` without the ``residue_ids`` and
    ``surf_atom_ids`` columns. All failures are reported through the status
    message instead of being raised.
    """
    import html  # local import: escapes error text before embedding it in HTML

    def banner(background, color, message):
        # Visible status banner; the colours select error/progress/success.
        return gr.update(
            value=(
                f"<div style='padding: 20px; background: {background}; "
                f"border-radius: 8px; color: {color};'>{message}</div>"
            ),
            visible=True,
        )

    def hidden_table():
        return gr.update(value=None, visible=False)

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        # A ``return <value>`` inside a generator is never delivered to the
        # consumer, so the message has to be yielded before stopping.
        yield (
            banner("#fee", "#c33", "❌ No structure loaded. Please search for a disease first."),
            hidden_table(),
        )
        return

    # Show processing message
    yield (
        banner(
            "#fff3cd", "#856404",
            "🔮 Processing PrankWeb prediction (this may take several minutes)...",
        ),
        hidden_table(),
    )

    pdb_path = current_pdb_info["pdb_path"]
    output_dir = "prankweb_results"
    os.makedirs(output_dir, exist_ok=True)

    absolute_path = os.path.abspath(pdb_path)

    try:
        # Remember what was already downloaded so an archive left over from a
        # previous run is never mistaken for this run's result.
        run_started = time.time()
        zips_before = {
            file_name for file_name in os.listdir(output_dir)
            if file_name.endswith('.zip')
        }

        # Setup Chrome driver with download preferences and HEADLESS mode
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless=new')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--window-size=1920,1080')
        prefs = {
            "download.default_directory": os.path.abspath(output_dir),
            "download.prompt_for_download": False,
        }
        chrome_options.add_experimental_option("prefs", prefs)

        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.get("https://prankweb.cz/")
            time.sleep(3)

            wait = WebDriverWait(driver, 30)
            custom_structure = wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Custom structure')]")))
            driver.execute_script("arguments[0].click();", custom_structure)
            time.sleep(1)

            file_input = driver.find_element(By.CSS_SELECTOR, "input[type='file']")
            file_input.send_keys(absolute_path)
            time.sleep(2)

            submit_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button[type='submit']")))
            driver.execute_script("arguments[0].click();", submit_btn)

            # The prediction itself can take minutes, hence the long timeout.
            wait_long = WebDriverWait(driver, 600)
            info_tab = wait_long.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Info')]")))

            driver.execute_script("arguments[0].click();", info_tab)
            time.sleep(2)

            download_btn = wait_long.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Download prediction data')]")))
            driver.execute_script("arguments[0].click();", download_btn)

            # Give the browser time to finish writing the archive.
            time.sleep(10)
        finally:
            # Release the browser exactly once, on success and on failure.
            driver.quit()

        # Find the zip file produced by this run
        fresh_zips = []
        for file_name in os.listdir(output_dir):
            if not file_name.endswith('.zip'):
                continue
            candidate = os.path.join(output_dir, file_name)
            if file_name not in zips_before or os.path.getmtime(candidate) >= run_started:
                fresh_zips.append(candidate)

        if not fresh_zips:
            yield (
                gr.update(value="❌ Download failed - no zip file found", visible=True),
                hidden_table(),
            )
            return

        zip_path = max(fresh_zips, key=os.path.getmtime)
        extract_path = os.path.splitext(zip_path)[0]

        os.makedirs(extract_path, exist_ok=True)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)

        # Find the CSV file
        csv_path = os.path.join(extract_path, "structure.pdb_predictions.csv")
        if not os.path.exists(csv_path):
            yield (
                gr.update(value="❌ CSV file not found in extracted data", visible=True),
                hidden_table(),
            )
            return

        # Read and filter CSV
        df = pd.read_csv(csv_path)
        # NOTE(review): the header is presumably space-padded; stripping the
        # names is harmless otherwise and lets the drop below match.
        df = df.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)
        df = df.drop(columns=['residue_ids', 'surf_atom_ids'], errors='ignore')

        yield (
            banner("#d4edda", "#155724", "✅ PrankWeb prediction completed!"),
            gr.update(value=df, visible=True),
        )

    except Exception as e:
        yield (
            banner("#fee", "#c33", f"❌ Error: {html.escape(str(e))}"),
            hidden_table(),
        )


def prepare_protein_meeko():
    """Prepare the current protein for docking with Meeko.

    This is a generator used as a Gradio event handler. Every yielded value is
    a 3-tuple of ``gr.update`` objects: (status banner, viewer HTML,
    downloadable PDBQT file).

    Runs ``mk_prepare_receptor.py`` on ``current_pdb_info["pdb_path"]``,
    writing ``prepared_protein_meeko/prepared_protein.pdbqt``. On success the
    PDBQT text is rendered with ``show_structure`` and copied to a temporary
    file offered for download. Failures are reported in the banner instead of
    being raised.
    """
    import html  # local import: escapes tool output before embedding it in HTML

    def banner(background, color, message):
        # Visible status banner; the colours select error/progress/success.
        return gr.update(
            value=(
                f"<div style='padding: 20px; background: {background}; "
                f"border-radius: 8px; color: {color};'>{message}</div>"
            ),
            visible=True,
        )

    def cleared_outputs():
        # Empty viewer and no download file.
        return gr.update(value=""), gr.update(value=None)

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        # A ``return <value>`` inside a generator is never delivered to the
        # consumer, so the message has to be yielded before stopping.
        yield (
            banner("#fee", "#c33", "❌ No structure loaded. Please search for a disease first."),
            *cleared_outputs(),
        )
        return

    # Show processing message
    yield (
        banner("#fff3cd", "#856404", "⚙️ Preparing protein with Meeko..."),
        *cleared_outputs(),
    )

    pdb_path = current_pdb_info["pdb_path"]
    pdb_id = current_pdb_info["pdb_id"]

    output_dir = "prepared_protein_meeko"
    os.makedirs(output_dir, exist_ok=True)

    output_base = os.path.join(output_dir, "prepared_protein")

    cmd = [
        'mk_prepare_receptor.py',
        '-i', pdb_path,
        '-o', output_base,
        '-p',
        '--charge_model', 'gasteiger',
        '--default_altloc', 'A'
    ]

    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)

        # The output file will be output_base.pdbqt
        pdbqt_path = f"{output_base}.pdbqt"

        if not os.path.exists(pdbqt_path):
            yield (
                banner("#fee", "#c33", "❌ PDBQT file not generated"),
                *cleared_outputs(),
            )
            return

        # Read PDBQT content
        with open(pdbqt_path, 'r') as f:
            pdbqt_content = f.read()

        # Create 3D visualization (PDBQT format is similar to PDB)
        protein_name = f"Prepared Protein ({pdb_id})"
        structure_html = show_structure(pdbqt_content, pdb_id, protein_name)

        # Create download file; delete=False keeps it on disk after closing so
        # the file component can serve it.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdbqt', delete=False) as temp_file:
            temp_file.write(pdbqt_content)

        yield (
            banner("#d4edda", "#155724", "✅ Protein preparation completed!<br>"),
            gr.update(value=structure_html),
            gr.update(value=temp_file.name)
        )

    except subprocess.CalledProcessError as e:
        # stderr may contain '<' / '>'; escape it so it is displayed verbatim.
        details = html.escape(e.stderr if e.stderr else str(e))
        yield (
            banner("#fee", "#c33", f"⚠️ Preparation failed:<br><small>{details}</small>"),
            *cleared_outputs(),
        )
    except Exception as e:
        yield (
            banner("#fee", "#c33", f"❌ Error: {html.escape(str(e))}"),
            *cleared_outputs(),
        )


def process_disease(disease_name: str):
    """Resolve a disease to a PDB structure and build the UI updates for it.

    Steps: map the disease to a target protein, take the first RCSB search
    hit, download the PDB file, strip ligands and keep chain A, save the
    result under ``proteins/<pdb_id>.pdb`` and record it in the module-level
    ``current_pdb_info``.

    Args:
        disease_name: Free text typed by the user.

    Returns:
        A dict keyed by the Gradio components ``info_box``,
        ``structure_viewer``, ``download_file`` and ``search_status`` with
        the ``gr.update`` to apply to each. On any failure the viewer and
        download are cleared, ``current_pdb_info`` is reset and
        ``search_status`` carries the error message.
    """
    global current_pdb_info
    import html  # local import: escapes user/remote text before embedding it in HTML

    def failure(message):
        # Common failure payload: forget the current structure and show only
        # the status message.
        global current_pdb_info
        current_pdb_info = {"pdb_id": None, "pdb_path": None}
        return {
            info_box: gr.update(visible=False),
            structure_viewer: gr.update(value=""),
            download_file: gr.update(value=None),
            search_status: gr.update(value=message, visible=True)
        }

    if not disease_name or not disease_name.strip():
        return failure("⚠️ Please enter a disease or condition")

    # Map disease to protein
    protein_name = map_disease_to_protein(disease_name)
    if not protein_name:
        return failure("❌ No protein mapping found")

    # Search PDB
    pdb_id = search_pdb_for_first_hit(protein_name)
    if not pdb_id:
        return failure("❌ No PDB structure found")

    # Download PDB file
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pdb_content = response.text

        # Clean structure
        pdb_content, stats = remove_ligands_from_pdb(pdb_content, 'A')
        if not stats.get('atoms_kept'):
            # Without this check an entry that has no chain A would be saved
            # and displayed as an empty structure.
            return failure(f"❌ Structure {pdb_id} contains no atoms in chain A")

        # Save to proteins folder
        proteins_folder = "proteins"
        os.makedirs(proteins_folder, exist_ok=True)
        pdb_path = os.path.join(proteins_folder, f"{pdb_id}.pdb")

        with open(pdb_path, 'w') as f:
            f.write(pdb_content)

        # Update global variable
        current_pdb_info = {"pdb_id": pdb_id, "pdb_path": pdb_path}

        # The disease text comes straight from the textbox, so it is escaped
        # before being placed into markup; the other two values are escaped
        # for the same reason of not trusting text we did not author.
        disease_label = html.escape(disease_name)
        protein_label = html.escape(protein_name)
        pdb_label = html.escape(pdb_id)

        # Build info display
        info_html = f"""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 24px; border-radius: 16px; color: white; box-shadow: 0 8px 32px rgba(0,0,0,0.1);">
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px;">
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Disease/Condition</div>
                    <div style="font-size: 20px; font-weight: 700;">{disease_label}</div>
                </div>
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Target Protein</div>
                    <div style="font-size: 20px; font-weight: 700;">{protein_label}</div>
                </div>
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">PDB Structure ID</div>
                    <div style="font-size: 20px; font-weight: 700;">{pdb_label}</div>
                </div>
            </div>
        </div>
        """

        # Create 3D visualization
        structure_html = show_structure(pdb_content, pdb_id, protein_name)

        # Create download file; delete=False keeps it on disk after closing so
        # the file component can serve it.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False) as temp_file:
            temp_file.write(pdb_content)

        return {
            info_box: gr.update(value=info_html, visible=True),
            structure_viewer: gr.update(value=structure_html),
            download_file: gr.update(value=temp_file.name),
            search_status: gr.update(value="✅ Structure loaded successfully!", visible=True)
        }

    except Exception as e:
        return failure(f"❌ Error: {str(e)}")


# Create Gradio Interface
# Layout: search column + 3D viewer, followed by one section per analysis
# (Ramachandran plots, PrankWeb prediction, Meeko preparation). Labels use
# real emoji; the previous text was mis-encoded UTF-8.
with gr.Blocks(theme=gr.themes.Soft(), css="""
    .gradio-container {
        max-width: 1600px !important;
    }
    .main-header {
        text-align: center;
        padding: 40px 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 20px;
        color: white;
        margin-bottom: 30px;
    }
    .main-header h1 {
        font-size: 42px;
        font-weight: 800;
        margin: 0 0 10px 0;
    }
    .main-header p {
        font-size: 18px;
        opacity: 0.95;
        margin: 0;
        font-weight: 500;
    }
""", title="Protein Structure Finder & Analyzer") as demo:
    
    gr.HTML("""
        <div class="main-header">
            <h1>🧬 Protein Structure Finder & Analyzer</h1>
            <p>Discover, visualize and analyze protein structures related to diseases</p>
        </div>
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            disease_input = gr.Textbox(
                label="🔍 Enter Disease or Condition",
                placeholder="e.g., Alzheimer's Disease, diabetes, inflammation...",
                lines=1
            )
            
            search_btn = gr.Button("🚀 Search Structure", variant="primary", size="lg")
            
            info_box = gr.HTML(visible=False)
            search_status = gr.Markdown(visible=False)
            download_file = gr.File(label="💾 Download PDB File", visible=True)
        
        with gr.Column(scale=2):
            structure_viewer = gr.HTML(label="🔬 3D Structure Viewer")
    
    # Ramachandran Plot Section
    with gr.Row():
        gr.Markdown("## 📊 Ramachandran Plot Analysis")
    
    with gr.Row():
        ramplot_btn = gr.Button("🔬 Run Ramachandran Analysis", variant="secondary", size="lg")
    
    ramplot_status = gr.HTML(visible=False)
    
    with gr.Row():
        with gr.Column():
            plot1 = gr.Image(label="Map Type 2D All", visible=False)
        with gr.Column():
            plot2 = gr.Image(label="Map Type 3D All", visible=False)
    
    with gr.Row():
        with gr.Column():
            plot3 = gr.Image(label="Std Map Type 2D General Gly", visible=False)
        with gr.Column():
            plot4 = gr.Image(label="Std Map Type 3D General", visible=False)
    
    # PrankWeb Section
    with gr.Row():
        gr.Markdown("## 🎯 PrankWeb Binding Site Prediction")
    
    with gr.Row():
        prankweb_btn = gr.Button("🔮 Run PrankWeb Prediction", variant="secondary", size="lg")
    
    prankweb_status = gr.HTML(visible=False)
    prankweb_results = gr.Dataframe(label="Prediction Results", visible=False)
    
    # Protein Preparation Section
    with gr.Row():
        gr.Markdown("## ⚙️ Protein Preparation for Docking (Meeko)")
    
    with gr.Row():
        prepare_btn = gr.Button("🔧 Prepare Protein with Meeko", variant="secondary", size="lg")
    
    prepare_status = gr.HTML(visible=False)
    
    with gr.Row():
        with gr.Column(scale=2):
            prepared_viewer = gr.HTML(label="🔬 Prepared Structure Viewer")
        with gr.Column(scale=1):
            prepared_download = gr.File(label="💾 Download PDBQT File", visible=True)
    
    # Event handlers
    # process_disease returns a dict keyed by component, so its outputs are
    # declared as a set; the other handlers yield positional tuples.
    search_btn.click(
        fn=process_disease,
        inputs=[disease_input],
        outputs={info_box, structure_viewer, download_file, search_status}
    )
    
    ramplot_btn.click(
        fn=run_ramplot,
        inputs=[],
        outputs=[ramplot_status, plot1, plot2, plot3, plot4]
    )
    
    prankweb_btn.click(
        fn=run_prankweb_prediction,
        inputs=[],
        outputs=[prankweb_status, prankweb_results]
    )
    
    prepare_btn.click(
        fn=prepare_protein_meeko,
        inputs=[],
        outputs=[prepare_status, prepared_viewer, prepared_download]
    )

if __name__ == "__main__":
    demo.launch(share=False)

* Running on local URL:  http://127.0.0.1:7877

To create a public link, set `share=True` in `launch()`.


In [12]:
import gradio as gr
import requests
from pathlib import Path
import base64
import tempfile
import subprocess
import os
import csv
import re
import time
import sys
import zipfile
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Disease to Protein mapping
# Canonical target-protein names; each is shared by several aliases below.
_COX2 = "Cyclooxygenase-2"
_TNF_ALPHA = "Tumor necrosis factor-alpha"
_VEGFR2 = "Vascular endothelial growth factor receptor 2"
_DPP4 = "Dipeptidyl peptidase 4"
_SGLT2 = "Sodium/glucose cotransporter 2"
_GLP1R = "Glucagon-like peptide 1 receptor"
_LRRK2 = "Leucine-rich repeat kinase 2"
_BACE1 = "Beta-secretase 1"

# category -> {lower-case condition or alias -> target protein}.
# Insertion order is significant: looking up a bare category name resolves to
# the first protein listed under that category.
DISEASE_PROTEIN_MAP = {
    "inflammation": {
        "anti-inflammatory": _COX2,
        "cox-2": _COX2,
        "pro-inflammatory": _TNF_ALPHA,
        "rheumatoid arthritis": _TNF_ALPHA,
        "tnf-alpha": _TNF_ALPHA,
    },
    "oncology": {
        "neuro tumor": _VEGFR2,
        "glioblastoma": _VEGFR2,
        "vegfr-2": _VEGFR2,
    },
    "metabolic": {
        "pre-diabetes": _DPP4,
        "diabetes": _DPP4,
        "dpp-4": _DPP4,
        "sglt2": _SGLT2,
        "obesity": _GLP1R,
        "glp-1r": _GLP1R,
    },
    "neurodegenerative": {
        "parkinson's disease": _LRRK2,
        "parkinsons disease": _LRRK2,
        "lrrk2": _LRRK2,
        "alzheimer's disease": _BACE1,
        "alzheimers disease": _BACE1,
        "bace1": _BACE1,
    },
}

# SWISS-MODEL Configuration
# SECURITY: an API token is a credential and should not live in source. Set
# the SWISSMODEL_API_TOKEN environment variable to supply it; the literal
# below is kept only as a backward-compatible fallback and should be rotated
# and removed.
API_TOKEN = os.environ.get("SWISSMODEL_API_TOKEN") or "9e8b3ac03b851bb3834cdb311045c78021087d1d"
BASE_URL = "https://swissmodel.expasy.org"
HEADERS = {"Authorization": f"Token {API_TOKEN}"}

# Global variable to store current PDB info
# Shared mutable state read and written by the event handlers.
current_pdb_info = {"pdb_id": None, "pdb_path": None, "prepared_pdbqt": None, "docking_results": None}


def map_disease_to_protein(disease_input: str, mapping=None) -> "str | None":
    """Map a disease name or condition to its protein target.

    Matching is case-insensitive and ignores surrounding whitespace. Exact
    matches win over partial ones:

    1. the query equals a condition key, or equals a category name (which
       resolves to the first protein listed in that category);
    2. the query is a substring of a condition key, or a key is a substring
       of the query.

    Args:
        disease_input: Free-text disease, condition or target alias.
        mapping: Optional ``{category: {condition: protein}}`` table; defaults
            to the module-level ``DISEASE_PROTEIN_MAP``.

    Returns:
        The protein name, or ``None`` when the query is empty or nothing
        matches.
    """
    if mapping is None:
        mapping = DISEASE_PROTEIN_MAP

    query = (disease_input or "").lower().strip()
    if not query:
        # An empty string is a substring of every key, so without this guard
        # the partial-match pass would return the first protein in the table.
        return None

    # Pass 1: exact condition or category match.
    for category, conditions in mapping.items():
        if query in conditions:
            return conditions[query]
        if query == category:
            return next(iter(conditions.values()), None)

    # Pass 2: partial match in either direction.
    for conditions in mapping.values():
        for condition_key, protein_name in conditions.items():
            if query in condition_key or condition_key in query:
                return protein_name

    return None


def search_pdb_for_first_hit(protein_name: str):
    """Search RCSB PDB and return the first result found.

    Sends a text query matching ``protein_name`` as a phrase in
    ``struct.title``, restricted to experimental entries and sorted by score.

    Args:
        protein_name: Protein name to look for in structure titles.

    Returns:
        The identifier of the top hit, or ``None`` when there are no hits,
        the request fails, or the response cannot be interpreted.
    """
    query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "struct.title",
                "operator": "contains_phrase",
                "value": protein_name
            }
        },
        "return_type": "entry",
        "request_options": {
            "return_all_hits": False,
            "results_content_type": ["experimental"],
            "sort": [{"sort_by": "score", "direction": "desc"}]
        }
    }

    url = "https://search.rcsb.org/rcsbsearch/v2/query"

    try:
        response = requests.post(url, json=query, timeout=30)
        response.raise_for_status()

        # A search without hits is answered with an empty body (HTTP 204),
        # which is not valid JSON.
        if response.status_code == 204 or not response.content:
            return None

        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decoding errors on ``requests`` versions
        # whose decode error is not a RequestException.
        return None

    if not isinstance(data, dict):
        return None

    result_set = data.get('result_set') or []
    if not result_set or not isinstance(result_set[0], dict):
        return None

    return result_set[0].get('identifier')


# Record types copied through unchanged. 'END' also covers 'ENDMDL'.
_PDB_PASSTHROUGH_RECORDS = (
    'HEADER', 'TITLE', 'COMPND', 'SOURCE', 'KEYWDS',
    'EXPDTA', 'AUTHOR', 'REVDAT', 'JRNL', 'REMARK',
    'SEQRES', 'MODRES', 'HELIX', 'SHEET', 'CRYST1',
    'ORIGX', 'SCALE', 'MTRIX', 'MODEL', 'ENDMDL',
    'MASTER', 'END',
)


def _parse_conect_serials(line: str):
    """Return the atom serials of a CONECT record, or None if unparsable."""
    payload = line[6:]

    # Whitespace-separated serials (the common case, and loosely formatted files).
    tokens = payload.split()
    if tokens and all(tok.isdecimal() and len(tok) <= 5 for tok in tokens):
        return [int(tok) for tok in tokens]

    # Fixed-width fallback: serials are 5-column fields, so 5-digit serials
    # touch each other ("CONECT1000010001") and cannot be split on whitespace.
    fields = [payload[i:i + 5].strip() for i in range(0, len(payload), 5)]
    fields = [field for field in fields if field]
    if fields and all(field.isdecimal() for field in fields):
        return [int(field) for field in fields]

    return None


def remove_ligands_from_pdb(pdb_content: str, keep_chain: str = 'A') -> tuple[str, dict]:
    """Remove ligands (HETATM) and keep only a single chain.

    Args:
        pdb_content: Text of a PDB file.
        keep_chain: Chain identifier (column 22) whose ATOM records are kept.

    Returns:
        ``(cleaned_text, stats)``. ``cleaned_text`` contains the header and
        bookkeeping records, the ATOM records of ``keep_chain``, TER records
        that belong to ``keep_chain`` (or carry no chain), and CONECT records
        that reference only kept atoms. ``stats`` has the keys
        ``hetatm_removed`` (int), ``atoms_kept`` (int), ``chains_removed``
        (set of chain ids) and ``conect_removed`` (int).

    Malformed ATOM lines that are too short to carry a chain id are dropped.
    """
    lines = pdb_content.split('\n')
    cleaned_lines = []
    stats = {
        'hetatm_removed': 0,
        'atoms_kept': 0,
        'chains_removed': set(),
        'conect_removed': 0
    }

    # Pass 1: serial numbers of the atoms that will survive, needed to decide
    # which CONECT records are still meaningful.
    atom_serials = set()
    for line in lines:
        if line.startswith('ATOM') and len(line) > 21 and line[21].strip() == keep_chain:
            try:
                atom_serials.add(int(line[6:11]))
            except ValueError:
                pass

    # Pass 2: filter the records.
    for line in lines:
        if line.startswith('ATOM'):
            if len(line) <= 21:
                # Too short to contain a chain id; indexing it would raise.
                continue
            chain_id = line[21].strip()
            if chain_id == keep_chain:
                cleaned_lines.append(line)
                stats['atoms_kept'] += 1
            else:
                stats['chains_removed'].add(chain_id)

        elif line.startswith('HETATM'):
            stats['hetatm_removed'] += 1

        elif line.startswith('CONECT'):
            serials = _parse_conect_serials(line)
            if serials is not None and all(s in atom_serials for s in serials):
                cleaned_lines.append(line)
            else:
                stats['conect_removed'] += 1

        elif line.startswith('TER'):
            # A TER that names another chain terminates atoms that were
            # removed, so it is dropped with them.
            if len(line) > 21 and line[21].strip() not in ('', keep_chain):
                continue
            cleaned_lines.append(line)

        elif line.startswith(_PDB_PASSTHROUGH_RECORDS):
            cleaned_lines.append(line)

    return '\n'.join(cleaned_lines), stats


def show_structure(pdb_text: str, pdb_id: str, protein_name: str) -> str:
    """Build an embeddable 3Dmol.js viewer for a structure.

    The structure text is inlined into a standalone HTML page as a JavaScript
    template literal, the page is base64-encoded into a ``data:`` URL, and
    that URL is returned wrapped in an ``<iframe>`` tag. ``pdb_id`` and
    ``protein_name`` are shown in the page's info panel.
    """
    # One pass over the text: escape the characters that are special inside a
    # JS template literal (backslash, backtick, dollar) and drop carriage returns.
    template_literal_escapes = str.maketrans({
        '\\': '\\\\',
        '`': '\\`',
        '$': '\\$',
        '\r': None,
    })
    js_safe_pdb = pdb_text.translate(template_literal_escapes)

    page = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <script src="https://3Dmol.org/build/3Dmol-min.js"></script>
        <style>
            * {{
                margin: 0;
                padding: 0;
                box-sizing: border-box;
            }}
            body {{
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
                overflow: hidden;
            }}
            #viewer {{
                width: 100vw;
                height: 100vh;
                background: #0a0e27;
            }}
            .info-panel {{
                position: absolute;
                top: 20px;
                left: 20px;
                background: rgba(255, 255, 255, 0.95);
                padding: 16px 20px;
                border-radius: 12px;
                box-shadow: 0 8px 32px rgba(0,0,0,0.3);
                z-index: 100;
                max-width: 280px;
            }}
            .info-panel h2 {{
                margin: 0 0 2px 0;
                font-size: 20px;
                color: #1e3c72;
                font-weight: 700;
            }}
            .info-panel .subtitle {{
                font-size: 12px;
                color: #666;
                margin-bottom: 0;
                font-weight: 500;
            }}
            .controls {{
                position: absolute;
                bottom: 20px;
                left: 50%;
                transform: translateX(-50%);
                background: rgba(255, 255, 255, 0.95);
                padding: 12px 20px;
                border-radius: 12px;
                box-shadow: 0 8px 32px rgba(0,0,0,0.3);
                z-index: 100;
                display: flex;
                gap: 10px;
                align-items: center;
            }}
            .controls h3 {{
                margin: 0 12px 0 0;
                font-size: 14px;
                color: #1e3c72;
                font-weight: 700;
                white-space: nowrap;
            }}
            .controls button {{
                margin: 0;
                padding: 8px 16px;
                border: none;
                border-radius: 8px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                cursor: pointer;
                font-size: 12px;
                font-weight: 600;
                transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
                box-shadow: 0 3px 10px rgba(102, 126, 234, 0.3);
                white-space: nowrap;
            }}
            .controls button:hover {{
                transform: translateY(-2px);
                box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5);
            }}
            .controls button:active {{
                transform: translateY(0);
            }}
            .loading {{
                position: absolute;
                top: 50%;
                left: 50%;
                transform: translate(-50%, -50%);
                color: white;
                font-size: 18px;
                font-weight: 600;
                z-index: 50;
            }}
        </style>
    </head>
    <body>
        <div id="viewer"></div>
        <div class="loading" id="loading">Loading structure...</div>
        <div class="info-panel">
            <h2>{pdb_id}</h2>
            <div class="subtitle">{protein_name}</div>
        </div>
        <div class="controls">
            <h3>Visualization Style</h3>
            <button onclick="setCartoon()">Cartoon</button>
            <button onclick="setStick()">Stick</button>
            <button onclick="setSphere()">Sphere</button>
            <button onclick="setLine()">Line</button>
        </div>
        <script>
            let viewer;
            const pdbData = `{js_safe_pdb}`;
            
            window.onload = function() {{
                try {{
                    const element = document.getElementById('viewer');
                    viewer = $3Dmol.createViewer(element, {{
                        backgroundColor: '#0a0e27'
                    }});
                    
                    viewer.addModel(pdbData, "pdb");
                    viewer.setStyle({{}}, {{'cartoon': {{'color': 'spectrum'}}}});
                    viewer.zoomTo();
                    viewer.render();
                    
                    document.getElementById('loading').style.display = 'none';
                }} catch(e) {{
                    console.error('Error loading structure:', e);
                    document.getElementById('loading').textContent = 'Error loading structure';
                }}
            }};
            
            function setCartoon() {{
                viewer.setStyle({{}}, {{'cartoon': {{'color': 'spectrum'}}}});
                viewer.render();
            }}
            
            function setStick() {{
                viewer.setStyle({{}}, {{'stick': {{'colorscheme': 'Jmol'}}}});
                viewer.render();
            }}
            
            function setSphere() {{
                viewer.setStyle({{}}, {{'sphere': {{'colorscheme': 'Jmol'}}}});
                viewer.render();
            }}
            
            function setLine() {{
                viewer.setStyle({{}}, {{'line': {{'colorscheme': 'chainHetatm'}}}});
                viewer.render();
            }}
        </script>
    </body>
    </html>
    """

    # A data: URL keeps the viewer self-contained inside the iframe.
    encoded_page = base64.b64encode(page.encode()).decode()
    return f'<iframe src="data:text/html;base64,{encoded_page}" width="100%" height="600" frameborder="0" style="border-radius: 12px; box-shadow: 0 8px 32px rgba(0,0,0,0.1);"></iframe>'


def run_ramplot():
    """Run the ``ramplot`` command-line tool and stream Gradio updates.

    This is a generator. Every yielded value is a 5-tuple of ``gr.update``
    objects: the status banner followed by the four plot images (2D map,
    3D map, standard 2D map, standard 3D map).

    Yields:
        A single error banner when no structure is loaded; otherwise a
        "processing" banner followed by either the generated plots or an
        error banner describing why the analysis failed.
    """

    def banner_only(html):
        # Show the status banner and clear/hide all four plot slots.
        hidden = tuple(gr.update(value=None, visible=False) for _ in range(4))
        return (gr.update(value=html, visible=True),) + hidden

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        # Inside a generator, ``return <value>`` only ends the iteration; the
        # value is never produced as an item, so the banner must be yielded.
        yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå No structure loaded. Please search for a disease first.</div>")
        return

    # Show processing message
    yield banner_only("<div style='padding: 20px; background: #fff3cd; border-radius: 8px; color: #856404;'>üî¨ Processing Ramachandran plot analysis...</div>")

    input_folder = "proteins/"
    output_folder = "my_analysis_folder"

    os.makedirs(input_folder, exist_ok=True)
    os.makedirs(output_folder, exist_ok=True)

    # NOTE: the command is given the whole input folder, not the path of the
    # currently loaded structure.
    cmd = [
        "ramplot", "pdb",
        "-i", input_folder,
        "-o", output_folder,
        "-m", "0",
        "-r", "600",
        "-p", "png"
    ]

    try:
        subprocess.run(cmd, check=True, text=True, capture_output=True)

        # Expected plot files, in the same order as the image outputs.
        plots_dir = os.path.join(output_folder, "Plots")
        plot_paths = [
            os.path.join(plots_dir, file_name)
            for file_name in (
                "MapType2DAll.png",
                "MapType3DAll.png",
                "StdMapType2DGeneralGly.png",
                "StdMapType3DGeneral.png",
            )
        ]

        if not any(os.path.exists(path) for path in plot_paths):
            yield banner_only(f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>Analysis finished but no plot images were found in {plots_dir}</div>")
            return

        # Only hand existing files to the image components; a slot whose
        # file was not produced stays hidden instead of pointing at a
        # missing path.
        yield (
            gr.update(value="<div style='padding: 20px; background: #d4edda; border-radius: 8px; color: #155724;'>‚úÖ Ramachandran plot analysis completed!</div>", visible=True),
            *[
                gr.update(value=path, visible=True)
                if os.path.exists(path)
                else gr.update(value=None, visible=False)
                for path in plot_paths
            ]
        )

    except subprocess.CalledProcessError as e:
        yield banner_only(f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ö†Ô∏è Analysis failed: {e.stderr}</div>")
    except Exception as e:
        yield banner_only(f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Error: {str(e)}</div>")


def run_prankweb_prediction():
    """Submit the loaded structure to PrankWeb and stream Gradio updates.

    Drives a headless Chrome session that uploads the current PDB file to
    https://prankweb.cz/, downloads the prediction archive into
    ``prankweb_results``, extracts it and loads
    ``structure.pdb_predictions.csv`` into a DataFrame.

    This is a generator. Every yielded value is a 2-tuple of ``gr.update``
    objects: the status message followed by the results table.

    Yields:
        A single error message when no structure is loaded; otherwise a
        "processing" banner followed by either the predictions table or an
        error message.
    """

    def banner_only(html):
        # Show the status message and clear/hide the results table.
        return (
            gr.update(value=html, visible=True),
            gr.update(value=None, visible=False),
        )

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        # Inside a generator, ``return <value>`` only ends the iteration; the
        # value is never produced as an item, so the banner must be yielded.
        yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå No structure loaded. Please search for a disease first.</div>")
        return

    # Show processing message
    yield banner_only("<div style='padding: 20px; background: #fff3cd; border-radius: 8px; color: #856404;'>üîÆ Processing PrankWeb prediction (this may take several minutes)...</div>")

    pdb_path = current_pdb_info["pdb_path"]
    output_dir = "prankweb_results"
    os.makedirs(output_dir, exist_ok=True)

    absolute_path = os.path.abspath(pdb_path)

    def list_zips():
        return {f for f in os.listdir(output_dir) if f.endswith('.zip')}

    # Remember archives left over from earlier runs so that only an archive
    # appearing during this run is treated as the download.
    zips_before = list_zips()

    # Setup Chrome driver with download preferences and HEADLESS mode
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless=new')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--window-size=1920,1080')
    prefs = {
        "download.default_directory": os.path.abspath(output_dir),
        "download.prompt_for_download": False,
    }
    chrome_options.add_experimental_option("prefs", prefs)

    try:
        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.get("https://prankweb.cz/")
            time.sleep(3)

            wait = WebDriverWait(driver, 30)
            custom_structure = wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Custom structure')]")))
            driver.execute_script("arguments[0].click();", custom_structure)
            time.sleep(1)

            file_input = driver.find_element(By.CSS_SELECTOR, "input[type='file']")
            file_input.send_keys(absolute_path)
            time.sleep(2)

            submit_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button[type='submit']")))
            driver.execute_script("arguments[0].click();", submit_btn)

            # The prediction itself can take minutes, hence the long wait.
            wait_long = WebDriverWait(driver, 600)
            info_tab = wait_long.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Info')]")))

            driver.execute_script("arguments[0].click();", info_tab)
            time.sleep(2)

            download_btn = wait_long.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Download prediction data')]")))
            driver.execute_script("arguments[0].click();", download_btn)

            # Poll for the downloaded archive instead of sleeping a fixed
            # time; the browser must stay open until the file has arrived.
            deadline = time.time() + 60
            new_zips = set()
            while not new_zips and time.time() < deadline:
                time.sleep(1)
                new_zips = list_zips() - zips_before
        finally:
            # Always release the browser, whether the steps above succeeded
            # or raised.
            driver.quit()

        if not new_zips:
            yield banner_only("‚ùå Download failed - no zip file found")
            return

        # Extract the archive into a sibling folder named after it.
        zip_name = sorted(new_zips)[0]
        zip_path = os.path.join(output_dir, zip_name)
        extract_path = os.path.join(output_dir, os.path.splitext(zip_name)[0])

        os.makedirs(extract_path, exist_ok=True)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)

        # Find the CSV file
        csv_path = os.path.join(extract_path, "structure.pdb_predictions.csv")
        if not os.path.exists(csv_path):
            yield banner_only("‚ùå CSV file not found in extracted data")
            return

        # Read and filter CSV. Header names are stripped first: the file's
        # headers carry padding spaces, so unstripped names would not match
        # the columns to drop.
        df = pd.read_csv(csv_path)
        df.columns = df.columns.str.strip()
        df = df.drop(columns=['residue_ids', 'surf_atom_ids'], errors='ignore')

        yield (
            gr.update(value="<div style='padding: 20px; background: #d4edda; border-radius: 8px; color: #155724;'>‚úÖ PrankWeb prediction completed!</div>", visible=True),
            gr.update(value=df, visible=True)
        )

    except Exception as e:
        yield banner_only(f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Error: {str(e)}</div>")


def prepare_protein_meeko():
    """Convert the loaded structure to PDBQT with ``mk_prepare_receptor.py``.

    On success the path of the generated file is stored in
    ``current_pdb_info["prepared_pdbqt"]`` and its content is rendered with
    ``show_structure`` and copied to a temporary file offered for download.

    This is a generator. Every yielded value is a 3-tuple of ``gr.update``
    objects: status banner, structure viewer HTML and download file.

    Yields:
        A single error banner when no structure is loaded; otherwise a
        "processing" banner followed by either the prepared structure or an
        error banner.
    """

    def banner_only(html):
        # Show the status banner and reset the viewer and download outputs.
        return (
            gr.update(value=html, visible=True),
            gr.update(value=""),
            gr.update(value=None),
        )

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        # Inside a generator, ``return <value>`` only ends the iteration; the
        # value is never produced as an item, so the banner must be yielded.
        yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå No structure loaded. Please search for a disease first.</div>")
        return

    # Show processing message
    yield banner_only("<div style='padding: 20px; background: #fff3cd; border-radius: 8px; color: #856404;'>‚öôÔ∏è Preparing protein with Meeko...</div>")

    pdb_path = current_pdb_info["pdb_path"]
    pdb_id = current_pdb_info["pdb_id"]

    output_dir = "prepared_protein_meeko"
    os.makedirs(output_dir, exist_ok=True)

    output_base = os.path.join(output_dir, "prepared_protein")

    cmd = [
        'mk_prepare_receptor.py',
        '-i', pdb_path,
        '-o', output_base,
        '-p',
        '--charge_model', 'gasteiger',
        '--default_altloc', 'A'
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)

        # The output file will be output_base.pdbqt
        pdbqt_path = f"{output_base}.pdbqt"

        if not os.path.exists(pdbqt_path):
            yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå PDBQT file not generated</div>")
            return

        # Store prepared protein path globally
        current_pdb_info["prepared_pdbqt"] = pdbqt_path

        # Read PDBQT content
        with open(pdbqt_path, 'r') as f:
            pdbqt_content = f.read()

        # Create 3D visualization (PDBQT format is similar to PDB)
        protein_name = f"Prepared Protein ({pdb_id})"
        structure_html = show_structure(pdbqt_content, pdb_id, protein_name)

        # Create download file; delete=False keeps it on disk after closing
        # so its path can be handed to the download component.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdbqt', delete=False) as temp_file:
            temp_file.write(pdbqt_content)

        success_msg = "<div style='padding: 20px; background: #d4edda; border-radius: 8px; color: #155724;'>"
        success_msg += "‚úÖ Protein preparation completed!<br>"
        success_msg += f"<small>Output: {pdbqt_path}</small>"
        if result.stdout:
            success_msg += f"<br><small>{result.stdout}</small>"
        success_msg += "</div>"

        yield (
            gr.update(value=success_msg, visible=True),
            gr.update(value=structure_html),
            gr.update(value=temp_file.name)
        )

    except subprocess.CalledProcessError as e:
        error_msg = f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>"
        error_msg += f"‚ö†Ô∏è Preparation failed:<br><small>{e.stderr if e.stderr else str(e)}</small></div>"
        yield banner_only(error_msg)
    except Exception as e:
        yield banner_only(f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Error: {str(e)}</div>")


def run_molecular_docking():
    """Dock every ligand in ``pdbqt/`` into every predicted pocket with Vina.

    Reads the receptor from ``current_pdb_info["prepared_pdbqt"]``, pocket
    centres from a PrankWeb ``structure.pdb_predictions.csv`` found under
    ``prankweb_results/`` and ligands from ``pdbqt/*.pdbqt``. For each
    ligand/pocket pair the docked poses are written to
    ``docking_results/pdbqt`` and converted with ``obabel`` into a
    ligand-only PDB and a protein+ligand PDB in ``docking_results/pdb``.
    The three lowest-energy poses per ligand (among those at or below
    -7.0 kcal/mol) are saved to ``docking_results/docking_summary.csv`` and
    stored in ``current_pdb_info["docking_results"]``.

    This is a generator. Every yielded value is a 3-tuple of ``gr.update``
    objects: status banner, summary table and pose dropdown.

    Yields:
        A single error banner when no prepared protein exists; otherwise a
        "processing" banner followed by either the summary or a
        warning/error banner.
    """

    def banner_only(html):
        # Show the status banner, hide the table and empty the dropdown.
        return (
            gr.update(value=html, visible=True),
            gr.update(value=None, visible=False),
            gr.update(choices=[], visible=False),
        )

    if not current_pdb_info.get("prepared_pdbqt"):
        # Inside a generator, ``return <value>`` only ends the iteration; the
        # value is never produced as an item, so the banner must be yielded.
        yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå No prepared protein found. Please prepare protein first.</div>")
        return

    # Show processing message
    yield banner_only("<div style='padding: 20px; background: #fff3cd; border-radius: 8px; color: #856404;'>üî¨ Running molecular docking (this may take several minutes)...</div>")

    try:
        # Imported here so a missing package is reported through the error
        # banner below rather than failing at module import.
        from vina import Vina
        import glob

        energy_cutoff = -7.0
        n_poses = 10
        box_size = [25, 25, 25]

        def find_pocket_csv(pdb_id):
            """Return the PrankWeb predictions CSV to use, or None."""
            pattern = os.path.join("prankweb_results", "*", "structure.pdb_predictions.csv")
            candidates = glob.glob(pattern)
            if not candidates:
                return None
            if pdb_id:
                # Prefer results whose folder name mentions the loaded
                # structure (presumably the archive is named after the
                # uploaded file - confirm); otherwise use any result.
                wanted = str(pdb_id).lower()
                same_structure = [
                    path for path in candidates
                    if wanted in os.path.basename(os.path.dirname(path)).lower()
                ]
                candidates = same_structure or candidates
            return max(candidates, key=os.path.getmtime)

        def obabel_to_pdb(source_file, pdb_file):
            """Convert a file to PDB with ``obabel ... -h``; report success."""
            try:
                subprocess.run(['obabel', source_file, '-O', pdb_file, '-h'],
                               check=True, capture_output=True)
                return True
            except (subprocess.CalledProcessError, OSError):
                # Best effort: a failed conversion must not abort docking.
                return False

        def write_complex(protein_pdb, ligand_pdb, output_pdb):
            """Concatenate protein and ligand PDB text into one file."""
            try:
                with open(output_pdb, 'w') as outfile:
                    for part in (protein_pdb, ligand_pdb):
                        with open(part, 'r') as handle:
                            outfile.write(handle.read())
                return True
            except OSError:
                return False

        # Input files and directories
        protein_pdbqt = current_pdb_info["prepared_pdbqt"]
        ligand_folder = "pdbqt"
        csv_file = find_pocket_csv(current_pdb_info.get("pdb_id"))

        # Check if PrankWeb results exist
        if csv_file is None:
            yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå PrankWeb results not found. Please run PrankWeb prediction first.</div>")
            return

        # Check if ligand folder exists
        if not os.path.exists(ligand_folder):
            yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Ligand folder 'pdbqt' not found. Please ensure ligands are prepared.</div>")
            return

        # Load pocket table; the headers carry padding spaces, so normalise
        # them once instead of depending on the exact amount of padding.
        df = pd.read_csv(csv_file)
        df.columns = df.columns.str.strip()

        # Get all ligand PDBQT files (sorted for a reproducible order)
        ligand_files = sorted(glob.glob(os.path.join(ligand_folder, "*.pdbqt")))

        if not ligand_files:
            yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå No ligand files found in 'pdbqt' folder.</div>")
            return

        # Ensure output folders exist
        output_dir_pdbqt = "docking_results/pdbqt"
        output_dir_pdb = "docking_results/pdb"
        os.makedirs(output_dir_pdbqt, exist_ok=True)
        os.makedirs(output_dir_pdb, exist_ok=True)

        # Initialize summary data
        summary_data = []

        with tempfile.TemporaryDirectory() as scratch_dir:
            # The receptor is the same for every ligand/pocket pair, so it
            # is converted once; the scratch directory is removed even if
            # docking fails part-way.
            protein_pdb = os.path.join(scratch_dir, "protein.pdb")
            protein_converted = obabel_to_pdb(protein_pdbqt, protein_pdb)

            # Iterate over each ligand file
            for ligand_pdbqt in ligand_files:
                ligand_name = os.path.splitext(os.path.basename(ligand_pdbqt))[0]
                ligand_best_poses = []

                # Iterate over each pocket for this ligand
                for _, row in df.iterrows():
                    pocket_name = str(row['name']).strip()
                    center = [float(row['center_x']), float(row['center_y']), float(row['center_z'])]

                    # Initialize Vina
                    v = Vina()
                    v.set_receptor(rigid_pdbqt_filename=protein_pdbqt)
                    v.set_ligand_from_file(ligand_pdbqt)

                    # Define docking box
                    v.compute_vina_maps(center=center, box_size=box_size)

                    # Perform docking
                    v.dock(exhaustiveness=8, n_poses=n_poses)

                    # Get scores
                    scores = v.energies(n_poses=n_poses)

                    # Keep poses whose first energy term is at or below the
                    # cutoff; pose numbers are 1-based.
                    for pose_num, score in enumerate(scores, start=1):
                        if score[0] <= energy_cutoff:
                            ligand_best_poses.append({
                                'ligand': ligand_name,
                                'pocket': pocket_name,
                                'pose_number': pose_num,
                                'binding_energy': score[0],
                                'center_x': center[0],
                                'center_y': center[1],
                                'center_z': center[2]
                            })

                    # Save all poses for this pocket-ligand combination
                    pdbqt_file = os.path.join(output_dir_pdbqt, f"{ligand_name}_{pocket_name}_docked_poses.pdbqt")
                    v.write_poses(pdbqt_file, n_poses=n_poses, overwrite=True)

                    # Save ligand-only PDB
                    pdb_ligand_only = os.path.join(output_dir_pdb, f"{ligand_name}_{pocket_name}_ligand_poses.pdb")
                    ligand_converted = obabel_to_pdb(pdbqt_file, pdb_ligand_only)

                    # Save protein + ligand complex PDB, reusing the two
                    # conversions above instead of running obabel again.
                    if protein_converted and ligand_converted:
                        pdb_complex = os.path.join(output_dir_pdb, f"{ligand_name}_{pocket_name}_complex.pdb")
                        write_complex(protein_pdb, pdb_ligand_only, pdb_complex)

                # Select top 3 poses for this ligand (across all pockets)
                ligand_best_poses.sort(key=lambda pose: pose['binding_energy'])
                summary_data.extend(ligand_best_poses[:3])

        # Create summary table
        if summary_data:
            summary_df = pd.DataFrame(summary_data)
            summary_df = summary_df[['ligand', 'pocket', 'pose_number', 'binding_energy',
                                    'center_x', 'center_y', 'center_z']]
            summary_df['binding_energy'] = summary_df['binding_energy'].round(3)
            for axis_column in ('center_x', 'center_y', 'center_z'):
                summary_df[axis_column] = summary_df[axis_column].round(2)

            # Save summary table
            summary_file = "docking_results/docking_summary.csv"
            summary_df.to_csv(summary_file, index=False)

            # Store in global variable
            current_pdb_info["docking_results"] = summary_df

            # Create dropdown choices
            choices = [
                f"{row['ligand']} - {row['pocket']} (Pose {row['pose_number']}) | {row['binding_energy']:.2f} kcal/mol"
                for _, row in summary_df.iterrows()
            ]

            success_msg = "<div style='padding: 20px; background: #d4edda; border-radius: 8px; color: #155724;'>"
            success_msg += f"‚úÖ Docking completed!<br>"
            success_msg += f"<small>Processed {len(ligand_files)} ligands</small><br>"
            success_msg += f"<small>Found {len(summary_data)} good poses (‚â§ -7.0 kcal/mol)</small><br>"
            success_msg += f"<small>Best energy: {summary_df['binding_energy'].min():.2f} kcal/mol</small>"
            success_msg += "</div>"

            yield (
                gr.update(value=success_msg, visible=True),
                gr.update(value=summary_df, visible=True),
                gr.update(choices=choices, visible=True, value=choices[0] if choices else None)
            )
        else:
            yield banner_only("<div style='padding: 20px; background: #f8d7da; border-radius: 8px; color: #721c24;'>‚ö†Ô∏è No poses with binding energy ‚â§ -7.0 kcal/mol found</div>")

    except Exception as e:
        yield banner_only(f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Error: {str(e)}</div>")


def display_docked_structure(selected_pose):
    """Render the protein-ligand complex belonging to a dropdown label.

    Args:
        selected_pose: Label of the form
            ``"<ligand> - <pocket> (Pose N) | <energy> kcal/mol"``.

    Returns:
        A ``gr.update`` whose value is either the viewer HTML produced by
        ``show_structure`` or an error banner.
    """
    if not selected_pose:
        return gr.update(value="<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Please select a pose to display</div>")

    if "docking_results" not in current_pdb_info:
        return gr.update(value="<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå No docking results available</div>")

    try:
        # Parse the selected pose label.
        # The ligand group is greedy so that a ligand name which itself
        # contains " - " is still split at the separator in front of the
        # pocket name.
        label = re.match(r"(?P<ligand>.+) - (?P<pocket>.+?) \(Pose \d+\)", selected_pose)
        if label is None:
            raise ValueError(f"unrecognised pose label: {selected_pose!r}")
        ligand_name = label.group("ligand").strip()
        pocket_part = label.group("pocket").strip()

        # NOTE(review): the pose number is not used to pick a single pose;
        # only one complex file exists per ligand/pocket pair, so the whole
        # file is displayed.
        complex_file = f"docking_results/pdb/{ligand_name}_{pocket_part}_complex.pdb"

        if not os.path.exists(complex_file):
            return gr.update(value=f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Complex file not found: {complex_file}</div>")

        # Read PDB content
        with open(complex_file, 'r') as f:
            pdb_content = f.read()

        # Create 3D visualization
        structure_html = show_structure(pdb_content, f"{ligand_name}_{pocket_part}", f"Docked Complex - {selected_pose}")

        return gr.update(value=structure_html)

    except Exception as e:
        return gr.update(value=f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Error displaying structure: {str(e)}</div>")


def process_disease(disease_name: str):
    """Prepare the loaded protein with ``mk_prepare_receptor.py`` (Meeko).

    NOTE: despite its name this function performs receptor preparation and
    never reads ``disease_name``. A second ``process_disease`` is defined
    later in this file; being defined later, that one replaces this
    definition when the module is loaded.

    This is a generator. Every yielded value is a 3-tuple of ``gr.update``
    objects: status banner, structure viewer HTML and download file.

    Args:
        disease_name: Unused.

    Yields:
        A single error banner when no structure is loaded; otherwise a
        "processing" banner followed by either the prepared structure or an
        error banner.
    """

    def banner_only(html):
        # Show the status banner and reset the viewer and download outputs.
        return (
            gr.update(value=html, visible=True),
            gr.update(value=""),
            gr.update(value=None),
        )

    if not current_pdb_info["pdb_id"] or not current_pdb_info["pdb_path"]:
        # Inside a generator, ``return <value>`` only ends the iteration; the
        # value is never produced as an item, so the banner must be yielded.
        yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå No structure loaded. Please search for a disease first.</div>")
        return

    # Show processing message
    yield banner_only("<div style='padding: 20px; background: #fff3cd; border-radius: 8px; color: #856404;'>‚öôÔ∏è Preparing protein with Meeko...</div>")

    pdb_path = current_pdb_info["pdb_path"]
    pdb_id = current_pdb_info["pdb_id"]

    output_dir = "prepared_protein_meeko"
    os.makedirs(output_dir, exist_ok=True)

    output_base = os.path.join(output_dir, "prepared_protein")

    cmd = [
        'mk_prepare_receptor.py',
        '-i', pdb_path,
        '-o', output_base,
        '-p',
        '--charge_model', 'gasteiger',
        '--default_altloc', 'A'
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)

        # The output file will be output_base.pdbqt
        pdbqt_path = f"{output_base}.pdbqt"

        if not os.path.exists(pdbqt_path):
            yield banner_only("<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå PDBQT file not generated</div>")
            return

        # Read PDBQT content
        with open(pdbqt_path, 'r') as f:
            pdbqt_content = f.read()

        # Create 3D visualization (PDBQT format is similar to PDB)
        protein_name = f"Prepared Protein ({pdb_id})"
        structure_html = show_structure(pdbqt_content, pdb_id, protein_name)

        # Create download file; delete=False keeps it on disk after closing
        # so its path can be handed to the download component.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdbqt', delete=False) as temp_file:
            temp_file.write(pdbqt_content)

        success_msg = "<div style='padding: 20px; background: #d4edda; border-radius: 8px; color: #155724;'>"
        success_msg += "‚úÖ Protein preparation completed!<br>"
        success_msg += f"<small>Output: {pdbqt_path}</small>"
        if result.stdout:
            success_msg += f"<br><small>{result.stdout}</small>"
        success_msg += "</div>"

        yield (
            gr.update(value=success_msg, visible=True),
            gr.update(value=structure_html),
            gr.update(value=temp_file.name)
        )

    except subprocess.CalledProcessError as e:
        error_msg = f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>"
        error_msg += f"‚ö†Ô∏è Preparation failed:<br><small>{e.stderr if e.stderr else str(e)}</small></div>"
        yield banner_only(error_msg)
    except Exception as e:
        yield banner_only(f"<div style='padding: 20px; background: #fee; border-radius: 8px; color: #c33;'>‚ùå Error: {str(e)}</div>")


def _clear_search_result(status_message: str) -> dict:
    """Reset the cached structure and build the "nothing to show" UI update.

    Every failure path of ``process_disease`` needs the same response: forget
    the previously loaded structure, hide the info box, blank the viewer and
    the download slot, and show a status message.

    Args:
        status_message: Text displayed in the ``search_status`` component.

    Returns:
        A dict keyed by the output components, as expected by the
        ``search_btn.click`` handler.
    """
    global current_pdb_info
    current_pdb_info = {"pdb_id": None, "pdb_path": None}
    return {
        info_box: gr.update(visible=False),
        structure_viewer: gr.update(value=""),
        download_file: gr.update(value=None),
        search_status: gr.update(value=status_message, visible=True),
    }


def process_disease(disease_name: str) -> dict:
    """Resolve a disease to a protein structure and prepare it for display.

    Steps: map the disease to a target protein, look up a PDB entry for that
    protein, download the entry from RCSB, pass it through
    ``remove_ligands_from_pdb`` (chain ``'A'``), save it under ``proteins/``,
    and build the info panel, 3D viewer HTML and a downloadable copy.

    On success the module-level ``current_pdb_info`` is updated with the PDB
    id and the saved file path; on any failure it is reset to ``None`` values.

    Args:
        disease_name: Free-text disease or condition typed by the user.
            ``None``, empty and whitespace-only values are treated as
            "nothing entered".

    Returns:
        A dict mapping the ``info_box``, ``structure_viewer``,
        ``download_file`` and ``search_status`` components to their updates.
        Errors are reported through ``search_status`` rather than raised.
    """
    global current_pdb_info
    # Local import keeps this block self-contained; `html` is standard library.
    import html

    if not disease_name or not disease_name.strip():
        return _clear_search_result("‚ö†Ô∏è Please enter a disease or condition")

    # Map disease to protein
    protein_name = map_disease_to_protein(disease_name)
    if not protein_name:
        return _clear_search_result("‚ùå No protein mapping found")

    # Search PDB
    pdb_id = search_pdb_for_first_hit(protein_name)
    if not pdb_id:
        return _clear_search_result("‚ùå No PDB structure found")

    # Download PDB file
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pdb_content = response.text

        # Clean structure (the returned statistics are not used here)
        pdb_content, _ = remove_ligands_from_pdb(pdb_content, 'A')

        # Save to proteins folder
        proteins_folder = "proteins"
        os.makedirs(proteins_folder, exist_ok=True)
        pdb_path = os.path.join(proteins_folder, f"{pdb_id}.pdb")

        with open(pdb_path, 'w') as f:
            f.write(pdb_content)

        # Update global variable
        current_pdb_info = {"pdb_id": pdb_id, "pdb_path": pdb_path}

        # The values below end up inside raw HTML; escape them so that user
        # input (or unexpected remote data) cannot inject markup or scripts.
        safe_disease = html.escape(disease_name)
        safe_protein = html.escape(str(protein_name))
        safe_pdb_id = html.escape(str(pdb_id))

        # Build info display
        info_html = f"""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 24px; border-radius: 16px; color: white; box-shadow: 0 8px 32px rgba(0,0,0,0.1);">
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px;">
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Disease/Condition</div>
                    <div style="font-size: 20px; font-weight: 700;">{safe_disease}</div>
                </div>
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Target Protein</div>
                    <div style="font-size: 20px; font-weight: 700;">{safe_protein}</div>
                </div>
                <div>
                    <div style="font-size: 13px; opacity: 0.9; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">PDB Structure ID</div>
                    <div style="font-size: 20px; font-weight: 700;">{safe_pdb_id}</div>
                </div>
            </div>
        </div>
        """

        # Create 3D visualization
        structure_html = show_structure(pdb_content, pdb_id, protein_name)

        # Create download file; the context manager guarantees the handle is
        # closed even if the write fails. delete=False keeps the file on disk
        # so its path can be handed to the download component.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False) as temp_file:
            temp_file.write(pdb_content)

        return {
            info_box: gr.update(value=info_html, visible=True),
            structure_viewer: gr.update(value=structure_html),
            download_file: gr.update(value=temp_file.name),
            search_status: gr.update(value="‚úÖ Structure loaded successfully!", visible=True)
        }

    except Exception as e:
        # UI boundary: surface any failure (network, disk, parsing) as a
        # status message instead of letting the handler crash.
        return _clear_search_result(f"‚ùå Error: {str(e)}")


# Create Gradio Interface
# The whole UI is declared inside one Blocks context bound to `demo`.
# Components are laid out in the order they are created inside the nested
# Row/Column contexts, so statement order here is part of the layout.
# The `css` argument supplies custom styling for the container width and the
# `.main-header` banner rendered by the first gr.HTML below.
with gr.Blocks(theme=gr.themes.Soft(), css="""
    .gradio-container {
        max-width: 1600px !important;
    }
    .main-header {
        text-align: center;
        padding: 40px 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 20px;
        color: white;
        margin-bottom: 30px;
    }
    .main-header h1 {
        font-size: 42px;
        font-weight: 800;
        margin: 0 0 10px 0;
    }
    .main-header p {
        font-size: 18px;
        opacity: 0.95;
        margin: 0;
        font-weight: 500;
    }
""", title="Protein Structure Finder & Analyzer") as demo:

    # Page banner (styled by the .main-header rules above).
    gr.HTML("""
        <div class="main-header">
            <h1>üß¨ Protein Structure Finder & Analyzer</h1>
            <p>Discover, visualize and analyze protein structures related to diseases</p>
        </div>
    """)

    # Search section: input and status widgets on the left (scale=1),
    # 3D structure viewer on the right (scale=2).
    with gr.Row():
        with gr.Column(scale=1):
            disease_input = gr.Textbox(
                label="üîç Enter Disease or Condition",
                placeholder="e.g., Alzheimer's Disease, diabetes, inflammation...",
                lines=1
            )

            search_btn = gr.Button("üöÄ Search Structure", variant="primary", size="lg")

            # Both start hidden; process_disease toggles their visibility.
            info_box = gr.HTML(visible=False)
            search_status = gr.Markdown(visible=False)
            download_file = gr.File(label="üíæ Download PDB File", visible=True)

        with gr.Column(scale=2):
            structure_viewer = gr.HTML(label="üî¨ 3D Structure Viewer")

    # Ramachandran Plot Section
    with gr.Row():
        gr.Markdown("## üìä Ramachandran Plot Analysis")

    with gr.Row():
        ramplot_btn = gr.Button("üî¨ Run Ramachandran Analysis", variant="secondary", size="lg")

    # Status banner and the four plot slots are created hidden.
    # NOTE(review): presumably run_ramplot makes them visible - confirm.
    ramplot_status = gr.HTML(visible=False)

    with gr.Row():
        with gr.Column():
            plot1 = gr.Image(label="Map Type 2D All", visible=False)
        with gr.Column():
            plot2 = gr.Image(label="Map Type 3D All", visible=False)

    with gr.Row():
        with gr.Column():
            plot3 = gr.Image(label="Std Map Type 2D General Gly", visible=False)
        with gr.Column():
            plot4 = gr.Image(label="Std Map Type 3D General", visible=False)

    # PrankWeb Section
    with gr.Row():
        gr.Markdown("## üéØ PrankWeb Binding Site Prediction")

    with gr.Row():
        prankweb_btn = gr.Button("üîÆ Run PrankWeb Prediction", variant="secondary", size="lg")

    # Status banner and results table are created hidden.
    prankweb_status = gr.HTML(visible=False)
    prankweb_results = gr.Dataframe(label="Prediction Results", visible=False)

    # Protein Preparation Section
    with gr.Row():
        gr.Markdown("## ‚öôÔ∏è Protein Preparation for Docking (Meeko)")

    with gr.Row():
        prepare_btn = gr.Button("üîß Prepare Protein with Meeko", variant="secondary", size="lg")

    prepare_status = gr.HTML(visible=False)

    # Prepared-structure viewer (scale=2) next to the PDBQT download (scale=1).
    with gr.Row():
        with gr.Column(scale=2):
            prepared_viewer = gr.HTML(label="üî¨ Prepared Structure Viewer")
        with gr.Column(scale=1):
            prepared_download = gr.File(label="üíæ Download PDBQT File", visible=True)

    # Molecular Docking Section
    with gr.Row():
        gr.Markdown("## üéØ Molecular Docking (AutoDock Vina)")

    with gr.Row():
        docking_btn = gr.Button("üöÄ Run Molecular Docking", variant="secondary", size="lg")

    docking_status = gr.HTML(visible=False)
    docking_summary = gr.Dataframe(label="Docking Summary - Top 3 Poses per Ligand", visible=False)

    with gr.Row():
        gr.Markdown("### üìä View Docked Structures")

    with gr.Row():
        with gr.Column(scale=1):
            # Created hidden with no choices; it is one of the outputs of the
            # docking handler wired below.
            pose_selector = gr.Dropdown(
                label="Select Pose to View",
                choices=[],
                visible=False,
                interactive=True
            )
            view_pose_btn = gr.Button("üëÅÔ∏è View Selected Pose", variant="primary", size="lg")

        with gr.Column(scale=2):
            docked_viewer = gr.HTML(label="üî¨ Docked Complex Viewer")

    # Event handlers
    # process_disease returns a dict keyed by these components, which is why
    # `outputs` is given as a set rather than an ordered list.
    search_btn.click(
        fn=process_disease,
        inputs=[disease_input],
        outputs={info_box, structure_viewer, download_file, search_status}
    )

    # The next four handlers take no UI inputs.
    # NOTE(review): presumably they read the structure cached in the
    # module-level `current_pdb_info` set by process_disease - confirm.
    ramplot_btn.click(
        fn=run_ramplot,
        inputs=[],
        outputs=[ramplot_status, plot1, plot2, plot3, plot4]
    )

    prankweb_btn.click(
        fn=run_prankweb_prediction,
        inputs=[],
        outputs=[prankweb_status, prankweb_results]
    )

    prepare_btn.click(
        fn=prepare_protein_meeko,
        inputs=[],
        outputs=[prepare_status, prepared_viewer, prepared_download]
    )

    docking_btn.click(
        fn=run_molecular_docking,
        inputs=[],
        outputs=[docking_status, docking_summary, pose_selector]
    )

    # The selected dropdown value is passed to the viewer callback.
    view_pose_btn.click(
        fn=display_docked_structure,
        inputs=[pose_selector],
        outputs=[docked_viewer]
    )

# Start the app only when run as a script; share=False requests no public link.
if __name__ == "__main__":
    demo.launch(share=False)

* Running on local URL:  http://127.0.0.1:7878

To create a public link, set `share=True` in `launch()`.


Computing Vina grid ... done.
Performing docking (random seed: -1368157690) ... 
0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+------------+----------+----------
   1       -7.844          0          0
   2       -7.507      2.832      7.151
   3       -7.434      3.406      6.363
   4       -7.411      6.316      8.586
   5       -7.304      6.482      8.918
   6       -7.158      3.458      7.058
   7       -7.134      1.966      2.807
   8       -7.107       2.68      3.725
   9       -7.075      2.999      7.276
  10       -7.012       2.06      6.479
Computing Vina grid ... done.
Performing docking (random seed: -218020667) ... 
0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
**************************************************