In [1]:
import requests
from pathlib import Path
import sys

def search_pdb_for_first_hit(protein_name: str):
    """
    Search RCSB PDB and return the first result found.
    """
    print(f"Searching PDB for the first hit of: {protein_name}")
    print("=" * 70)
    
    query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "struct.title",
                "operator": "contains_phrase",
                "value": protein_name
            }
        },
        "return_type": "entry",
        "request_options": {
            "return_all_hits": False,
            "results_content_type": ["experimental"],
            "sort": [{"sort_by": "score", "direction": "desc"}]
        }
    }
    
    url = "https://search.rcsb.org/rcsbsearch/v2/query"
    
    try:
        response = requests.post(url, json=query, timeout=30)
        response.raise_for_status()
        data = response.json()
        
        # Get the identifier of the first result, if it exists
        result_set = data.get('result_set', [])
        if not result_set:
            print(f"✗ No structures found for '{protein_name}'")
            return None
            
        first_pdb_id = result_set[0]['identifier']
        print(f"✓ Found first hit: {first_pdb_id}\n")
        return first_pdb_id
        
    except requests.exceptions.RequestException as e:
        print(f"✗ Error searching PDB: {e}")
        return None


def download_pdb_file(pdb_id: str, output_dir: str = "proteins") -> Path:
    """Download PDB file from RCSB PDB"""
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)
    
    pdb_id = pdb_id.strip().upper()
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    output_file = output_path / f"{pdb_id}.pdb"
    
    print(f"Downloading PDB file for {pdb_id}...")
    
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        
        with open(output_file, 'wb') as f:
            f.write(response.content)
        
        file_size = output_file.stat().st_size / 1024
        print(f"✓ Successfully downloaded: {output_file} ({file_size:.2f} KB)\n")
        return output_file
        
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            print(f"✗ Error: PDB ID '{pdb_id}' not found")
        else:
            print(f"✗ HTTP Error: {e}")
        return None
    except Exception as e:
        print(f"✗ Error downloading file: {e}")
        return None


def download_fasta_file(pdb_id: str, output_dir: str = "proteins") -> Path:
    """Download FASTA sequence for a given PDB ID"""
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)
    
    pdb_id = pdb_id.strip().upper()
    url = f"https://www.rcsb.org/fasta/entry/{pdb_id}"
    output_file = output_path / f"{pdb_id}.fasta"
    
    print(f"Downloading FASTA sequence for {pdb_id}...")
    
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        
        if not response.text.strip().startswith(">"):
            print(f"✗ No valid FASTA data found for {pdb_id}")
            return None
        
        with open(output_file, 'w') as f:
            f.write(response.text)
        
        num_lines = len(response.text.splitlines())
        print(f"✓ Successfully downloaded: {output_file} ({num_lines} lines)\n")
        return output_file
        
    except requests.exceptions.RequestException as e:
        print(f"✗ Error downloading FASTA: {e}")
        return None


def download_first_protein_structure(protein_name: str, output_dir: str = "proteins"):
    """
    Search for a protein, find the first PDB ID, and download its PDB and FASTA files.
    """
    print("\n" + "=" * 70)
    print(f"STEP 1: Find and Download PDB/FASTA for '{protein_name}'")
    print("=" * 70 + "\n")
    
    # Find the first PDB ID for the given protein name
    pdb_id = search_pdb_for_first_hit(protein_name)
    
    if not pdb_id:
        print("=" * 70)
        print("Process stopped: No PDB ID was found.")
        print("=" * 70)
        return
    
    print("=" * 70)
    print(f"STEP 2: Downloading files for PDB ID: {pdb_id}")
    print("=" * 70 + "\n")
    
    # Download the corresponding PDB and FASTA files
    pdb_file = download_pdb_file(pdb_id, output_dir)
    fasta_file = download_fasta_file(pdb_id, output_dir)
    
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    if pdb_file or fasta_file:
        print(f"  ✓ Process complete for {pdb_id}.")
        if pdb_file:
            print(f"    PDB file saved to: {pdb_file}")
        if fasta_file:
            print(f"    FASTA file saved to: {fasta_file}")
    else:
        print(f"  ✗ Failed to download files for {pdb_id}.")
    print("=" * 70 + "\n")


# Example usage
if __name__ == "__main__":
    print("\n" + "🔬 " * 20)
    protein_name = "VEGFR-2"
    download_first_protein_structure(
        protein_name=protein_name,
        output_dir="proteins"
    )


🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 

STEP 1: Find and Download PDB/FASTA for 'VEGFR-2'

Searching PDB for the first hit of: VEGFR-2
✓ Found first hit: 3V2A

STEP 2: Downloading files for PDB ID: 3V2A

Downloading PDB file for 3V2A...
✓ Successfully downloaded: proteins/3V2A.pdb (429.68 KB)

Downloading FASTA sequence for 3V2A...
✓ Successfully downloaded: proteins/3V2A.fasta (4 lines)

SUMMARY
  ✓ Process complete for 3V2A.
    PDB file saved to: proteins/3V2A.pdb
    FASTA file saved to: proteins/3V2A.fasta



In [18]:
import subprocess

# Define arguments
input_folder = "proteins/"
output_folder = "my_analysis_folder"
map_type = "0"
resolution = "600"
plot_type = "png"

# Construct command as list
cmd = [
    "ramplot", "pdb",
    "-i", input_folder,
    "-o", output_folder,
    "-m", map_type,
    "-r", resolution,
    "-p", plot_type
]

# Run the command
try:
    result = subprocess.run(cmd, check=True, text=True, capture_output=True)
    print("✅ RAMPlot completed successfully!")
    print("STDOUT:\n", result.stdout)
except subprocess.CalledProcessError as e:
    print("❌ Error running RAMPlot:")
    print("STDERR:\n", e.stderr)


✅ RAMPlot completed successfully!
STDOUT:
 Input Directory: proteins/

Output Directory: my_analysis_folder

Plot Resolutions: 600

Plot File Type: png

Plot Ramachandran Map : 2D & 3D All
Torsion Angle Calculation 
3V2A.fasta
3V2A.pdb
2D Ramachandran Plot All
3D Ramachandran Plot All
2D Ramachandran Plot
3D Ramachandran Plot 
[['proteins//3V2A.pdb Chain: R ResNum:133 ResName:HIS' '3V2A' 'R' ...
  'Trans' 'No' 'General']
 ['proteins//3V2A.pdb Chain: R ResNum:134 ResName:GLY' '3V2A' 'R' ...
  'Trans' 'No' 'Gly']
 ['proteins//3V2A.pdb Chain: R ResNum:135 ResName:VAL' '3V2A' 'R' ...
  'Trans' 'No' 'General']
 ...
 ['proteins//3V2A.pdb Chain: A ResNum:104 ResName:CYS' '3V2A' 'A' ...
  'Trans' 'No' 'General']
 ['proteins//3V2A.pdb Chain: A ResNum:105 ResName:ARG' '3V2A' 'A' ...
  'Trans' 'No' 'General']
 ['proteins//3V2A.pdb Chain: A ResNum:106 ResName:PRO' '3V2A' 'A' ...
  'Trans' 'No' 'General']]



In [8]:
import requests
import time
import sys
import os
import csv
import re

# --- 1. SET YOUR INPUTS HERE ---

# Get your API token from: https://swissmodel.expasy.org/token
API_TOKEN = "9e8b3ac03b851bb3834cdb311045c78021087d1d" 

# Set the path to your .fasta file
FASTA_FILE_PATH = "proteins/3V2A.fasta" # Your file path

# Set the path to your CSV file containing the Favoured percentage
CSV_FILE_PATH = "my_analysis_folder/Analysis.csv"  # Change this to your CSV file path

# Give your job a title (you can change this)
PROJECT_TITLE = "My_Protein_Homology_Model"

# --- 2. CHECK FAVOURED PERCENTAGE FROM CSV ---
def extract_favoured_percentage(csv_path):
    """
    Extract the Favoured percentage from CSV file.
    Looks for pattern like: Favoured: ,232,(85.294%)
    """
    try:
        with open(csv_path, 'r') as f:
            content = f.read()
            
            # Search for pattern: Favoured: ,xxx,(yy.yyy%)
            match = re.search(r'Favoured:\s*,\d+,\((\d+\.?\d*)%\)', content)
            
            if match:
                percentage = float(match.group(1))
                return percentage
            else:
                print(f"Error: Could not find 'Favoured:' percentage in {csv_path}")
                print("Expected format: Favoured: ,XXX,(YY.YYY%)")
                return None
                
    except FileNotFoundError:
        print(f"Error: CSV file not found at '{csv_path}'")
        return None
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return None

# Check the Favoured percentage
print("Checking Favoured percentage from CSV...")
favoured_percent = extract_favoured_percentage(CSV_FILE_PATH)

if favoured_percent is None:
    print("Failed to extract Favoured percentage. Exiting.")
    sys.exit(1)

print(f"Favoured percentage found: {favoured_percent}%")

# Check if we should proceed
if favoured_percent >= 90.0:
    print(f"\nFavoured percentage ({favoured_percent}%) is >= 90%.")
    print("SWISS-MODEL execution is NOT required. Exiting.")
    sys.exit(0)
else:
    print(f"\nFavoured percentage ({favoured_percent}%) is < 90%.")
    print("Proceeding with SWISS-MODEL homology modeling...\n")

# --- 3. SCRIPT SETUP ---
BASE_URL = "https://swissmodel.expasy.org"
HEADERS = {"Authorization": f"Token {API_TOKEN}"}

if "YOUR_API_TOKEN_HERE" in API_TOKEN:
    print("Error: Please replace 'YOUR_API_TOKEN_HERE' with your actual API token.")
    sys.exit(1)

# --- 4. Read and PARSE the FASTA file for single or multiple sequences ---
try:
    sequences = []
    current_sequence = []
    
    with open(FASTA_FILE_PATH, 'r') as f:
        for line in f:
            line = line.strip()
            
            # If we encounter a header line
            if line.startswith('>'):
                # If we have a sequence accumulated, save it
                if current_sequence:
                    sequences.append("".join(current_sequence))
                    current_sequence = []
            else:
                # Add sequence line (ignore empty lines)
                if line:
                    current_sequence.append(line)
        
        # Don't forget the last sequence
        if current_sequence:
            sequences.append("".join(current_sequence))
    
    if not sequences:
        print(f"Error: No valid sequences found in '{FASTA_FILE_PATH}'.")
        print("Please ensure it is a valid FASTA file.")
        sys.exit(1)
    
    # Validate sequences
    for i, seq in enumerate(sequences):
        if len(seq) < 50:
            print(f"Warning: Sequence {i+1} is very short ({len(seq)} residues).")
    
    print(f"Successfully read and parsed FASTA file: {FASTA_FILE_PATH}")
    print(f"Number of sequences found: {len(sequences)}")
    
    # If single sequence, pass as string; if multiple, pass as list
    if len(sequences) == 1:
        FASTA_INPUT = sequences[0]
        print(f"Single sequence detected with {len(FASTA_INPUT)} residues")
    else:
        FASTA_INPUT = sequences
        print(f"Multiple sequences detected: {[len(s) for s in sequences]} residues each")

except FileNotFoundError:
    print(f"Error: File not found at '{FASTA_FILE_PATH}'.")
    print("Please check the path and filename.")
    sys.exit(1)
except Exception as e:
    print(f"Error reading file: {e}")
    sys.exit(1)


# --- 5. SUBMIT MODELING JOB ---
print(f"\nSubmitting job '{PROJECT_TITLE}' to SWISS-MODEL...")
payload = {
    "target_sequences": FASTA_INPUT,  # Now handles both string and list
    "project_title": PROJECT_TITLE
}

# Use the /automodel/ endpoint
try:
    submit_response = requests.post(
        f"{BASE_URL}/automodel/", 
        headers=HEADERS, 
        json=payload
    )
    submit_response.raise_for_status() # Raises an error for 4xx or 5xx responses

    project_id = submit_response.json().get("project_id")
    print(f"Job submitted successfully! Project ID: {project_id}")

except requests.exceptions.HTTPError as e:
    print(f"Error submitting job: {e.response.status_code}")
    print(f"Details: {e.response.text}")
    sys.exit(1)
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    sys.exit(1)


# --- 6. POLL FOR RESULTS ---
while True:
    print("Checking job status...")
    try:
        # Use the /project/{project_id}/models/summary/ endpoint
        status_response = requests.get(
            f"{BASE_URL}/project/{project_id}/models/summary/", 
            headers=HEADERS
        )
        status_response.raise_for_status()
        
        status_data = status_response.json()
        job_status = status_data.get("status")
        
        print(f"Current status: {job_status}")

        if job_status == "COMPLETED":
            print("Modeling completed!")
            
            # --- 7. DOWNLOAD THE .PDB FILE ---
            models = status_data.get("models")
            if not models:
                print("Job completed but no models were found.")
                break
            
            # Get the first (and usually best) model's ID
            model_id = models[0].get("model_id")
            output_filename = f"{project_id}_{model_id}.pdb"
            
            print(f"Downloading model {model_id} to {output_filename}...")
            
            # Use the /project/{project_id}/models/{model_id}.pdb endpoint
            pdb_response = requests.get(
                f"{BASE_URL}/project/{project_id}/models/{model_id}.pdb",
                headers=HEADERS
            )
            pdb_response.raise_for_status()
            
            # Save the file
            with open(output_filename, "w") as f:
                f.write(pdb_response.text)
                
            print(f"\nSuccessfully saved model to: {output_filename}")
            print(f"You can now use this '{output_filename}' file for Step 5 (upload to CASTp).")
            
            break # Exit the while loop
            
        elif job_status == "FAILED":
            print("Job failed. Check the SWISS-MODEL website for details.")
            break # Exit the while loop
            
        elif job_status in ["RUNNING", "PENDING"]:
            print("Job is still running. Waiting 60 seconds...")
            time.sleep(60)
        
        else:
            print(f"Unknown status: {job_status}. Waiting...")
            time.sleep(60)

    except requests.exceptions.HTTPError as e:
        print(f"Error checking status: {e.response.status_code}. Retrying...")
        time.sleep(30)
    except Exception as e:
        print(f"An unexpected error occurred: {e}. Retrying...")
        time.sleep(30)

Checking Favoured percentage from CSV...
Favoured percentage found: 85.294%

Favoured percentage (85.294%) is < 90%.
Proceeding with SWISS-MODEL homology modeling...

Successfully read and parsed FASTA file: proteins/3V2A.fasta
Number of sequences found: 2
Multiple sequences detected: [134, 772] residues each

Submitting job 'My_Protein_Homology_Model' to SWISS-MODEL...
Job submitted successfully! Project ID: fe04bc
Checking job status...
Current status: COMPLETED
Modeling completed!
Downloading model 01 to fe04bc_01.pdb...

Successfully saved model to: fe04bc_01.pdb
You can now use this 'fe04bc_01.pdb' file for Step 5 (upload to CASTp).


In [None]:
! mkdir castp_results

In [None]:
import os
import subprocess

# Create output directory if it doesn't exist
os.makedirs("castp_results", exist_ok=True)

# Run the CASTpFoldpy command
subprocess.run([
    "castpfoldpy",
    "--submit-download",
    "-p", "fe04bc_01.pdb",
    "-d", "castp_results",
    "--pocket"
], check=True)


PDB structure is validated.
Job submitted with job id for fe04bc_01.pdb is j_68fb39fe19eb6
job id is written to castp_results/fe04bc_01_submit.log file.
waiting...: 100%|███████████████████████████| 20/20 [00:20<00:00,  1.00s/second]
Output files created succesfully


In [None]:
import gradio as gr
import plotly.graph_objects as go
import requests
from Bio.PDB import PDBParser
import numpy as np
import tempfile
import os

def parse_pdb_structure(pdb_data, pdb_id="structure"):
    """Parse PDB data and extract atom coordinates"""
    parser = PDBParser(QUIET=True)
    
    # Write PDB data to temporary file
    with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False) as tmp:
        tmp.write(pdb_data)
        tmp_path = tmp.name
    
    try:
        structure = parser.get_structure(pdb_id, tmp_path)
        
        # Extract coordinates and atom types
        atoms_data = []
        
        for model in structure:
            for chain in model:
                chain_id = chain.id
                for residue in chain:
                    res_name = residue.get_resname()
                    res_id = residue.get_id()[1]
                    atoms_in_residue = []
                    
                    for atom in residue:
                        coord = atom.get_coord()
                        atoms_data.append({
                            'coord': coord,
                            'element': atom.element,
                            'residue': res_name,
                            'res_id': res_id,
                            'chain': chain_id,
                            'name': atom.name
                        })
                        atoms_in_residue.append(len(atoms_data) - 1)
        
        os.unlink(tmp_path)
        return atoms_data
    
    except Exception as e:
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
        raise e

def get_atom_color(atom_type):
    """Return color for different atom types (CPK coloring)"""
    color_map = {
        'C': '#909090',  # Carbon - gray
        'N': '#3050F8',  # Nitrogen - blue
        'O': '#FF0D0D',  # Oxygen - red
        'S': '#FFFF30',  # Sulfur - yellow
        'P': '#FF8000',  # Phosphorus - orange
        'H': '#FFFFFF',  # Hydrogen - white
        'F': '#90E050',  # Fluorine - green
        'CL': '#1FF01F', # Chlorine - green
        'BR': '#A62929', # Bromine - brown
        'I': '#940094',  # Iodine - purple
        'FE': '#E06633', # Iron - orange
        'CA': '#3DFF00', # Calcium - green
    }
    return color_map.get(atom_type.upper(), '#FF1493')  # Default pink

def find_bonds(atoms_data, max_distance=2.0):
    """Find bonds between atoms based on distance"""
    bonds = []
    n_atoms = len(atoms_data)
    
    # Only check atoms within same or adjacent residues for efficiency
    for i in range(n_atoms):
        atom1 = atoms_data[i]
        for j in range(i + 1, min(i + 20, n_atoms)):  # Check next 20 atoms only
            atom2 = atoms_data[j]
            
            # Skip if different chains or residues too far apart
            if atom1['chain'] != atom2['chain']:
                continue
            if abs(atom1['res_id'] - atom2['res_id']) > 1:
                continue
            
            # Calculate distance
            dist = np.linalg.norm(atom1['coord'] - atom2['coord'])
            
            # Typical bond lengths: C-C ~1.5Å, C-N ~1.5Å, C-O ~1.4Å
            if dist < max_distance:
                bonds.append((i, j))
    
    return bonds

def create_stick_traces(atoms_data, bonds):
    """Create stick/line traces for bonds"""
    traces = []
    
    for bond in bonds:
        i, j = bond
        atom1 = atoms_data[i]
        atom2 = atoms_data[j]
        
        coord1 = atom1['coord']
        coord2 = atom2['coord']
        
        # Create line between atoms
        trace = go.Scatter3d(
            x=[coord1[0], coord2[0]],
            y=[coord1[1], coord2[1]],
            z=[coord1[2], coord2[2]],
            mode='lines',
            line=dict(
                color='gray',
                width=3
            ),
            showlegend=False,
            hoverinfo='skip'
        )
        traces.append(trace)
    
    return traces

def create_protein_visualization(atoms_data, style="sphere", show_backbone=True):
    """Create complete protein visualization"""
    
    coords = np.array([atom['coord'] for atom in atoms_data])
    atom_types = [atom['element'] for atom in atoms_data]
    colors = [get_atom_color(atom) for atom in atom_types]
    
    fig = go.Figure()
    
    # Add backbone trace (C-alpha connectivity)
    if show_backbone:
        ca_atoms = [i for i, atom in enumerate(atoms_data) if atom['name'] == 'CA']
        if len(ca_atoms) > 1:
            ca_coords = coords[ca_atoms]
            backbone_trace = go.Scatter3d(
                x=ca_coords[:, 0],
                y=ca_coords[:, 1],
                z=ca_coords[:, 2],
                mode='lines',
                line=dict(
                    color='lightblue',
                    width=6
                ),
                name='Backbone',
                hoverinfo='skip'
            )
            fig.add_trace(backbone_trace)
    
    # For stick visualization, add bond lines
    if style == "stick" or style == "ball-and-stick":
        bonds = find_bonds(atoms_data)
        stick_traces = create_stick_traces(atoms_data, bonds)
        for trace in stick_traces:
            fig.add_trace(trace)
    
    # Add atoms
    if style == "sphere":
        marker_size = 8
    elif style == "stick":
        marker_size = 3
    else:  # ball-and-stick
        marker_size = 5
    
    atoms_trace = go.Scatter3d(
        x=coords[:, 0],
        y=coords[:, 1],
        z=coords[:, 2],
        mode='markers',
        marker=dict(
            size=marker_size,
            color=colors,
            line=dict(width=0.5, color='white')
        ),
        text=[f"{atom['name']} ({atom['element']}) - {atom['residue']}{atom['res_id']}" 
              for atom in atoms_data],
        hovertemplate='<b>%{text}</b><br>X: %{x:.2f}<br>Y: %{y:.2f}<br>Z: %{z:.2f}<extra></extra>',
        name='Atoms'
    )
    fig.add_trace(atoms_trace)
    
    # Update layout
    fig.update_layout(
        title=f"Protein Structure ({len(atoms_data)} atoms)",
        scene=dict(
            xaxis_title='X (Å)',
            yaxis_title='Y (Å)',
            zaxis_title='Z (Å)',
            bgcolor='white',
            xaxis=dict(showbackground=True, backgroundcolor='rgb(230, 230, 230)'),
            yaxis=dict(showbackground=True, backgroundcolor='rgb(230, 230, 230)'),
            zaxis=dict(showbackground=True, backgroundcolor='rgb(230, 230, 230)'),
            aspectmode='data'
        ),
        showlegend=True,
        width=900,
        height=700,
        margin=dict(l=0, r=0, t=40, b=0)
    )
    
    return fig

def visualize_protein(pdb_file, style="sphere", show_backbone=True):
    """Visualize a protein structure from a PDB file"""
    try:
        if pdb_file is None:
            return None
        
        with open(pdb_file.name, 'r') as f:
            pdb_data = f.read()
        
        atoms_data = parse_pdb_structure(pdb_data)
        fig = create_protein_visualization(atoms_data, style, show_backbone)
        
        return fig
    
    except Exception as e:
        print(f"Error: {str(e)}")
        return None

def load_from_pdb_id(pdb_id, style="sphere", show_backbone=True):
    """Download and visualize a protein from PDB database"""
    try:
        if not pdb_id or len(pdb_id.strip()) != 4:
            return None
        
        pdb_id = pdb_id.strip().upper()
        
        url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
        response = requests.get(url)
        
        if response.status_code != 200:
            print(f"Could not download PDB ID: {pdb_id}")
            return None
        
        pdb_data = response.text
        atoms_data = parse_pdb_structure(pdb_data, pdb_id)
        fig = create_protein_visualization(atoms_data, style, show_backbone)
        
        return fig
    
    except Exception as e:
        print(f"Error: {str(e)}")
        return None

# Create Gradio interface
with gr.Blocks(title="Protein 3D Viewer - Plotly", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧬 Protein 3D Structure Viewer")
    gr.Markdown("Visualize protein structures from PDB files or download directly from the Protein Data Bank")
    
    with gr.Tabs():
        # Tab 1: Upload PDB file
        with gr.Tab("📁 Upload PDB File"):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(
                        label="Upload PDB File",
                        file_types=[".pdb"]
                    )
                    style_dropdown = gr.Dropdown(
                        choices=["sphere", "stick", "ball-and-stick"],
                        value="sphere",
                        label="Visualization Style"
                    )
                    backbone_checkbox = gr.Checkbox(
                        label="Show Backbone (C-alpha trace)",
                        value=True
                    )
                    visualize_btn = gr.Button("Visualize", variant="primary", size="lg")
                    
                    gr.Markdown("""
                    ### 🎨 Visualization Styles:
                    - **Sphere**: Space-filling atoms only
                    - **Stick**: Small atoms with bond lines
                    - **Ball-and-stick**: Medium atoms with bonds
                    
                    ### 🌈 Atom Colors (CPK):
                    - **Carbon**: Gray
                    - **Nitrogen**: Blue
                    - **Oxygen**: Red
                    - **Sulfur**: Yellow
                    - **Phosphorus**: Orange
                    
                    ### 💡 Tip:
                    Stick view may take longer for large proteins!
                    """)
                
                with gr.Column(scale=2):
                    output_plot = gr.Plot(label="3D Viewer")
            
            visualize_btn.click(
                fn=visualize_protein,
                inputs=[file_input, style_dropdown, backbone_checkbox],
                outputs=output_plot
            )
        
        # Tab 2: Load from PDB ID
        with gr.Tab("🌐 Load from PDB Database"):
            with gr.Row():
                with gr.Column(scale=1):
                    pdb_id_input = gr.Textbox(
                        label="PDB ID (4 characters)",
                        placeholder="e.g., 1MSO",
                        max_lines=1
                    )
                    style_dropdown2 = gr.Dropdown(
                        choices=["sphere", "stick", "ball-and-stick"],
                        value="sphere",
                        label="Visualization Style"
                    )
                    backbone_checkbox2 = gr.Checkbox(
                        label="Show Backbone (C-alpha trace)",
                        value=True
                    )
                    load_btn = gr.Button("Load & Visualize", variant="primary", size="lg")
                    
                    gr.Markdown("""
                    ### 🔬 Popular PDB Examples:
                    
                    **Small Proteins (fast):**
                    - **1MSO**: Insulin (51 residues)
                    - **1UBQ**: Ubiquitin (76 residues)
                    - **1CRN**: Crambin (46 residues)
                    
                    **Medium Proteins:**
                    - **1GFL**: Green Fluorescent Protein
                    - **2DHB**: Hemoglobin (574 residues)
                    
                    **Try stick view with small proteins first!**
                    """)
                
                with gr.Column(scale=2):
                    output_plot2 = gr.Plot(label="3D Viewer")
            
            load_btn.click(
                fn=load_from_pdb_id,
                inputs=[pdb_id_input, style_dropdown2, backbone_checkbox2],
                outputs=output_plot2
            )
    
    gr.Markdown("""
    ---
    ### 🎮 Interactive Controls:
    - **Rotate**: Click and drag
    - **Zoom**: Scroll wheel or pinch
    - **Pan**: Right-click and drag
    - **Reset View**: Double-click
    
    ### 📚 Resources:
    - [RCSB Protein Data Bank](https://www.rcsb.org/) - Search and download PDB files
    
    ### ℹ️ About:
    This viewer uses **Plotly** for 3D visualization with **Biopython** for structure parsing.
    Bonds are calculated based on inter-atomic distances.
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch(share=False)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


In [39]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os

# ========== CONFIG ==========
PDB_PATH = "proteins/5FDQ.pdb"
OUTPUT_DIR = "prankweb_results"
PRANKWEB_URL = "https://prankweb.cz/"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ============================

# Get absolute path
absolute_path = os.path.abspath(PDB_PATH)
print(f"[INFO] Using absolute path: {absolute_path}")

# Setup Chrome driver with download preferences and HEADLESS mode
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless=new')  # Use new headless mode
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--window-size=1920,1080')  # Set viewport size
prefs = {
    "download.default_directory": os.path.abspath(OUTPUT_DIR),
    "download.prompt_for_download": False,
}
chrome_options.add_experimental_option("prefs", prefs)

driver = webdriver.Chrome(options=chrome_options)

try:
    print("[INFO] Opening PrankWeb...")
    driver.get(PRANKWEB_URL)
    time.sleep(3)
    
    # Click "Custom structure" radio button using JavaScript
    print("[INFO] Clicking 'Custom structure' radio button...")
    wait = WebDriverWait(driver, 30)
    custom_structure = wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Custom structure')]")))
    driver.execute_script("arguments[0].click();", custom_structure)
    time.sleep(1)
    
    # Upload file with absolute path
    print(f"[INFO] Uploading: {absolute_path}")
    file_input = driver.find_element(By.CSS_SELECTOR, "input[type='file']")
    file_input.send_keys(absolute_path)
    time.sleep(2)
    
    # Click submit using JavaScript
    print("[INFO] Submitting...")
    submit_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button[type='submit']")))
    driver.execute_script("arguments[0].click();", submit_btn)
    
    # Wait for results (adjust timeout as needed)
    print("[INFO] Waiting for results (this may take several minutes)...")
    wait_long = WebDriverWait(driver, 600)  # 10 minutes
    info_tab = wait_long.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Info')]")))
    
    print("[INFO] Results ready! Clicking Info tab...")
    driver.execute_script("arguments[0].click();", info_tab)
    time.sleep(2)
    
    # Click download button using JavaScript
    print("[INFO] Clicking download button...")
    download_btn = wait_long.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Download prediction data')]")))
    driver.execute_script("arguments[0].click();", download_btn)
    
    print(f"[SUCCESS] Download started! Check {OUTPUT_DIR} folder")
    time.sleep(10)  # Wait for download to complete
    
except Exception as e:
    print(f"[ERROR] {e}")
    import traceback
    traceback.print_exc()
    
finally:
    driver.quit()
    print("Done!")

[INFO] Using absolute path: /Users/akshathr/Clones/Drugs/proteins/5FDQ.pdb
[INFO] Opening PrankWeb...
[INFO] Clicking 'Custom structure' radio button...
[INFO] Uploading: /Users/akshathr/Clones/Drugs/proteins/5FDQ.pdb
[INFO] Submitting...
[INFO] Waiting for results (this may take several minutes)...
[INFO] Results ready! Clicking Info tab...
[INFO] Clicking download button...
[SUCCESS] Download started! Check prankweb_results folder
Done!
