In [32]:
import requests
from pathlib import Path
import sys

def search_pdb_for_first_hit(protein_name: str):
    """
    Search RCSB PDB and return the first result found.
    """
    print(f"Searching PDB for the first hit of: {protein_name}")
    print("=" * 70)
    
    query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "struct.title",
                "operator": "contains_phrase",
                "value": protein_name
            }
        },
        "return_type": "entry",
        "request_options": {
            "return_all_hits": False,
            "results_content_type": ["experimental"],
            "sort": [{"sort_by": "score", "direction": "desc"}]
        }
    }
    
    url = "https://search.rcsb.org/rcsbsearch/v2/query"
    
    try:
        response = requests.post(url, json=query, timeout=30)
        response.raise_for_status()
        data = response.json()
        
        # Get the identifier of the first result, if it exists
        result_set = data.get('result_set', [])
        if not result_set:
            print(f"✗ No structures found for '{protein_name}'")
            return None
            
        first_pdb_id = result_set[0]['identifier']
        print(f"✓ Found first hit: {first_pdb_id}\n")
        return first_pdb_id
        
    except requests.exceptions.RequestException as e:
        print(f"✗ Error searching PDB: {e}")
        return None


def download_pdb_file(pdb_id: str, output_dir: str = "proteins") -> Path:
    """Download PDB file from RCSB PDB"""
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)
    
    pdb_id = pdb_id.strip().upper()
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    output_file = output_path / f"{pdb_id}.pdb"
    
    print(f"Downloading PDB file for {pdb_id}...")
    
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        
        with open(output_file, 'wb') as f:
            f.write(response.content)
        
        file_size = output_file.stat().st_size / 1024
        print(f"✓ Successfully downloaded: {output_file} ({file_size:.2f} KB)\n")
        return output_file
        
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            print(f"✗ Error: PDB ID '{pdb_id}' not found")
        else:
            print(f"✗ HTTP Error: {e}")
        return None
    except Exception as e:
        print(f"✗ Error downloading file: {e}")
        return None


def download_fasta_file(pdb_id: str, output_dir: str = "proteins") -> Path:
    """Download FASTA sequence for a given PDB ID"""
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)
    
    pdb_id = pdb_id.strip().upper()
    url = f"https://www.rcsb.org/fasta/entry/{pdb_id}"
    output_file = output_path / f"{pdb_id}.fasta"
    
    print(f"Downloading FASTA sequence for {pdb_id}...")
    
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        
        if not response.text.strip().startswith(">"):
            print(f"✗ No valid FASTA data found for {pdb_id}")
            return None
        
        with open(output_file, 'w') as f:
            f.write(response.text)
        
        num_lines = len(response.text.splitlines())
        print(f"✓ Successfully downloaded: {output_file} ({num_lines} lines)\n")
        return output_file
        
    except requests.exceptions.RequestException as e:
        print(f"✗ Error downloading FASTA: {e}")
        return None


def download_first_protein_structure(protein_name: str, output_dir: str = "proteins"):
    """
    Search for a protein, find the first PDB ID, and download its PDB and FASTA files.
    """
    print("\n" + "=" * 70)
    print(f"STEP 1: Find and Download PDB/FASTA for '{protein_name}'")
    print("=" * 70 + "\n")
    
    # Find the first PDB ID for the given protein name
    pdb_id = search_pdb_for_first_hit(protein_name)
    
    if not pdb_id:
        print("=" * 70)
        print("Process stopped: No PDB ID was found.")
        print("=" * 70)
        return
    
    print("=" * 70)
    print(f"STEP 2: Downloading files for PDB ID: {pdb_id}")
    print("=" * 70 + "\n")
    
    # Download the corresponding PDB and FASTA files
    pdb_file = download_pdb_file(pdb_id, output_dir)
    fasta_file = download_fasta_file(pdb_id, output_dir)
    
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    if pdb_file or fasta_file:
        print(f"  ✓ Process complete for {pdb_id}.")
        if pdb_file:
            print(f"    PDB file saved to: {pdb_file}")
        if fasta_file:
            print(f"    FASTA file saved to: {fasta_file}")
    else:
        print(f"  ✗ Failed to download files for {pdb_id}.")
    print("=" * 70 + "\n")


# Example usage
if __name__ == "__main__":
    print("\n" + "🔬 " * 20)
    protein_name = "VEGFR-2"
    download_first_protein_structure(
        protein_name=protein_name,
        output_dir="proteins"
    )


🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 🔬 

STEP 1: Find and Download PDB/FASTA for 'VEGFR-2'

Searching PDB for the first hit of: VEGFR-2
✓ Found first hit: 3V2A

STEP 2: Downloading files for PDB ID: 3V2A

Downloading PDB file for 3V2A...
✓ Successfully downloaded: proteins/3V2A.pdb (429.68 KB)

Downloading FASTA sequence for 3V2A...
✓ Successfully downloaded: proteins/3V2A.fasta (4 lines)

SUMMARY
  ✓ Process complete for 3V2A.
    PDB file saved to: proteins/3V2A.pdb
    FASTA file saved to: proteins/3V2A.fasta



In [None]:
! ramplot pdb -i /Users/akshathr/Documents/Drugs/proteinsv1/ -o my_analysis_folder -m 0 -r 600 -p png

Input Directory: /Users/akshathr/Documents/Drugs/proteinsv1/

Output Directory: my_analysis_folder

Plot Resolutions: 600

Plot File Type: png

Plot Ramachandran Map : 2D & 3D All
Torsion Angle Calculation 
5F1A.fasta
5F19.pdb
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = rows
  df.loc[len(df)] = row

In [None]:
import requests
import time
import sys
import os

# --- 1. SET YOUR INPUTS HERE ---

# Get your API token from: https://swissmodel.expasy.org/token
API_TOKEN = "YOUR_API_TOKEN_HERE" 

# Set the path to your .fasta file
FASTA_FILE_PATH = "/Users/akshathr/Documents/Drugs/proteinsv1/4PH9.fasta" # Your file path

# Give your job a title (you can change this)
PROJECT_TITLE = "My_Protein_Homology_Model"

# --- 2. SCRIPT SETUP ---
BASE_URL = "https://swissmodel.expasy.org"
HEADERS = {"Authorization": f"Token {API_TOKEN}"}

if "YOUR_API_TOKEN_HERE" in API_TOKEN:
    print("Error: Please replace 'YOUR_API_TOKEN_HERE' with your actual API token.")
    sys.exit(1)

# --- [NEW] Read and PARSE the FASTA file ---
try:
    sequence_lines = []
    with open(FASTA_FILE_PATH, 'r') as f:
        for line in f:
            # Ignore the header line
            if not line.startswith('>'):
                # Add the line to our list, removing any whitespace/newlines
                sequence_lines.append(line.strip())
    
    # Join all sequence lines into a single string
    FASTA_SEQUENCE = "".join(sequence_lines)

    if len(FASTA_SEQUENCE) < 50:
        print(f"Error: No valid sequence found in '{FASTA_FILE_PATH}'.")
        print("Please ensure it is a valid FASTA file.")
        sys.exit(1)
    
    print(f"Successfully read and parsed FASTA file: {FASTA_FILE_PATH}")

except FileNotFoundError:
    print(f"Error: File not found at '{FASTA_FILE_PATH}'.")
    print("Please check the path and filename.")
    sys.exit(1)
except Exception as e:
    print(f"Error reading file: {e}")
    sys.exit(1)
# --- End of new section ---


# --- 3. (STEP 4, Part 1) SUBMIT MODELING JOB ---
print(f"Submitting job '{PROJECT_TITLE}' to SWISS-MODEL...")

payload = {
    # [FIXED] Use "target_sequences" (plural)
    "target_sequences": FASTA_SEQUENCE, 
    "project_title": PROJECT_TITLE
}

# Use the /automodel/ endpoint
try:
    submit_response = requests.post(
        f"{BASE_URL}/automodel/", 
        headers=HEADERS, 
        json=payload
    )
    submit_response.raise_for_status() # Raises an error for 4xx or 5xx responses

    project_id = submit_response.json().get("project_id")
    print(f"Job submitted successfully! Project ID: {project_id}")

except requests.exceptions.HTTPError as e:
    print(f"Error submitting job: {e.response.status_code}")
    print(f"Details: {e.response.text}")
    sys.exit(1)
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    sys.exit(1)


# --- 4. (STEP 4, Part 2) POLL FOR RESULTS ---
while True:
    print("Checking job status...")
    try:
        # Use the /project/{project_id}/models/summary/ endpoint
        status_response = requests.get(
            f"{BASE_URL}/project/{project_id}/models/summary/", 
            headers=HEADERS
        )
        status_response.raise_for_status()
        
        status_data = status_response.json()
        job_status = status_data.get("status")
        
        print(f"Current status: {job_status}")

        if job_status == "COMPLETED":
            print("Modeling completed!")
            
            # --- 5. (STEP 4, Part 3) DOWNLOAD THE .PDB FILE ---
            models = status_data.get("models")
            if not models:
                print("Job completed but no models were found.")
                break
            
            # Get the first (and usually best) model's ID
            model_id = models[0].get("model_id")
            output_filename = f"{project_id}_{model_id}.pdb"
            
            print(f"Downloading model {model_id} to {output_filename}...")
            
            # Use the /project/{project_id}/models/{model_id}.pdb endpoint
            pdb_response = requests.get(
                f"{BASE_URL}/project/{project_id}/models/{model_id}.pdb",
                headers=HEADERS
            )
            pdb_response.raise_for_status()
            
            # Save the file
            with open(output_filename, "w") as f:
                f.write(pdb_response.text)
                
            print(f"\nSuccessfully saved model to: {output_filename}")
            print(f"You can now use this '{output_filename}' file for Step 5 (upload to CASTp).")
            
            break # Exit the while loop
            
        elif job_status == "FAILED":
            print("Job failed. Check the SWISS-MODEL website for details.")
            break # Exit the while loop
            
        elif job_status in ["RUNNING", "PENDING"]:
            print("Job is still running. Waiting 60 seconds...")
            time.sleep(60)
        
        else:
            print(f"Unknown status: {job_status}. Waiting...")
            time.sleep(60)

    except requests.exceptions.HTTPError as e:
        print(f"Error checking status: {e.response.status_code}. Retrying...")
        time.sleep(30)
    except Exception as e:
        print(f"An unexpected error occurred: {e}. Retrying...")
        time.sleep(30)

Successfully read and parsed FASTA file: /Users/akshathr/Documents/Drugs/proteinsv1/4PH9.fasta
Submitting job 'My_Protein_Homology_Model' to SWISS-MODEL...
Job submitted successfully! Project ID: bdc7c4
Checking job status...
Current status: INITIALISED
Unknown status: INITIALISED. Waiting...
Checking job status...
Current status: RUNNING
Job is still running. Waiting 60 seconds...
Checking job status...
Current status: RUNNING
Job is still running. Waiting 60 seconds...
Checking job status...
Current status: RUNNING
Job is still running. Waiting 60 seconds...
Checking job status...
Current status: COMPLETED
Modeling completed!
Downloading model 01 to bdc7c4_01.pdb...

Successfully saved model to: bdc7c4_01.pdb
You can now use this 'bdc7c4_01.pdb' file for Step 5 (upload to CASTp).


In [29]:
import requests
import sys
import os
from bs4 import BeautifulSoup # <-- New library

# --- 1. SET YOUR INPUTS HERE ---

# The PDB file you downloaded from SWISS-MODEL
# e.g., "61b1b068a0a1_01.pdb" 
PDB_FILE_PATH = "/Users/akshathr/Documents/Drugs/bdc7c4_01.pdb" 

# CASTpfold will email a link to your results to this address
USER_EMAIL = "akshath.r333@gmail.com" 

# Give your job a name (optional, but good practice)
JOB_NAME = "My_Protein_Pocket_Analysis"

# --- 2. SCRIPT SETUP ---
# The new URL for the computation page
COMPUTE_URL = "https://cfold.bme.uic.edu/castpfold/compute"

# --- 3. INPUT VALIDATION ---
if "YOUR_MODEL.pdb" in PDB_FILE_PATH:
    print("Error: Please update 'PDB_FILE_PATH' with the path to your .pdb file.")
    sys.exit(1)
if "your.email@example.com" in USER_EMAIL:
    print("Error: Please update 'USER_EMAIL' with your email address.")
    sys.exit(1)
if not os.path.exists(PDB_FILE_PATH):
    print(f"Error: File not found at '{PDB_FILE_PATH}'")
    sys.exit(1)

# --- 4. PREPARE AND SUBMIT JOB ---

# We use a Session object to persist cookies, which is often
# needed for CSRF authentication to work.
try:
    with requests.Session() as session:
        
        # --- Step 4a: GET token ---
        print("Loading submission page to get security token...")
        
        get_response = session.get(COMPUTE_URL)
        get_response.raise_for_status()
        
        # Parse the HTML to find the hidden CSRF token
        soup = BeautifulSoup(get_response.text, 'html.parser')
        csrf_token_input = soup.find('input', {'name': 'csrf_token'})
        
        if not csrf_token_input:
            print("Error: Could not find 'csrf_token' on the page.")
            print("The website structure may have changed.")
            sys.exit(1)
            
        csrf_token = csrf_token_input['value']
        print(f"Found CSRF Token: {csrf_token[-6:]}") # Print last 6 chars
        
        
        # --- Step 4b: POST file ---
        print(f"Preparing to upload {PDB_FILE_PATH}...")

        with open(PDB_FILE_PATH, 'rb') as f:
            
            # These are the field names from the new website's form
            # 'file' is for the pdb file
            files = {
                'file': (os.path.basename(PDB_FILE_PATH), f, 'application/octet-stream')
            }
            
            # 'email', 'job_name', and 'csrf_token' are sent in the data payload
            data = {
                'email': USER_EMAIL,
                'job_name': JOB_NAME,
                'csrf_token': csrf_token
            }
            
            print(f"Submitting {PDB_FILE_PATH} to CASTpfold server...")
            
            # Submit the data to the same URL
            post_response = session.post(COMPUTE_URL, files=files, data=data)
            post_response.raise_for_status()

            print("\n--- Job Submitted Successfully! ---")
            print(f"The server is processing your job. A link will also be emailed to: {USER_EMAIL}")
            
            # The new server conveniently redirects us directly to the results page
            results_url = post_response.url
            
            if "results" in results_url:
                print("\n-------------------------------------------")
                print("Your permanent results link is:")
                print(results_url)
                print("-------------------------------------------")
            else:
                print("\nJob submitted, but could not parse results URL.")
                print(f"Check the site and your email. Response URL was: {results_url}")

except requests.exceptions.HTTPError as e:
    print(f"Error during web request: {e.response.status_code}")
    print(f"Details: {e.response.text}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Loading submission page to get security token...
Error: Could not find 'csrf_token' on the page.
The website structure may have changed.


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
