In [2]:
import requests
import time
import sys
import os
import tarfile
import io

# Configuration for the ColabFold/MMseqs2 Server used by Boltz-2
# Base URL of the MSA server; each request builds its endpoint path on top of it.
MSA_SERVER_URL = "https://api.colabfold.com"
# Client identifier string.
# NOTE(review): presumably intended as the HTTP User-Agent header value — confirm.
# The mixed-case name is kept as-is because other code may reference it.
User_Agent = "Boltz-2-Local-Script"

def submit_job(sequence, job_name="boltz_msa"):
    """
    Submits a protein sequence to the MSA server.

    Args:
        sequence: Protein sequence to search. It is sent as a single FASTA
            record with the header ``>1``.
        job_name: Accepted for backward compatibility; it is not sent to the
            server.

    Returns:
        The value of the ``"id"`` field of the server's JSON reply (the
        ticket ID used by the polling and download steps).

    Raises:
        SystemExit: With exit code 1 if the sequence is empty or blank, the
            HTTP request fails, the reply is not JSON, or the reply carries
            no ``"id"`` field.
    """
    # Fail fast instead of spending a server round trip on an empty query.
    if not sequence or not sequence.strip():
        print("❌ Error: Empty protein sequence; nothing to submit.")
        sys.exit(1)

    endpoint = f"{MSA_SERVER_URL}/ticket/msa"

    # Standard Boltz-2 / ColabFold parameters
    # NOTE(review): confirm against the server API which of these fields it
    # honours and what each "mode" value selects.
    payload = {
        "q": f">1\n{sequence}",
        "mode": "all",  # Searches UniRef30 + Environmental
        "db": "uniref30,colabfold_envdb_202108",
        "use_templates": 0,
        "use_pairing": 1,
    }
    # Identify this client to the server using the module-level setting.
    headers = {"User-Agent": User_Agent}

    print(f"🚀 Submitting sequence to {MSA_SERVER_URL}...")
    try:
        response = requests.post(
            endpoint, data=payload, headers=headers, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"❌ Connection error during submission: {e}")
        sys.exit(1)
    except ValueError as e:
        # Older versions of requests raise a plain ValueError for a non-JSON
        # body; this clause must stay after RequestException because several
        # requests exceptions also derive from ValueError.
        print(f"❌ Error: Server returned a non-JSON response: {e}")
        sys.exit(1)

    # A JSON reply that is not an object (e.g. null or a list) has no ticket.
    if not isinstance(data, dict) or "id" not in data:
        print(f"❌ Error: No Job ID received. Server said: {data}")
        sys.exit(1)

    ticket_id = data["id"]
    print(f"✅ Job submitted successfully. Ticket ID: {ticket_id}")
    return ticket_id

def poll_status(ticket_id, poll_interval=5, retry_delay=10, max_wait=3600):
    """
    Polls the server status until the MSA generation is complete.

    Args:
        ticket_id: Ticket ID of the submitted job.
        poll_interval: Seconds to sleep between status checks while the job
            is running, pending, or reports an unrecognised status.
        retry_delay: Seconds to sleep before retrying after a failed request
            or an unparsable reply.
        max_wait: Upper bound, in seconds, on the total time spent polling.
            Pass ``None`` to wait indefinitely.

    Returns:
        None, once the server reports the status ``"COMPLETE"``.

    Raises:
        SystemExit: With exit code 1 if the server reports ``"ERROR"`` or
            ``max_wait`` seconds elapse without completion.
    """
    status_endpoint = f"{MSA_SERVER_URL}/ticket/{ticket_id}"
    headers = {"User-Agent": User_Agent}
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if max_wait is None else time.monotonic() + max_wait

    print("⏳ Waiting for MSA generation (this can take 2-10 minutes)...")

    while True:
        try:
            response = requests.get(status_endpoint, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on older versions of requests.
            print(f"\n⚠️ Network glitch ({e}), retrying...")
            delay = retry_delay
        else:
            # A JSON reply that is not an object is treated as an unknown status.
            status = data.get("status") if isinstance(data, dict) else None

            if status == "COMPLETE":
                print("\n✅ MSA Generation Complete!")
                return
            if status == "ERROR":
                print(f"\n❌ Server reported an error: {data.get('msg', 'Unknown error')}")
                sys.exit(1)
            if status in ("RUNNING", "PENDING"):
                # Progress dot on the same line, flushed so it shows immediately.
                sys.stdout.write(".")
                sys.stdout.flush()
            else:
                print(f"\n⚠️ Unknown status: {status}")
            delay = poll_interval

        # Without a bound, an unknown status or a permanent HTTP error would
        # keep this loop spinning forever.
        if deadline is not None and time.monotonic() >= deadline:
            print(f"\n❌ Timed out after {max_wait} seconds waiting for ticket {ticket_id}.")
            sys.exit(1)

        time.sleep(delay)

def download_results(ticket_id, output_filename):
    """
    Downloads and extracts the A3M file from the server.

    The reply is read as a gzip-compressed tarball. The first regular member
    whose name ends in ``.a3m`` is decoded as UTF-8 and written to
    ``output_filename``.

    Args:
        ticket_id: Ticket ID of a completed job.
        output_filename: Path of the file to write the alignment to.

    Returns:
        The absolute path of the written file on success, or ``None`` if the
        download, extraction, or write failed (an error message is printed).
    """
    download_endpoint = f"{MSA_SERVER_URL}/result/download/{ticket_id}"
    headers = {"User-Agent": User_Agent}

    print("⬇️  Downloading results...")
    try:
        # The whole body is needed in memory for tarfile, so no streaming.
        response = requests.get(download_endpoint, headers=headers, timeout=60)
        response.raise_for_status()

        # The server returns a tarball containing multiple files (a3m, pdb70, etc.)
        # We need to extract just the .a3m file.
        with tarfile.open(fileobj=io.BytesIO(response.content), mode="r:gz") as tar:
            # Only regular files: extractfile() returns None for other member types.
            a3m_members = [
                m for m in tar.getmembers()
                if m.isfile() and m.name.endswith(".a3m")
            ]

            if not a3m_members:
                print("❌ Error: No .a3m file found in the server response.")
                return None

            # Extract the first A3M file found (usually there is only one relevant one)
            with tar.extractfile(a3m_members[0]) as member_file:
                content = member_file.read().decode("utf-8")

        # Write with the same encoding the content was decoded with, rather
        # than the platform's locale encoding.
        with open(output_filename, "w", encoding="utf-8") as out:
            out.write(content)

    except Exception as e:
        # Best-effort boundary: report any failure instead of raising.
        print(f"❌ Error downloading/extracting results: {e}")
        return None

    saved_path = os.path.abspath(output_filename)
    print(f"🎉 Success! MSA saved to: {saved_path}")
    return saved_path

if __name__ == "__main__":
    # --- USER INPUT ---
    # Set PROTEIN_SEQUENCE to the sequence to search and OUTPUT_FILE to the
    # path where the extracted A3M alignment should be written.
    #
    # Alternative example input (uncomment to use it instead of DHFR below):
    # PROTEIN_SEQUENCE = "MQYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE"
    # OUTPUT_FILE = "GB1.a3m"
    PROTEIN_SEQUENCE = "MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGRHTWESIGRPLPGRKNIILSSQPGTDDRVTWVKSVDEAIAACGDVPEIMVIGGGRVYEQFLPKAQKLYLTHIDAEVEGDTHFPDYEPDDWESVFSEFHDADAQNSHSYCFEILERR"
    OUTPUT_FILE = "DHFR.a3m"

    # 1. Submit the sequence; the returned ticket ID identifies the job in
    #    the two steps below.
    ticket = submit_job(PROTEIN_SEQUENCE)

    # 2. Poll until the server reports the job as complete.
    poll_status(ticket)

    # 3. Download the result archive and save the A3M file to OUTPUT_FILE.
    download_results(ticket, OUTPUT_FILE)

🚀 Submitting sequence to https://api.colabfold.com...
✅ Job submitted successfully. Ticket ID: T235y3ovjkPa1IIJA3PgoD464vDH0x99Kvc_2Q
⏳ Waiting for MSA generation (this can take 2-10 minutes)...

✅ MSA Generation Complete!
⬇️  Downloading results...
🎉 Success! MSA saved to: /home/xux/Desktop/ProteinMCP/ProteinMCP/mcp-servers/msa_server_mcp/notebooks/DHFR.a3m
