In [6]:
import pyodbc
import requests
from bs4 import BeautifulSoup

# Connection settings for the SQL Server instance that holds the protein tables
database = 'ProteinDB'
server = 'PALOSKY'
# Forwarded as the ODBC Trusted_Connection keyword; 'yes' asks the driver for
# Windows integrated authentication instead of a username/password.
Trusted_Connection = 'yes'

# Open a pyodbc connection to a SQL Server database
def connect_to_sql_server(server, database, Trusted_Connection):
    """Return an open pyodbc connection to ``database`` on ``server``.

    Args:
        server: Value placed in the SERVER keyword of the connection string.
        database: Value placed in the DATABASE keyword.
        Trusted_Connection: Value placed in the Trusted_Connection keyword
            (the script passes 'yes').

    Returns:
        The connection object produced by ``pyodbc.connect``.
    """
    # Doubled braces emit a literal {ODBC Driver 17 for SQL Server} in the f-string.
    return pyodbc.connect(
        f'Driver={{ODBC Driver 17 for SQL Server}};SERVER={server};Trusted_Connection={Trusted_Connection};DATABASE={database}'
    )

# Open the shared connection used by the lookups and inserts in this script
connection = connect_to_sql_server(
    server=server,
    database=database,
    Trusted_Connection=Trusted_Connection,
)

# Function to retrieve peptides surrounding mutation sites from the protein_9_mer table
def get_mutation_peptides(connection, mutation_details, flank=8):
    """Fetch the protein_9_mer rows that lie near each mutation site.

    For every mutation, selects the rows of ``protein_9_mer`` with the same
    ``protein_id`` whose ``position`` lies in
    ``[mutation_position - flank, mutation_position + flank]`` (inclusive) and
    tags each returned row with that mutation's HLA allele.

    Args:
        connection: Open DB-API connection whose driver uses ``?`` placeholders.
        mutation_details: Iterable of dicts with the keys ``'protein_id'``,
            ``'mutation_position'`` (a number) and ``'HLA_allele'``.
        flank: Half-width of the position window. Defaults to 8, the value
            this function has always used.

    Returns:
        list[dict]: One dict per matching row with the keys ``'mer'``,
        ``'protein_id'``, ``'position'`` and ``'HLA_allele'``, in the order the
        mutations were supplied. A mutation without matching rows contributes
        nothing; a message is printed for it instead.

    Raises:
        KeyError: If a mutation dict lacks one of the required keys.
    """
    # NOTE(review): the window is symmetric around the mutation. If `position`
    # is the start index of each 9-mer, rows with position > mutation_position
    # would not contain the mutated residue - confirm the intended window.
    query = """
        SELECT mer, protein_id, position
        FROM protein_9_mer
        WHERE protein_id = ?
        AND position BETWEEN ? AND ?
    """

    mutation_peptides = []
    cursor = connection.cursor()
    try:
        for mutation in mutation_details:
            protein_id = mutation['protein_id']
            mutation_position = mutation['mutation_position']
            allele = mutation['HLA_allele']

            cursor.execute(
                query,
                (protein_id, mutation_position - flank, mutation_position + flank),
            )
            rows = cursor.fetchall()

            if not rows:
                print(f"No peptides found for protein_id: {protein_id} around mutation position: {mutation_position}")
                continue

            mutation_peptides.extend(
                [
                    {
                        'mer': row[0],
                        'protein_id': row[1],
                        'position': row[2],
                        'HLA_allele': allele,
                    }
                    for row in rows
                ]
            )
    finally:
        # Release the cursor deterministically, including when a query fails.
        cursor.close()

    return mutation_peptides

# Insert one binding prediction into the binding_protein table
def insert_binding_prediction(connection, allele, mer, binding_prediction):
    """Insert a single prediction row into ``binding_protein`` and commit it.

    Args:
        connection: Open DB-API connection whose driver uses ``?`` placeholders.
        allele: Value stored in the ``allele`` column.
        mer: Value stored in the ``mer`` column.
        binding_prediction: Value stored in the ``binding_prediction`` column.

    Returns:
        None. The row is committed before the function returns.

    Raises:
        Whatever the driver raises when the insert or the commit fails; the
        error is not handled here.
    """
    insert_query = """
    INSERT INTO binding_protein (allele, mer, binding_prediction)
    VALUES (?, ?, ?)
    """

    cursor = connection.cursor()
    try:
        cursor.execute(insert_query, (allele, mer, binding_prediction))
        # Commit per row so every stored prediction survives a later failure.
        connection.commit()
    finally:
        # Release the cursor deterministically, including when the insert fails.
        cursor.close()


# Mutations to analyse, written as (protein_id, mutation_position, HLA_allele).
# Append further tuples to the inner sequence to cover more mutations.
mutation_details = [
    dict(protein_id=protein_id, mutation_position=position, HLA_allele=hla)
    for protein_id, position, hla in (
        ('Q7Z4H8', 467, 'HLA-DRB1*01:01'),
        ('Q7Z7G8', 1, 'HLA-DRB1*04:01'),
    )
]

# Look up the stored 9-mers around each mutation site
mutation_peptides = get_mutation_peptides(
    connection,
    mutation_details=mutation_details,
)

# The connection is intentionally left open here: it is reused for the inserts
# further down and closed at the end of the script.

# Function to interact with the NetMHCIIpan server
def query_netmhcii_pan(allele, sequence, *, timeout=30):
    """
    POST an allele/peptide pair to the NetMHCIIpan web service and return the
    text of the first <pre> element found in the HTML reply.

    Args:
        allele (str): The allele to send (e.g., HLA-DRB1*01:01).
        sequence (str): The peptide sequence (9-mer) to send.
        timeout (float | tuple): Seconds to wait for the server, forwarded to
            ``requests.post``. Without a timeout a stalled server would block
            the caller indefinitely.

    Returns:
        str | None: The stripped text of the first <pre> element. None when the
        request raises, the status code is not 200, or the reply has no <pre>
        element; a diagnostic message is printed in each of those cases.
    """
    # Updated URL as per the redirect
    # NOTE(review): confirm that this URL accepts a direct form POST with the
    # field names below - nothing in this file verifies the endpoint contract.
    url = "https://services.healthtech.dtu.dk/service.php?NetMHCIIpan-4.0"

    # Form fields sent with the POST request
    payload = {
        'allele': allele,
        'peptide': sequence,
        'length': '9',  # peptides handled by this script are 9-mers
        'format': 'short'  # request the short output format
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Covers connection errors and timeouts alike.
        print(f"Error during NetMHCIIpan request: {e}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data from NetMHCIIpan. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    pre_tag = soup.find('pre')

    if pre_tag is None:
        print("No <pre> tag found in the response. Check the server's response format.")

        # Print the entire response for debugging. response.text never raises
        # on undecodable bytes, unlike a bare bytes.decode().
        print(response.text)
        return None

    # The service output is taken to be whatever sits inside the <pre> element.
    return pre_tag.text.strip()

# Query NetMHCIIpan for every retrieved 9-mer, store each prediction in the
# database, and keep the successful ones in `results` for display.
results = []

try:
    # One request per 9-mer around the mutation site
    for peptide in mutation_peptides:
        mer = peptide['mer']
        allele = peptide['HLA_allele']

        print(f"Querying NetMHCIIpan for allele: {allele}, 9-mer: {mer}")

        result = query_netmhcii_pan(allele, mer)
        if result:
            # Insert the binding prediction into the binding_protein table
            insert_binding_prediction(connection, allele, mer, result)
            results.append({
                'allele': allele,
                'mer': mer,
                'binding_prediction': result
            })
        else:
            # None (request/parse failure) and an empty <pre> both land here.
            print(f"Failed to get binding prediction for allele: {allele}, 9-mer: {mer}")

    # Display results
    if results:
        for res in results:
            print(f"Allele: {res['allele']}, 9-mer: {res['mer']}, Binding Prediction: {res['binding_prediction']}")
    else:
        print("No binding predictions were retrieved.")
finally:
    # Close the SQL connection even if a query or insert above raised.
    connection.close()


Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: CYYYQVLQK
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: YYYQVLQKY
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: YYQVLQKYA
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: YQVLQKYAE
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: QVLQKYAER
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: VLQKYAERQ
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: LQKYAERQS
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: QKYAERQSS
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: KYAERQSSK
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: YAERQSSKP
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: AERQSSKPE
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: ERQSSKPEV
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: RQSSKPEVR
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: QSSKPEVRD
Querying NetMHCIIpan for allele: HLA-DRB1*01:01, 9-mer: SSKPEVRDG
Querying N