## Generate multiallelic IEDB properties

In [None]:
import requests
import pandas as pd
import numpy as np
import os
import re
import csv
from io import StringIO

In [None]:
def get_immunogenicity_df(model, sequence_string, allele, length,
                          url='http://tools-cluster-interface.iedb.org/tools_api/mhci/',
                          timeout=None):
    """POST a prediction request to an IEDB tools API endpoint and return the reply text.

    Despite the name, the return value is the raw response body (a string),
    not a DataFrame; callers parse it themselves.

    Args:
        model: Value sent as the ``method`` form field.
        sequence_string: Value sent as ``sequence_text``. It is inserted into
            the request body verbatim, so it must already be percent-encoded
            (callers in this notebook build it with ``convert_to_iedb``).
        allele: Value sent as the ``allele`` form field.
        length: Value sent as the ``length`` form field. Must be a string
            because the body is built by concatenation.
        url: Endpoint to post to. Defaults to the Class-I (mhci) endpoint that
            was previously hard-coded, so existing four-argument calls behave
            exactly as before.
        timeout: Forwarded to ``requests.post``. ``None`` (the default) waits
            indefinitely, which is the previous behaviour.

    Returns:
        The response body as text. The HTTP status is not checked.
    """
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    data = 'method=' + model + '&sequence_text=' + sequence_string + '&allele=' + allele + '&length=' + length
    # Echo the request body so progress is visible in the cell output
    print(data)
    response = requests.post(url, headers=headers, data=data, timeout=timeout)
    return response.text

In [None]:
#function that converts list of strings to fasta format for API query

def convert_to_iedb(list_of_strings):
    """Encode peptide sequences as a percent-encoded FASTA string for the API query.

    Every sequence becomes a record ``>peptide<i>`` + newline + sequence, where
    ``<i>`` is its zero-based position in the input. ``>`` is written as
    ``%3E`` and the newline as ``%0A``; records are separated by one ``%0A``.

    Args:
        list_of_strings: Iterable of peptide sequence strings.

    Returns:
        The encoded string, e.g. ``'%3Epeptide0%0AAAA%0A%3Epeptide1%0ABBB'``.
        An empty input yields ``'%3E'``.
    """
    # Plain concatenation (rather than an f-string) so a non-string sequence
    # still raises TypeError instead of being silently formatted.
    records = [
        'peptide' + str(index) + '%0A' + sequence
        for index, sequence in enumerate(list_of_strings)
    ]
    return '%3E' + '%0A%3E'.join(records)

In [None]:
# Function to filter out any unknown peptides

def filter_peptide(df):
    """Return the rows of ``df`` whose peptide does not contain the letter 'X'.

    Args:
        df: DataFrame with a string column named ``'peptide'``.

    Returns:
        A new DataFrame holding only the rows whose ``'peptide'`` value has no
        'X' (case-sensitive). The original index labels are kept and ``df``
        itself is not modified. Rows with a missing peptide are kept.
    """
    # One vectorised pass instead of re-filtering the whole frame once per
    # offending peptide. regex=False makes 'X' a literal substring test, and
    # na=False treats missing values as "does not contain X".
    has_unknown = df['peptide'].str.contains('X', regex=False, na=False).astype(bool)
    return df.loc[~has_unknown]

In [None]:
# Read in data
PATH = '/home/ddz5/immunoGAT/MultiAllele/immuno_data_multi_allele.txt'

# Load the table and add one zero-initialised placeholder column per
# (method, metric) pair; later cells overwrite these with API predictions.
peptide_df = pd.read_table(PATH).assign(
    netmhcpan_el_score=0,
    netmhcpan_el_ic50=0,
    netmhcpan_el_rank=0,
    netmhcpan_ba_score=0,
    netmhcpan_ba_ic50=0,
    netmhcpan_ba_rank=0,
)

# Drop every row whose peptide contains an 'X'
peptide_df = filter_peptide(peptide_df)
peptide_df

Unnamed: 0,peptide,allele,immunogenicity,netmhcpan_el_score,netmhcpan_el_ic50,netmhcpan_el_rank,netmhcpan_ba_score,netmhcpan_ba_ic50,netmhcpan_ba_rank
0,LSNSGKDVPK,HLA-A*11:01,0,0,0,0,0,0,0
1,TTLFHTFYEL,HLA-A*24:02,0,0,0,0,0,0,0
2,KFGDLTNNF,HLA-A*24:02,0,0,0,0,0,0,0
3,KLFESKAEL,HLA-A*02:01,0,0,0,0,0,0,0
4,KLFESKAELA,HLA-A*02:01,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
24535,DSKGRSYNL,HLA-B*08:01,1,0,0,0,0,0,0
24536,DSKKRSYNL,HLA-B*08:01,1,0,0,0,0,0,0
24537,TDLGQNLLY,HLA-A*01:01,1,0,0,0,0,0,0
24538,AVVSLLRLLK,HLA-A*11:01,1,0,0,0,0,0,0


In [None]:
# Create dictionary of peptide sequences split among alleles:
# one (initially empty) list per distinct allele

peptide_dict = {allele: [] for allele in set(peptide_df['allele'])}

# Store all peptide seqs into dict, in table order.
# Walking the two columns in lockstep avoids building a full row object
# twice per row, which the previous .iloc-based loop did.

for allele, peptide in zip(peptide_df['allele'], peptide_df['peptide']):
    peptide_dict[allele].append(peptide)

In [None]:
def get_immunogenicity_df(model, sequence_string, allele, length,
                          url='http://tools-cluster-interface.iedb.org/tools_api/mhci/',
                          timeout=None):
    """POST a prediction request to an IEDB tools API endpoint and return the reply text.

    NOTE: this cell redefines a function of the same name that appears earlier
    in the notebook; whichever cell ran last is the definition in effect.

    Despite the name, the return value is the raw response body (a string),
    not a DataFrame; callers parse it themselves.

    Args:
        model: Value sent as the ``method`` form field.
        sequence_string: Value sent as ``sequence_text``. It is inserted into
            the request body verbatim, so it must already be percent-encoded.
        allele: Value sent as the ``allele`` form field.
        length: Value sent as the ``length`` form field. Must be a string
            because the body is built by concatenation.
        url: Endpoint to post to. Defaults to the Class-I (mhci) endpoint that
            was previously hard-coded, so existing four-argument calls behave
            exactly as before.
        timeout: Forwarded to ``requests.post``. ``None`` (the default) waits
            indefinitely, which is the previous behaviour.

    Returns:
        The response body as text. The HTTP status is not checked.
    """
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    data = 'method=' + model + '&sequence_text=' + sequence_string + '&allele=' + allele + '&length=' + length
    # Echo the request body so progress is visible in the cell output
    print(data)
    response = requests.post(url, headers=headers, data=data, timeout=timeout)
    return response.text

In [None]:
# Return all the unique lengths in a list of peptide strings
def unique_lengths(peptides):
    """Return the set of distinct string lengths found in ``peptides``."""
    return {len(sequence) for sequence in peptides}

In [None]:
# Function to subdivide peptide sequences according to sequence length
def subdivide_peptide(peptides):
    """Group peptide strings by their length.

    Returns a dict that maps every length occurring in ``peptides`` to the
    list of peptides of that length, each list in input order.
    """
    # Create the (empty) bucket for every length first, then fill them
    by_length = {size: [] for size in unique_lengths(peptides)}
    for sequence in peptides:
        by_length[len(sequence)].append(sequence)
    return by_length

In [None]:
# Calculate Class-I Binding predictions using netmhcpan_el and netmhcpan_ba methods

# 27 unique allele - submit API calls as grouped by allele, one request per
# peptide length within the allele

# To account for errors, implement queue so that you can append the length back if it fails to work
from collections import deque

alleles = list(peptide_dict.keys())

methods = ['netmhcpan_el', 'netmhcpan_ba']

# NOTE(review): not passed to get_immunogenicity_df below, so it has no effect on the requests
url = 'http://tools-cluster-interface.iedb.org/tools_api/mhci/'

# No longer read by this cell (responses are now parsed in memory instead of
# via a scratch file); the name is kept in case other code refers to it.
result_PATH = '/content/output'

# Upper bound on requests per (method, allele), so a query that keeps failing
# cannot loop forever
MAX_ATTEMPTS = 1000

for method in methods:
    for allele in alleles:
        # Subdivide the allele's peptides by length
        subpeptides = subdivide_peptide(peptide_dict[allele])
        # Predictions are allele-specific: only rows of this allele may be
        # updated. Matching on peptide alone would overwrite rows that list
        # the same peptide under a different allele.
        allele_rows = peptide_df['allele'] == allele

        # Lengths still waiting for a successful response
        q = deque(subpeptides)

        for attempt in range(MAX_ATTEMPTS):
            if not q:
                break
            # Pop queue
            length = q.popleft()
            converted_seq = convert_to_iedb(subpeptides[length])

            res = get_immunogenicity_df(method, converted_seq, allele, str(length))

            # Parse the tab-separated reply directly from memory
            try:
                result_pd = pd.read_table(StringIO(res))
            except (pd.errors.EmptyDataError, pd.errors.ParserError):
                # A blank or unparsable reply is treated like any other failed request
                result_pd = pd.DataFrame()

            # Error check for an empty response: a usable reply has at least
            # one row whose second field was parsed as an integer
            usable = (
                not result_pd.empty
                and result_pd.shape[1] > 1
                and isinstance(result_pd.iloc[0, 1], np.integer)
            )
            if not usable:
                # Enqueue previous length so it is retried
                q.append(length)
                print(allele, length, method)
                continue

            # Replies carry either a 'score' column or, failing that, 'ic50'
            value_col = 'score' if 'score' in result_pd.columns else 'ic50'

            # One row per peptide; on duplicates the last row wins
            by_peptide = result_pd.drop_duplicates(subset='peptide', keep='last').set_index('peptide')
            targets = allele_rows & peptide_df['peptide'].isin(by_peptide.index)
            target_peptides = peptide_df.loc[targets, 'peptide']

            for source_col, target_col in (
                (value_col, f'{method}_{value_col}'),
                ('percentile_rank', f'{method}_rank'),
            ):
                # The placeholder columns start as integer zeros; make the
                # column float before storing float predictions in it
                peptide_df[target_col] = peptide_df[target_col].astype(float)
                peptide_df.loc[targets, target_col] = target_peptides.map(by_peptide[source_col])

        if q:
            # Attempts ran out; these lengths keep their placeholder values
            print('No usable response for', allele, sorted(q), method)

print("Done!")

method=netmhcpan_el&sequence_text=%3Epeptide0%0ARTKQLYPEW%0A%3Epeptide1%0ASMMPEAMTI%0A%3Epeptide2%0ARTWHYCGSY%0A%3Epeptide3%0AVTNHAPLSW%0A%3Epeptide4%0AKIMKVVNRW%0A%3Epeptide5%0AKAYANMWSL%0A%3Epeptide6%0AKSAHGSPTF%0A%3Epeptide7%0ARQYDPVAAL%0A%3Epeptide8%0AYLTAYQATV%0A%3Epeptide9%0ARVLDCRTAF%0A%3Epeptide10%0ARVLDTVEKW%0A%3Epeptide11%0ALLIPNPPYI%0A%3Epeptide12%0ATLLPATMNI%0A%3Epeptide13%0ASLRILYMTL%0A%3Epeptide14%0AKTWGKNLVF&allele=HLA-A*32:01&length=9
method=netmhcpan_el&sequence_text=%3Epeptide0%0ARSCTMPPVSF%0A%3Epeptide1%0ARMAMTDTTPF%0A%3Epeptide2%0AAQFAGKDQTY%0A%3Epeptide3%0AVVLPSDVTSY%0A%3Epeptide4%0ADLAGGTFDVS%0A%3Epeptide5%0ARIRDGLQYGW&allele=HLA-A*32:01&length=10
method=netmhcpan_el&sequence_text=%3Epeptide0%0AYMWLGARYLEF%0A%3Epeptide1%0AKVVLPSDVTSY%0A%3Epeptide2%0AATWASHIHLVI&allele=HLA-A*32:01&length=11
method=netmhcpan_el&sequence_text=%3Epeptide0%0AIQYPGSEIK%0A%3Epeptide1%0AVLAAKYIQY%0A%3Epeptide2%0AKQHGNIMYR%0A%3Epeptide3%0AGFLGRYIVK%0A%3Epeptide4%0ASLLNNYTLK%0A%3Epeptide5%0

In [None]:
# Save results to csv (the DataFrame index is written as the first, unnamed column).
# The file used to be named '.xlsx' although to_csv writes plain comma-separated text.
peptide_df.to_csv('immuno_data_multi_allele.csv')

In [None]:
peptide_df

Unnamed: 0,peptide,allele,immunogenicity,netmhcpan_el_score,netmhcpan_el_ic50,netmhcpan_el_rank,netmhcpan_ba_score,netmhcpan_ba_ic50,netmhcpan_ba_rank
0,LSNSGKDVPK,HLA-A*11:01,0,0.24800,0,0.72,0,127.54,0.55
1,TTLFHTFYEL,HLA-A*24:02,0,0.00179,0,6.40,0,5458.01,2.40
2,KFGDLTNNF,HLA-A*24:02,0,0.90200,0,0.02,0,69.75,0.11
3,KLFESKAEL,HLA-A*02:01,0,0.94500,0,0.02,0,16.37,0.15
4,KLFESKAELA,HLA-A*02:01,0,0.39800,0,0.36,0,124.66,0.88
...,...,...,...,...,...,...,...,...,...
24535,DSKGRSYNL,HLA-B*08:01,1,0.74100,0,0.05,0,312.59,0.34
24536,DSKKRSYNL,HLA-B*08:01,1,0.90500,0,0.02,0,68.18,0.08
24537,TDLGQNLLY,HLA-A*01:01,1,0.35500,0,0.31,0,3282.58,0.85
24538,AVVSLLRLLK,HLA-A*11:01,1,0.67300,0,0.16,0,13.12,0.05


In [None]:
# Script for expanded IEDB dataset

PATH = '/content/expanded_IEDB_data.csv'

# Create dictionary: allele -> list of its peptides, in file order
peptides = {}

# NOTE: this rebinds peptide_df, replacing the table loaded earlier in the notebook
peptide_df = pd.read_csv(PATH)

for allele, peptide in zip(peptide_df['allele'], peptide_df['peptide']):
    peptides.setdefault(allele, []).append(peptide)

In [None]:
### Class-I Binding

# Column layout of the collected results; the method name is appended to each parsed row
col_names = ['allele', 'seq_num', 'start', 'end', 'length', 'peptide', 'ic50', 'percentile_rank', 'method']

alleles = list(peptides)

# Removed 'consensus' as it gives additional results
# Note: 'netmhcpan_ba', 'netmhcpan_el', 'recommended' provide additional 'core' and 'icore' metrics,
# which are dropped below because they are the same as the peptide sequence at this sequence length
methods = ['ann', 'comblib_sidney2008', 'netmhccons', 'netmhcpan_ba', 'netmhcpan_el', 'netmhcstabpan', 'pickpocket', 'recommended', 'smm', 'smmpmbec']

# NOTE(review): not passed to get_immunogenicity_df below, so it has no effect on the requests
url = 'http://tools-cluster-interface.iedb.org/tools_api/mhci/'

final_expanded_results_mhci = pd.DataFrame(columns = col_names)

for method in methods:
    for allele in alleles:
        # HLA-A*01:01 does not work with comblib_sidney2008
        if allele == 'HLA-A*01:01' and method == 'comblib_sidney2008':
            continue
        converted_seq = convert_to_iedb(peptides[allele])
        res = get_immunogenicity_df(method, converted_seq, allele, '9')
        parts = res.split('\n')
        # Append to pd dataframe, skipping the first line (presumably the
        # column header) and the final piece of the split
        for line in parts[1:-1]:
            part = line.split('\t')
            if method in ('netmhcpan_ba', 'netmhcpan_el', 'recommended'):
                # Remove fields 6 and 7 (see the note on core/icore above)
                del part[6:8]
            part.append(method)
            try:
                final_expanded_results_mhci.loc[len(final_expanded_results_mhci.index)] = part
            except Exception as e:
                print(e)
print("Done!")

NameError: ignored

In [None]:
final_expanded_results_mhci

NameError: ignored

In [None]:
# Export the expanded-dataset Class-I results to a CSV file in the working directory
final_expanded_results_mhci.to_csv('expanded_mhci_data.csv')

In [None]:
### MHC Processing Predictions

# Column layout of the collected results; the method name is appended to each parsed row
col_names = ['allele', 'seq_num', 'start', 'end', 'length', 'peptide', 'ic50', 'percentile_rank', 'method']

alleles = list(peptides)

# Removed 'consensus' as it gives additional results
# Note: 'netmhcpan_ba', 'netmhcpan_el', 'recommended' provide additional 'core' and 'icore' metrics,
# which are dropped below because they are the same as the peptide sequence at this sequence length
methods = ['ann', 'comblib_sidney2008', 'netmhccons', 'netmhcpan_ba', 'netmhcpan_el', 'netmhcstabpan', 'pickpocket', 'recommended', 'smm', 'smmpmbec']

# NOTE(review): this processing URL is assigned but never passed to
# get_immunogenicity_df, so the requests below go to that function's own
# endpoint (mhci). This cell therefore appears to repeat the Class-I query
# instead of calling the processing API -- confirm and fix.
url = 'http://tools-cluster-interface.iedb.org/tools_api/processing/'

final_expanded_results_mhcp = pd.DataFrame(columns = col_names)

for method in methods:
    for allele in alleles:
        # HLA-A*01:01 does not work with comblib_sidney2008
        if allele == 'HLA-A*01:01' and method == 'comblib_sidney2008':
            continue
        converted_seq = convert_to_iedb(peptides[allele])
        res = get_immunogenicity_df(method, converted_seq, allele, '9')
        parts = res.split('\n')
        # Append to pd dataframe, skipping the first line (presumably the
        # column header) and the final piece of the split
        for line in parts[1:-1]:
            part = line.split('\t')
            if method in ('netmhcpan_ba', 'netmhcpan_el', 'recommended'):
                # Remove fields 6 and 7 (see the note on core/icore above)
                del part[6:8]
            part.append(method)
            try:
                final_expanded_results_mhcp.loc[len(final_expanded_results_mhcp.index)] = part
            except Exception as e:
                print(e)
print("Done!")

In [None]:
# Export the results collected in final_expanded_results_mhcp to a CSV file in the working directory
final_expanded_results_mhcp.to_csv('expanded_mhcp_data.csv')

In [None]:
# Script for smaller test/train sets

# Location of the test-set CSV; a later cell opens it and parses it line by line
Test_PATH = '/content/immuno_data_test_IEDB_A0201_HLAseq_2_csv.csv'

In [None]:
# Collect one peptide per data row of the test file
test_peptides = []
with open(Test_PATH) as f:
    for num, row in enumerate(f):
        # The first line is skipped (presumably the CSV header)
        if num == 0:
            continue
        # Only the first 11 characters of the row are considered
        peptide = row[:11]
        # Keep what follows the last 'J'/'j' in that prefix ('J' presumably
        # being left-padding -- confirm against the file format).
        # rfind returns -1 when there is no 'J', so the whole prefix is kept;
        # previously such a row re-appended the previous row's peptide, or
        # raised NameError if it was the first data row.
        last_pad = max(peptide.rfind('J'), peptide.rfind('j'))
        final_peptide = peptide[last_pad + 1:]
        test_peptides.append(final_peptide)

In [None]:
# Location of the train-set CSV; a later cell opens it and parses it line by line
Train_PATH = '/content/immuno_data_train_IEDB_A0201_HLAseq_2_csv.csv'

In [None]:
# Collect one peptide per data row of the train file
train_peptides = []
with open(Train_PATH) as f:
    for num, row in enumerate(f):
        # The first line is skipped (presumably the CSV header)
        if num == 0:
            continue
        # Only the first 11 characters of the row are considered
        peptide = row[:11]
        # Keep what follows the last 'J'/'j' in that prefix ('J' presumably
        # being left-padding -- confirm against the file format).
        # rfind returns -1 when there is no 'J', so the whole prefix is kept;
        # previously such a row re-appended the previous row's peptide, or
        # raised NameError if it was the first data row.
        last_pad = max(peptide.rfind('J'), peptide.rfind('j'))
        final_peptide = peptide[last_pad + 1:]
        train_peptides.append(final_peptide)

In [None]:
# Encode each peptide list once; the prediction cells below reuse these
# request strings for every method/allele combination
converted_test_seqs = convert_to_iedb(test_peptides)
converted_train_seqs = convert_to_iedb(train_peptides)

In [None]:
### Test Dataset: Class-I Binding

# Column layout of the collected results; the method name is appended to each parsed row
col_names = ['allele', 'seq_num', 'start', 'end', 'length', 'peptide', 'ic50', 'percentile_rank', 'method']

alleles = ['HLA-A*01:01','HLA-A*02:01']

# Removed 'consensus' as it gives additional results
# Note: 'netmhcpan_ba', 'netmhcpan_el', 'recommended' provide additional 'core' and 'icore' metrics,
# which are dropped below because they are the same as the peptide sequence at this sequence length
methods = ['ann', 'comblib_sidney2008', 'netmhccons', 'netmhcpan_ba', 'netmhcpan_el', 'netmhcstabpan', 'pickpocket', 'recommended', 'smm', 'smmpmbec']

# NOTE(review): not passed to get_immunogenicity_df below, so it has no effect on the requests
url = 'http://tools-cluster-interface.iedb.org/tools_api/mhci/'

final_test_results_mhci = pd.DataFrame(columns = col_names)

for method in methods:
    for allele in alleles:
        # HLA-A*01:01 does not work with comblib_sidney2008
        if allele == 'HLA-A*01:01' and method == 'comblib_sidney2008':
            continue
        res = get_immunogenicity_df(method, converted_test_seqs, allele, '9')
        parts = res.split('\n')
        # Append to pd dataframe, skipping the first line (presumably the
        # column header) and the final piece of the split
        for line in parts[1:-1]:
            part = line.split('\t')
            if method in ('netmhcpan_ba', 'netmhcpan_el', 'recommended'):
                # Remove fields 6 and 7 (see the note on core/icore above)
                del part[6:8]
            part.append(method)
            try:
                final_test_results_mhci.loc[len(final_test_results_mhci.index)] = part
            except Exception as e:
                print(e)
print("Done!")

method=ann&sequence_text=%3Epeptide0%0AGMPPHMLPVL%0A%3Epeptide1%0AGLALLACAGL%0A%3Epeptide2%0ARIAQCFLRV%0A%3Epeptide3%0AALARWLPPV%0A%3Epeptide4%0ATHLMVLCCV%0A%3Epeptide5%0ALLIKKLPRV%0A%3Epeptide6%0ALLDQLIEEV%0A%3Epeptide7%0ALLDQLIEEV%0A%3Epeptide8%0AVLLNAPSEA%0A%3Epeptide9%0AYLLSGSDLFI%0A%3Epeptide10%0ALMIEYNLLT%0A%3Epeptide11%0AGLADGMEHL%0A%3Epeptide12%0AFLGGHVAVA%0A%3Epeptide13%0AFVVPILLKA%0A%3Epeptide14%0ATLACFVLAAV%0A%3Epeptide15%0AVLIAGYIIVF%0A%3Epeptide16%0ATLEDLLMGT%0A%3Epeptide17%0ATLEDLLMGT%0A%3Epeptide18%0ALMAVAILKEV%0A%3Epeptide19%0AGLGQVPLIV%0A%3Epeptide20%0AIMLEALERV%0A%3Epeptide21%0AYLLPEAEEI%0A%3Epeptide22%0AMLGIWFFTL%0A%3Epeptide23%0AGMVKAALEAI%0A%3Epeptide24%0AAAAWYLWEV%0A%3Epeptide25%0AKIRSDNIKKL%0A%3Epeptide26%0ALLIGICVAV%0A%3Epeptide27%0ANLDTLMTYV%0A%3Epeptide28%0ALLDTNYNLFY%0A%3Epeptide29%0AFLAADGHPA%0A%3Epeptide30%0ATLWYRAPEV%0A%3Epeptide31%0ATLWYRAPEV%0A%3Epeptide32%0AYLHPKEYEW%0A%3Epeptide33%0AVLWDYVYQL%0A%3Epeptide34%0AKLKKIKNSL%0A%3Epeptide35%0AKLIANNTRV%0A%3Ep

In [None]:
# Export the test-set Class-I results to a CSV file in the working directory
final_test_results_mhci.to_csv('test_mhci_data.csv')

In [None]:
### Train Dataset: Class-I Binding

# Column layout of the collected results; the method name is appended to each parsed row
col_names = ['allele', 'seq_num', 'start', 'end', 'length', 'peptide', 'ic50', 'percentile_rank', 'method']

alleles = ['HLA-A*01:01','HLA-A*02:01']

# Removed 'consensus' as it gives additional results
# Note: 'netmhcpan_ba', 'netmhcpan_el', 'recommended' provide additional 'core' and 'icore' metrics,
# which are dropped below because they are the same as the peptide sequence at this sequence length
methods = ['ann', 'comblib_sidney2008', 'netmhccons', 'netmhcpan_ba', 'netmhcpan_el', 'netmhcstabpan', 'pickpocket', 'recommended', 'smm', 'smmpmbec']

# NOTE(review): not passed to get_immunogenicity_df below, so it has no effect on the requests
url = 'http://tools-cluster-interface.iedb.org/tools_api/mhci/'

final_train_results_mhci = pd.DataFrame(columns = col_names)

for method in methods:
    for allele in alleles:
        # HLA-A*01:01 does not work with comblib_sidney2008
        if allele == 'HLA-A*01:01' and method == 'comblib_sidney2008':
            continue
        res = get_immunogenicity_df(method, converted_train_seqs, allele, '9')
        parts = res.split('\n')
        # Append to pd dataframe, skipping the first line (presumably the
        # column header) and the final piece of the split
        for line in parts[1:-1]:
            part = line.split('\t')
            if method in ('netmhcpan_ba', 'netmhcpan_el', 'recommended'):
                # Remove fields 6 and 7 (see the note on core/icore above)
                del part[6:8]
            part.append(method)
            try:
                final_train_results_mhci.loc[len(final_train_results_mhci.index)] = part
            except Exception as e:
                print(e)
print("Done!")

method=ann&sequence_text=%3Epeptide0%0ASLILVSQYT%0A%3Epeptide1%0ALMSTLLIYL%0A%3Epeptide2%0ALLHTDFEQV%0A%3Epeptide3%0ALLHTDFEQV%0A%3Epeptide4%0AMMIDDFGTA%0A%3Epeptide5%0ASLLSGDWVL%0A%3Epeptide6%0AKTLETPEFV%0A%3Epeptide7%0AGLYDGMEHC%0A%3Epeptide8%0ALIIPFIHLI%0A%3Epeptide9%0AVLAFGFALL%0A%3Epeptide10%0ALLVRNSFEV%0A%3Epeptide11%0AVDSIFEQWL%0A%3Epeptide12%0ANELFDSLFPV%0A%3Epeptide13%0AIIALLFALV%0A%3Epeptide14%0AFVLVILARL%0A%3Epeptide15%0ADQVILLNKH%0A%3Epeptide16%0AVLILLLLIYL%0A%3Epeptide17%0AFLSEHPNVTL%0A%3Epeptide18%0AYLESFCEDV%0A%3Epeptide19%0ALMIFISSFL%0A%3Epeptide20%0AFLLVIGACV%0A%3Epeptide21%0AAMAVLYLAL%0A%3Epeptide22%0AAMAGASTSA%0A%3Epeptide23%0AVLAGSVDEL%0A%3Epeptide24%0APGLSISGNL%0A%3Epeptide25%0AILDKVLVHL%0A%3Epeptide26%0AFYLTNDVSF%0A%3Epeptide27%0AFYLTNDVSF%0A%3Epeptide28%0ASLAVVSTQL%0A%3Epeptide29%0ALLAILPYYV%0A%3Epeptide30%0ASLLRSLENV%0A%3Epeptide31%0ALIIPCIHLI%0A%3Epeptide32%0AKLVGKTVKV%0A%3Epeptide33%0AHVLKAVFSR%0A%3Epeptide34%0AVLLSICYLL%0A%3Epeptide35%0AYLGGMSYYC%0A%3Epeptide

In [None]:
# Export the train-set Class-I results to a CSV file in the working directory
final_train_results_mhci.to_csv('train_mhci_data.csv')

In [None]:
### Test Dataset: MHC Processing Predictions

# Column layout of the collected results; the method name is appended to each parsed row
col_names = ['allele', 'seq_num', 'start', 'end', 'length', 'peptide', 'ic50', 'percentile_rank', 'method']

alleles = ['HLA-A*01:01','HLA-A*02:01']

# Removed 'consensus' as it gives additional results
# Note: 'netmhcpan_ba', 'netmhcpan_el', 'recommended' provide additional 'core' and 'icore' metrics,
# which are dropped below because they are the same as the peptide sequence at this sequence length
methods = ['ann', 'comblib_sidney2008', 'netmhccons', 'netmhcpan_ba', 'netmhcpan_el', 'netmhcstabpan', 'pickpocket', 'recommended', 'smm', 'smmpmbec']

# NOTE(review): this processing URL is assigned but never passed to
# get_immunogenicity_df, so the requests below go to that function's own
# endpoint (mhci). This cell therefore appears to repeat the Class-I query
# instead of calling the processing API -- confirm and fix.
url = 'http://tools-cluster-interface.iedb.org/tools_api/processing/'

final_test_results_mhcp = pd.DataFrame(columns = col_names)

for method in methods:
    for allele in alleles:
        # HLA-A*01:01 does not work with comblib_sidney2008
        if allele == 'HLA-A*01:01' and method == 'comblib_sidney2008':
            continue
        res = get_immunogenicity_df(method, converted_test_seqs, allele, '9')
        parts = res.split('\n')
        # Append to pd dataframe, skipping the first line (presumably the
        # column header) and the final piece of the split
        for line in parts[1:-1]:
            part = line.split('\t')
            if method in ('netmhcpan_ba', 'netmhcpan_el', 'recommended'):
                # Remove fields 6 and 7 (see the note on core/icore above)
                del part[6:8]
            part.append(method)
            try:
                final_test_results_mhcp.loc[len(final_test_results_mhcp.index)] = part
            except Exception as e:
                print(e)
print("Done!")

method=ann&sequence_text=%3Epeptide0%0AGMPPHMLPVL%0A%3Epeptide1%0AGLALLACAGL%0A%3Epeptide2%0ARIAQCFLRV%0A%3Epeptide3%0AALARWLPPV%0A%3Epeptide4%0ATHLMVLCCV%0A%3Epeptide5%0ALLIKKLPRV%0A%3Epeptide6%0ALLDQLIEEV%0A%3Epeptide7%0ALLDQLIEEV%0A%3Epeptide8%0AVLLNAPSEA%0A%3Epeptide9%0AYLLSGSDLFI%0A%3Epeptide10%0ALMIEYNLLT%0A%3Epeptide11%0AGLADGMEHL%0A%3Epeptide12%0AFLGGHVAVA%0A%3Epeptide13%0AFVVPILLKA%0A%3Epeptide14%0ATLACFVLAAV%0A%3Epeptide15%0AVLIAGYIIVF%0A%3Epeptide16%0ATLEDLLMGT%0A%3Epeptide17%0ATLEDLLMGT%0A%3Epeptide18%0ALMAVAILKEV%0A%3Epeptide19%0AGLGQVPLIV%0A%3Epeptide20%0AIMLEALERV%0A%3Epeptide21%0AYLLPEAEEI%0A%3Epeptide22%0AMLGIWFFTL%0A%3Epeptide23%0AGMVKAALEAI%0A%3Epeptide24%0AAAAWYLWEV%0A%3Epeptide25%0AKIRSDNIKKL%0A%3Epeptide26%0ALLIGICVAV%0A%3Epeptide27%0ANLDTLMTYV%0A%3Epeptide28%0ALLDTNYNLFY%0A%3Epeptide29%0AFLAADGHPA%0A%3Epeptide30%0ATLWYRAPEV%0A%3Epeptide31%0ATLWYRAPEV%0A%3Epeptide32%0AYLHPKEYEW%0A%3Epeptide33%0AVLWDYVYQL%0A%3Epeptide34%0AKLKKIKNSL%0A%3Epeptide35%0AKLIANNTRV%0A%3Ep

In [None]:
# Export the results collected in final_test_results_mhcp to a CSV file in the working directory
final_test_results_mhcp.to_csv('test_mhcp_data.csv')

In [None]:
### Train Dataset: MHC Processing Predictions

# Column layout of the collected results; the method name is appended to each parsed row
col_names = ['allele', 'seq_num', 'start', 'end', 'length', 'peptide', 'ic50', 'percentile_rank', 'method']

alleles = ['HLA-A*01:01','HLA-A*02:01']

# Removed 'consensus' as it gives additional results
# Note: 'netmhcpan_ba', 'netmhcpan_el', 'recommended' provide additional 'core' and 'icore' metrics,
# which are dropped below because they are the same as the peptide sequence at this sequence length
methods = ['ann', 'comblib_sidney2008', 'netmhccons', 'netmhcpan_ba', 'netmhcpan_el', 'netmhcstabpan', 'pickpocket', 'recommended', 'smm', 'smmpmbec']

# NOTE(review): this processing URL is assigned but never passed to
# get_immunogenicity_df, so the requests below go to that function's own
# endpoint (mhci). This cell therefore appears to repeat the Class-I query
# instead of calling the processing API -- confirm and fix.
url = 'http://tools-cluster-interface.iedb.org/tools_api/processing/'

final_train_results_mhcp = pd.DataFrame(columns = col_names)

for method in methods:
    for allele in alleles:
        # HLA-A*01:01 does not work with comblib_sidney2008
        if allele == 'HLA-A*01:01' and method == 'comblib_sidney2008':
            continue
        res = get_immunogenicity_df(method, converted_train_seqs, allele, '9')
        parts = res.split('\n')
        # Append to pd dataframe, skipping the first line (presumably the
        # column header) and the final piece of the split
        for line in parts[1:-1]:
            part = line.split('\t')
            if method in ('netmhcpan_ba', 'netmhcpan_el', 'recommended'):
                # Remove fields 6 and 7 (see the note on core/icore above)
                del part[6:8]
            part.append(method)
            try:
                final_train_results_mhcp.loc[len(final_train_results_mhcp.index)] = part
            except Exception as e:
                print(e)
print("Done!")

method=ann&sequence_text=%3Epeptide0%0ASLILVSQYT%0A%3Epeptide1%0ALMSTLLIYL%0A%3Epeptide2%0ALLHTDFEQV%0A%3Epeptide3%0ALLHTDFEQV%0A%3Epeptide4%0AMMIDDFGTA%0A%3Epeptide5%0ASLLSGDWVL%0A%3Epeptide6%0AKTLETPEFV%0A%3Epeptide7%0AGLYDGMEHC%0A%3Epeptide8%0ALIIPFIHLI%0A%3Epeptide9%0AVLAFGFALL%0A%3Epeptide10%0ALLVRNSFEV%0A%3Epeptide11%0AVDSIFEQWL%0A%3Epeptide12%0ANELFDSLFPV%0A%3Epeptide13%0AIIALLFALV%0A%3Epeptide14%0AFVLVILARL%0A%3Epeptide15%0ADQVILLNKH%0A%3Epeptide16%0AVLILLLLIYL%0A%3Epeptide17%0AFLSEHPNVTL%0A%3Epeptide18%0AYLESFCEDV%0A%3Epeptide19%0ALMIFISSFL%0A%3Epeptide20%0AFLLVIGACV%0A%3Epeptide21%0AAMAVLYLAL%0A%3Epeptide22%0AAMAGASTSA%0A%3Epeptide23%0AVLAGSVDEL%0A%3Epeptide24%0APGLSISGNL%0A%3Epeptide25%0AILDKVLVHL%0A%3Epeptide26%0AFYLTNDVSF%0A%3Epeptide27%0AFYLTNDVSF%0A%3Epeptide28%0ASLAVVSTQL%0A%3Epeptide29%0ALLAILPYYV%0A%3Epeptide30%0ASLLRSLENV%0A%3Epeptide31%0ALIIPCIHLI%0A%3Epeptide32%0AKLVGKTVKV%0A%3Epeptide33%0AHVLKAVFSR%0A%3Epeptide34%0AVLLSICYLL%0A%3Epeptide35%0AYLGGMSYYC%0A%3Epeptide

In [None]:
# Export the results collected in final_train_results_mhcp to a CSV file in the working directory
final_train_results_mhcp.to_csv('train_mhcp_data.csv')