In [1]:
from subprocess import Popen, call, check_call, STDOUT, PIPE
import os
from tempfile import NamedTemporaryFile
import pandas as pd
from numpy import mean
from tqdm import tqdm
from collections import defaultdict
from multiprocessing import Pool, Process
from time import sleep
import pandas as pd

In [2]:
# Switch the working directory to the project data folder; the relative
# file names opened and written by the cells below resolve against it.
# NOTE(review): hard-coded absolute path -- adjust when running elsewhere.
os.chdir("/home/urban/mhc/")

In [3]:
# Path of the external prediction script that run_mhcii launches as a subprocess.
MHCII = "/home/urban/mhc2/mhc_ii/mhc_II_binding.py"
# Method names handed to the script as its first argument, one run per method.
methods = ["nn_align", "comblib", "smm_align", "netmhcpan"]
# Alternative single-method configuration, currently disabled:
#methods = ["sturniolo"]

In [4]:
def run_mhcii(sequence, method, allele, resmin=True):
    """Predict binding for one sequence by running the external ``MHCII`` script.

    The sequence is written to a temporary single-record FASTA file and the
    script is invoked as ``MHCII <method> <allele> <fasta path>``. Its
    tab-separated standard output is parsed for the ``ic50`` column.

    Parameters
    ----------
    sequence : str
        Sequence written as the only FASTA record (header ``>1``).
    method : str
        Method name passed through to the script.
    allele : str
        Allele name passed through to the script.
    resmin : bool, optional
        If true (default) return the smallest ic50 value; otherwise return
        the list of all ic50 values in output order.

    Returns
    -------
    float, list of float, or the string ``"NA"``
        ``"NA"`` is returned when the first output line has no ``ic50``
        column or when there are no data rows.
    """
    # Text mode so that writing ``str`` works on both Python 2 and Python 3.
    tf = NamedTemporaryFile(mode="w", suffix=".fa", delete=False)
    try:
        try:
            tf.writelines([">1\n", sequence])
        finally:
            tf.close()
        proc = Popen([MHCII, method, allele, tf.name],
                     stdin=PIPE, stdout=PIPE, stderr=PIPE,
                     universal_newlines=True)
        # communicate() drains stdout/stderr while waiting. Calling wait()
        # first can deadlock once the child fills a pipe buffer.
        result = proc.communicate()[0]
    finally:
        # Always remove the temporary FASTA file, even if the launch failed.
        os.unlink(tf.name)

    lines = result.split('\n')
    try:
        target_index = lines[0].split('\t').index("ic50")
    except ValueError:
        return "NA"
    targets = [float(l.split('\t')[target_index]) for l in lines[1:] if l]
    if not targets:
        return "NA"
    return min(targets) if resmin else targets

In [5]:
def run_mhcii_meth(sequence, methods, allele,  aggregate = mean):
    """Combine the run_mhcii result of several methods for one sequence/allele.

    run_mhcii is called once per entry of ``methods`` (in order) and the
    list of results is handed to ``aggregate`` (numpy ``mean`` by default).
    Results are passed on unchanged, so any ``"NA"`` string returned by
    run_mhcii reaches ``aggregate`` as-is.
    """
    per_method = []
    for meth in methods:
        per_method.append(run_mhcii(sequence, meth, allele))
    return aggregate(per_method)

In [6]:
def run_mhcii_allen(sequence, method, alleles):
    """Run one method for one sequence against every allele.

    Returns a single-entry dict mapping ``sequence`` to a list of
    ``(allele, run_mhcii result)`` tuples in the order of ``alleles``.
    """
    per_allele = []
    for allele in alleles:
        per_allele.append((allele, run_mhcii(sequence, method, allele)))
    return {sequence: per_allele}


In [7]:
def run_mhcii_allen_(par):
    """Single-argument variant of the per-allele run, used as the Pool worker.

    ``par`` is a dict with the keys ``'sequence'``, ``'method'`` and
    ``'alleles'``. Returns ``(sequence, [(allele, run_mhcii result), ...])``
    with one tuple per allele, in the order of ``par['alleles']``.
    """
    sequence, method, alleles = par['sequence'], par['method'], par['alleles']
    predictions = []
    for allele in alleles:
        predictions.append((allele, run_mhcii(sequence, method, allele)))
    return (sequence, predictions)


In [8]:
def run_mhcii_peptides(peptides, alleles, method, processes=24):
    """Run one method for many peptides in parallel worker processes.

    Parameters
    ----------
    peptides : iterable of str
        Sequences to predict; each becomes one task for the pool.
    alleles : iterable of str
        Alleles evaluated for every peptide.
    method : str
        Method name forwarded to run_mhcii.
    processes : int, optional
        Number of worker processes (default 24).

    Returns
    -------
    list of (sequence, [(allele, result), ...])
        One entry per peptide, in completion order (``imap_unordered``),
        which is not necessarily the input order.
    """
    args = [{'sequence':s, 'method':method, 'alleles':alleles} for s in peptides]
    pool = Pool(processes=processes)
    try:
        # len(args) rather than len(peptides): works for any iterable input.
        result = list(tqdm(pool.imap_unordered(run_mhcii_allen_, args), total=len(args)))
        pool.close()
    except BaseException:
        # Includes KeyboardInterrupt from an interrupted cell: stop the
        # workers instead of leaving orphaned processes behind.
        pool.terminate()
        raise
    finally:
        pool.join()
    return result

In [9]:
def write_res_table(results, filename):
    """Write prediction results as a tab-separated table.

    ``results`` is an iterable of ``(peptide, [(model, ic50), ...])`` pairs.
    The file at ``filename`` is overwritten with a ``peptide/model/ic50``
    header followed by one row per (peptide, model) pair.
    """
    header = "peptide\tmodel\tic50\n"
    row_template = "{}\t{}\t{}\n"
    with open(filename, "w") as out:
        out.write(header)
        out.writelines(
            row_template.format(peptide, model, value)
            for peptide, predictions in results
            for model, value in predictions
        )

In [None]:
#allelesii = [(l.strip() ) for l in open("hla_ref_set.class_ii.txt").readlines()]
#randHS = [l.strip() for l in open("1740_HS_random_peptides.txt").readlines()]
#randMB = [l.strip() for l in open("1740_MB_random_peptides.txt").readlines()]
#randNC = [l.strip() for l in open("1740_NCBI_random_peptides.txt").readlines()]

In [None]:
# Load the allele list and the three 4368_* random peptide files, one
# whitespace-stripped entry per line. `with` closes each file as soon as it
# has been read instead of leaving the open handle to the garbage collector.
with open("hla_ref_set.class_ii.txt") as fh:
    allelesii = [line.strip() for line in fh]
with open("4368_HS_random_peptides.txt") as fh:
    randHS = [line.strip() for line in fh]
with open("4368_MB_random_peptides.txt") as fh:
    randMB = [line.strip() for line in fh]
with open("4368_NC_random_peptides.txt") as fh:
    randNC = [line.strip() for line in fh]

In [None]:
# Load the allele list and the peptides from bacpep2gr.txt, one
# whitespace-stripped entry per line. `with` closes each file as soon as it
# has been read instead of leaving the open handle to the garbage collector.
with open("hla_ref_set.class_ii.txt") as fh:
    allelesii = [line.strip() for line in fh]
with open("bacpep2gr.txt") as fh:
    bacpeps = [line.strip() for line in fh]

In [10]:
# Load the allele list and the peptides from humpeps2gr.txt, one
# whitespace-stripped entry per line. `with` closes each file as soon as it
# has been read instead of leaving the open handle to the garbage collector.
with open("hla_ref_set.class_ii.txt") as fh:
    allelesii = [line.strip() for line in fh]
with open("humpeps2gr.txt") as fh:
    humpeps = [line.strip() for line in fh]

In [None]:
# Read the table from hIndividuals.tsv and index it by its "sequence" column.
ind = pd.read_table("hIndividuals.tsv").set_index("sequence")

# For every remaining column, collect the sequences whose value in that
# column is positive and dump them, one per line, to "<column>_peps.txt".
individuals = {}
for column in ind.columns:
    positive_rows = ind.loc[ind[column] > 0]
    individuals[column] = list(positive_rows.index.values)
    with open(column + "_peps.txt", "w") as out:
        out.writelines(peptide + '\n' for peptide in individuals[column])

In [None]:
# Run every prediction method over the three random peptide sets and over
# each per-individual peptide list, writing one TSV per (method, dataset)
# to h2/<method>/<dataset>.tsv.
os.chdir("/home/urban/mhc/")
datasets = list(zip(["randHS", "randMB", "randNC"], [randHS, randMB, randNC]))
datasets += list(individuals.items())
for m in methods:
    outdir = os.path.join("h2", m)
    # makedirs plus an existence check: creates the "h2" parent when it is
    # missing and lets the cell be re-run without failing on an existing
    # directory (plain os.mkdir raises in both situations).
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    for name, peps in datasets:
        # A parenthesised single-argument print runs as a statement on
        # Python 2 and as a function call on Python 3, with the same output.
        print("Running {}:{}".format(m, name))
        r = run_mhcii_peptides(peps, allelesii, m)
        write_res_table(r, os.path.join(outdir, name + ".tsv"))

In [None]:
# Predict the `bacpeps` peptides with every configured method and write one
# result table per method into the current directory.
for method_name in methods:
    predictions = run_mhcii_peptides(bacpeps, allelesii, method_name)
    table_name = "".join(["bacpeps2gr_ii_", method_name, ".tsv"])
    write_res_table(predictions, os.path.join(".", table_name))


In [12]:
# Predict the `humpeps` peptides with every configured method and write one
# result table per method into the current directory.
for method_name in methods:
    predictions = run_mhcii_peptides(humpeps, allelesii, method_name)
    table_name = "".join(["humpeps2gr_ii_", method_name, ".tsv"])
    write_res_table(predictions, os.path.join(".", table_name))


100%|██████████| 739/739 [01:50<00:00,  2.47it/s]
100%|██████████| 739/739 [00:39<00:00, 18.90it/s]
100%|██████████| 739/739 [01:11<00:00, 10.29it/s]
100%|██████████| 739/739 [00:35<00:00, 20.59it/s]
