In [8]:
import multiprocessing
import pandas as pd
import numpy as np
import subprocess
import random
from scipy import stats
import glob
import math
import csv
import sys
import os

import matplotlib.pyplot as plt
from matplotlib import animation
import seaborn as sns
# Notebook-wide plotting defaults: wide 20x10 figures and a serif font family.
plt.rcParams.update({
    'figure.figsize': (20.0, 10.0),
    'font.family': "serif",
})
%matplotlib inline

In [11]:
# Run configuration shared by the quantification cells below.

# Directory prefixes. Paths are built by plain string concatenation,
# so the trailing "/" on each prefix is required.
out_dir, idx_dir = "analysis_21042020/", "data/indices/"

# Size of the (tissue, sample) grid that gets quantified.
num_tissues, num_samples = 3, 10

# Read length; handed to kallisto through its -l option.
readlen = 101

# Executables of the two quantifiers.
kallisto_path, salmon_path = "kallisto", "salmon"

# Parallelism: `num_processes` pool workers run at once and each external
# tool invocation is itself given `num_threads` threads.
num_threads, num_processes = 8, 4

In [13]:
# quantify with salmon
def run_salmon(ts):
    """Quantify every read set of one (tissue, sample) pair with salmon.

    Runs ``salmon quant`` once per read set, in this order: "real",
    "real_splicing", "real_intronic", "real_intergenic", "all". For a read
    set ``<name>`` the reads are taken from
    ``<out_dir><name>.t<tn>_s<sn>/sample_01.shuffled.fasta`` and the results
    are written to ``<out_dir>slmn.<name>.t<tn>_s<sn>``.

    Parameters
    ----------
    ts : sequence
        ``(tissue number, sample number)``; only the first two items are read.

    Returns
    -------
    None
        A non-zero salmon exit code is reported on stderr but does not raise,
        so the remaining read sets are still processed.
    """
    tn = ts[0]
    sn = ts[1]
    print("\n=================\nTissue #"+str(tn)+" - Sample #"+str(sn)+"\n=================\n")

    # Suffix shared by every input and output directory of this pair.
    pair_tag = ".t"+str(tn)+"_s"+str(sn)

    for read_set in ("real", "real_splicing", "real_intronic", "real_intergenic", "all"):
        # Use the configured executable rather than a hard-coded name so that
        # changing `salmon_path` in the declarations cell actually takes effect.
        salmon_cmd = [salmon_path, "quant", "--validateMappings", "-l", "A",
                      "-i", idx_dir+"annotation.salmon",
                      "-p", str(num_threads),
                      "-o", out_dir+"slmn."+read_set+pair_tag,
                      "-r", out_dir+read_set+pair_tag+"/sample_01.shuffled.fasta"]
        exit_code = subprocess.call(salmon_cmd)
        if exit_code != 0:
            # Best effort: keep going, but make the failure visible.
            print("WARNING: salmon exited with code "+str(exit_code)
                  + " for "+read_set+pair_tag, file=sys.stderr)

In [14]:
# Every (tissue, sample) index pair, tissue-major: (0,0), (0,1), ..., then (1,0), ...
samples = [
    (tissue_idx, sample_idx)
    for tissue_idx in range(num_tissues)
    for sample_idx in range(num_samples)
]

In [15]:
# Run salmon for every (tissue, sample) pair on a pool of worker processes.
pool = multiprocessing.Pool(processes=num_processes)
try:
    pool_outputs = pool.map(run_salmon, samples)
    # Normal path: no more work will be submitted, let the workers exit on their own.
    pool.close()
except BaseException:
    # Error or interrupt: stop the workers instead of leaving them alive
    # behind the notebook, then let the exception propagate.
    pool.terminate()
    raise
finally:
    # Always wait for the worker processes to go away (valid after close() or terminate()).
    pool.join()
print('Pool:', pool_outputs)


Tissue #0 - Sample #0

Tissue #0 - Sample #2

Tissue #0 - Sample #6

Tissue #0 - Sample #4





Tissue #0 - Sample #1

Tissue #0 - Sample #3

Tissue #0 - Sample #5

Tissue #0 - Sample #7





Tissue #0 - Sample #8

Tissue #1 - Sample #0


Tissue #1 - Sample #2

Tissue #1 - Sample #4


Tissue #0 - Sample #9




Tissue #1 - Sample #6


Tissue #1 - Sample #7


Tissue #1 - Sample #1


Tissue #1 - Sample #3


Tissue #1 - Sample #5


Tissue #1 - Sample #8


Tissue #2 - Sample #0


Tissue #2 - Sample #2


Tissue #2 - Sample #4


Tissue #1 - Sample #9


Tissue #2 - Sample #3


Tissue #2 - Sample #1


Tissue #2 - Sample #5


Tissue #2 - Sample #6


Tissue #2 - Sample #8


Tissue #2 - Sample #9


Tissue #2 - Sample #7

Pool: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]


In [16]:
# quantify with kallisto
def run_kallisto(ts):
    """Quantify every read set of one (tissue, sample) pair with kallisto.

    Runs ``kallisto quant`` in single-end mode once per read set, in this
    order: "real", "real_splicing", "real_intronic", "real_intergenic",
    "all". For a read set ``<name>`` the reads are taken from
    ``<out_dir><name>.t<tn>_s<sn>/sample_01.shuffled.fasta`` and the results
    are written to ``<out_dir>klst.<name>.t<tn>_s<sn>``.

    Parameters
    ----------
    ts : sequence
        ``(tissue number, sample number)``; only the first two items are read.

    Returns
    -------
    None
        A non-zero kallisto exit code is reported on stderr but does not
        raise, so the remaining read sets are still processed.
    """
    tn = ts[0]
    sn = ts[1]
    print("\n=================\nTissue #"+str(tn)+" - Sample #"+str(sn)+"\n=================\n")

    # Suffix shared by every input and output directory of this pair.
    pair_tag = ".t"+str(tn)+"_s"+str(sn)

    # Value passed to kallisto's -s option, next to -l = readlen.
    # NOTE(review): presumably near-zero because all reads have the same
    # length -- confirm against the read simulation step.
    frag_len_sd = 0.0001

    for read_set in ("real", "real_splicing", "real_intronic", "real_intergenic", "all"):
        # Use the configured executable rather than a hard-coded name so that
        # changing `kallisto_path` in the declarations cell actually takes effect.
        klst_cmd = [kallisto_path, "quant",
                    "-i", idx_dir+"annotation.kallisto",
                    "-o", out_dir+"klst."+read_set+pair_tag,
                    "-t", str(num_threads),
                    "-l", str(readlen), "-s", str(frag_len_sd),
                    "--single", out_dir+read_set+pair_tag+"/sample_01.shuffled.fasta"]
        exit_code = subprocess.call(klst_cmd)
        if exit_code != 0:
            # Best effort: keep going, but make the failure visible.
            print("WARNING: kallisto exited with code "+str(exit_code)
                  + " for "+read_set+pair_tag, file=sys.stderr)

In [17]:
# Run kallisto for every (tissue, sample) pair on a pool of worker processes.
pool = multiprocessing.Pool(processes=num_processes)
try:
    pool_outputs = pool.map(run_kallisto, samples)
    # Normal path: no more work will be submitted, let the workers exit on their own.
    pool.close()
except BaseException:
    # Error or interrupt: stop the workers instead of leaving them alive
    # behind the notebook, then let the exception propagate.
    pool.terminate()
    raise
finally:
    # Always wait for the worker processes to go away (valid after close() or terminate()).
    pool.join()
print('Pool:', pool_outputs)


Tissue #0 - Sample #0

Tissue #0 - Sample #6



Tissue #0 - Sample #2

Tissue #0 - Sample #4



Tissue #0 - Sample #3


Tissue #0 - Sample #5


Tissue #0 - Sample #7


Tissue #0 - Sample #1


Tissue #0 - Sample #8


Tissue #1 - Sample #0


Tissue #1 - Sample #2


Tissue #1 - Sample #4


Tissue #1 - Sample #1


Tissue #0 - Sample #9


Tissue #1 - Sample #3


Tissue #1 - Sample #5


Tissue #1 - Sample #6


Tissue #1 - Sample #8


Tissue #2 - Sample #0


Tissue #2 - Sample #2


Tissue #1 - Sample #7


Tissue #1 - Sample #9


Tissue #2 - Sample #3


Tissue #2 - Sample #1


Tissue #2 - Sample #4


Tissue #2 - Sample #6


Tissue #2 - Sample #8


Tissue #2 - Sample #5


Tissue #2 - Sample #9


Tissue #2 - Sample #7

Pool: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
