|
|
@@ -3,13 +3,15 @@ |
|
|
import sys
|
|
|
import numpy as np
|
|
|
|
|
|
+# Parse the options from command line
|
|
|
try:
|
|
|
opts, args = getopt.getopt(sys.argv[1:],"i:n:k:t:",["idir=","njobs=","hacked-kallisto-path=","reference-transcriptome="])
|
|
|
except getopt.GetoptError:
|
|
|
print ("getopterrror")
|
|
|
print ('usage is : \n python Trapnell_wrapper.py -i input_SRA_dir -k path-to-hacked-kallisto -t path-to-human-reference-transcriptome [-n number-of-processes-to-use]')
|
|
|
sys.exit(1)
|
|
|
-
|
|
|
+
|
|
|
+# The four options used in this pipeline are
|
|
|
SRA_dir=''
|
|
|
num_proc=1
|
|
|
kallipso_path=''
|
|
|
@@ -25,23 +27,25 @@ |
|
|
elif opt in ("-t","--reference-transcriptome"):
|
|
|
ref_transcriptome=arg
|
|
|
|
|
|
-
|
|
|
+# Error message if function not called correctly
|
|
|
if (not SRA_dir) or (not kallipso_path) or (not ref_transcriptome):
|
|
|
print ('usage is : \n python Trapnell_wrapper.py -i input_SRA_dir -k path-to-hacked-kallisto -t path-to-human-reference-transcriptome [-n number-of-processes-to-use]')
|
|
|
sys.exit(1)
|
|
|
|
|
|
+# Execute process_SRA, which converts SRA to fastq
|
|
|
print('Extracting reads from SRAs...')
|
|
|
os.system('mkdir -p ./reads/')
|
|
|
os.system('rm -f ./reads/*')
|
|
|
os.system('python process_SRA.py -i '+SRA_dir+' -o ./reads/ -n '+str(num_proc))
|
|
|
|
|
|
+# This removes files that Trapnell didn't use. (Specific for analysis of this dataset)
|
|
|
print('Removing files that Trapnell throws...')
|
|
|
relevant_fls=np.loadtxt('Files_to_keep.txt',dtype=str)
|
|
|
fls_to_remove=[cells for cells in os.listdir('./reads/') if cells.split('_')[0] not in relevant_fls ]
|
|
|
for fl in fls_to_remove:
|
|
|
os.system('rm '+'./reads/'+fl)
|
|
|
|
|
|
-
|
|
|
+# Use kallisto to build index. should return same results as hacked kallisto
|
|
|
print('Generating the Kallisto index (with hacked kallisto)...')
|
|
|
os.system('mkdir -p ./kallisto_index')
|
|
|
os.system('rm -f ./kallisto_index/*')
|
|
|
@@ -52,15 +56,16 @@ |
|
|
num_ec = sum(1 for line in open('./kallisto_index/Trapnell_index.idx_ecmap.txt'))
|
|
|
print(num_ec)
|
|
|
|
|
|
+# Use hacked kallisto to generate TCCs (output is .class file that contains counts of ECs)
|
|
|
print('Generating TCC (with hacked kallisto)...')
|
|
|
os.system('mkdir -p ./transcript_compatibility_counts/')
|
|
|
os.system('rm -f ./transcript_compatibility_counts/*')
|
|
|
os.system('python get_pseudoalignments_paired_end.py -i ./reads/ -o ./transcript_compatibility_counts/ -k '+kallipso_path+ ' -t '+ index_path +' -n '+ str(num_proc))
|
|
|
|
|
|
+# Process the .class file into a matrix. normalize such that each row sums to 1
|
|
|
print('Generating TCC distribution...')
|
|
|
-
|
|
|
os.system('python get_tcc_dist.py -i ./transcript_compatibility_counts/ -m '+str(num_ec)+' -t ./Trapnell_TCC.dat -d ./Trapnell_TCC_distribution.dat')
|
|
|
|
|
|
+# Process the TCC distribution matrix to get pairwise distributions
|
|
|
print('Generating pairwise distances...')
|
|
|
-
|
|
|
os.system('python get_pairwise_distances.py ./Trapnell_TCC_distribution.dat ./Trapnell_TCC_pairwise_distance.dat '+str(num_proc))
|
0 comments on commit
fb3328d