Permalink
Browse files

Added comments

  • Loading branch information...
1 parent 192fce5 commit fb3328daffcec215549d7efef081468cd8005dd7 @jessemzhang jessemzhang committed Feb 28, 2016
Showing with 10 additions and 5 deletions.
  1. +10 −5 Trapnell_pipeline/Trapnell_wrapper.py
@@ -3,13 +3,15 @@
import sys
import numpy as np
+# Parse the options from command line
try:
opts, args = getopt.getopt(sys.argv[1:],"i:n:k:t:",["idir=","njobs=","hacked-kallisto-path=","reference-transcriptome="])
except getopt.GetoptError:
print ("getopterrror")
print ('usage is : \n python Trapnell_wrapper.py -i input_SRA_dir -k path-to-hacked-kallisto -t path-to-human-reference-transcriptome [-n number-of-processes-to-use]')
sys.exit(1)
-
+
+# The four options used in this pipeline are
SRA_dir=''
num_proc=1
kallipso_path=''
@@ -25,23 +27,25 @@
elif opt in ("-t","--reference-transcriptome"):
ref_transcriptome=arg
-
+# Error message if function not called correctly
if (not SRA_dir) or (not kallipso_path) or (not ref_transcriptome):
print ('usage is : \n python Trapnell_wrapper.py -i input_SRA_dir -k path-to-hacked-kallisto -t path-to-human-reference-transcriptome [-n number-of-processes-to-use]')
sys.exit(1)
+# Execute process_SRA, which converts SRA to fastq
print('Extracting reads from SRAs...')
os.system('mkdir -p ./reads/')
os.system('rm -f ./reads/*')
os.system('python process_SRA.py -i '+SRA_dir+' -o ./reads/ -n '+str(num_proc))
+# This removes files that Trapnell didn't use. (Specific for analysis of this dataset)
print('Removing files that Trapnell throws...')
relevant_fls=np.loadtxt('Files_to_keep.txt',dtype=str)
fls_to_remove=[cells for cells in os.listdir('./reads/') if cells.split('_')[0] not in relevant_fls ]
for fl in fls_to_remove:
os.system('rm '+'./reads/'+fl)
-
+# Use kallisto to build index. should return same results as hacked kallisto
print('Generating the Kallisto index (with hacked kallisto)...')
os.system('mkdir -p ./kallisto_index')
os.system('rm -f ./kallisto_index/*')
@@ -52,15 +56,16 @@
num_ec = sum(1 for line in open('./kallisto_index/Trapnell_index.idx_ecmap.txt'))
print(num_ec)
+# Use hacked kallisto to generate TCCs (output is .class file that contains counts of ECs)
print('Generating TCC (with hacked kallisto)...')
os.system('mkdir -p ./transcript_compatibility_counts/')
os.system('rm -f ./transcript_compatibility_counts/*')
os.system('python get_pseudoalignments_paired_end.py -i ./reads/ -o ./transcript_compatibility_counts/ -k '+kallipso_path+ ' -t '+ index_path +' -n '+ str(num_proc))
+# Process the .class file into a matrix. normalize such that each row sums to 1
print('Generating TCC distribution...')
-
os.system('python get_tcc_dist.py -i ./transcript_compatibility_counts/ -m '+str(num_ec)+' -t ./Trapnell_TCC.dat -d ./Trapnell_TCC_distribution.dat')
+# Process the TCC distribution matrix to get pairwise distributions
print('Generating pairwise distances...')
-
os.system('python get_pairwise_distances.py ./Trapnell_TCC_distribution.dat ./Trapnell_TCC_pairwise_distance.dat '+str(num_proc))

0 comments on commit fb3328d

Please sign in to comment.