From fb3328daffcec215549d7efef081468cd8005dd7 Mon Sep 17 00:00:00 2001 From: Jesse Min Zhang Date: Sat, 27 Feb 2016 19:13:44 -0800 Subject: [PATCH] Added comments --- Trapnell_pipeline/Trapnell_wrapper.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Trapnell_pipeline/Trapnell_wrapper.py b/Trapnell_pipeline/Trapnell_wrapper.py index 892ff73..49fd8be 100644 --- a/Trapnell_pipeline/Trapnell_wrapper.py +++ b/Trapnell_pipeline/Trapnell_wrapper.py @@ -3,13 +3,15 @@ import sys import numpy as np +# Parse the options from command line try: opts, args = getopt.getopt(sys.argv[1:],"i:n:k:t:",["idir=","njobs=","hacked-kallisto-path=","reference-transcriptome="]) except getopt.GetoptError: print ("getopterrror") print ('usage is : \n python Trapnell_wrapper.py -i input_SRA_dir -k path-to-hacked-kallisto -t path-to-human-reference-transcriptome [-n number-of-processes-to-use]') sys.exit(1) - + +# The four options used in this pipeline are SRA_dir='' num_proc=1 kallipso_path='' @@ -25,23 +27,25 @@ elif opt in ("-t","--reference-transcriptome"): ref_transcriptome=arg - +# Error message if function not called correctly if (not SRA_dir) or (not kallipso_path) or (not ref_transcriptome): print ('usage is : \n python Trapnell_wrapper.py -i input_SRA_dir -k path-to-hacked-kallisto -t path-to-human-reference-transcriptome [-n number-of-processes-to-use]') sys.exit(1) +# Execute process_SRA, which converts SRA to fastq print('Extracting reads from SRAs...') os.system('mkdir -p ./reads/') os.system('rm -f ./reads/*') os.system('python process_SRA.py -i '+SRA_dir+' -o ./reads/ -n '+str(num_proc)) +# This removes files that Trapnell didn't use. (Specific for analysis of this dataset) print('Removing files that Trapnell throws...') relevant_fls=np.loadtxt('Files_to_keep.txt',dtype=str) fls_to_remove=[cells for cells in os.listdir('./reads/') if cells.split('_')[0] not in relevant_fls ] for fl in fls_to_remove: os.system('rm '+'./reads/'+fl) - +# Use kallisto to build index. should return same results as hacked kallisto print('Generating the Kallisto index (with hacked kallisto)...') os.system('mkdir -p ./kallisto_index') os.system('rm -f ./kallisto_index/*') @@ -52,15 +56,16 @@ num_ec = sum(1 for line in open('./kallisto_index/Trapnell_index.idx_ecmap.txt')) print(num_ec) +# Use hacked kallisto to generate TCCs (output is .class file that contains counts of ECs) print('Generating TCC (with hacked kallisto)...') os.system('mkdir -p ./transcript_compatibility_counts/') os.system('rm -f ./transcript_compatibility_counts/*') os.system('python get_pseudoalignments_paired_end.py -i ./reads/ -o ./transcript_compatibility_counts/ -k '+kallipso_path+ ' -t '+ index_path +' -n '+ str(num_proc)) +# Process the .class file into a matrix. normalize such that each row sums to 1 print('Generating TCC distribution...') - os.system('python get_tcc_dist.py -i ./transcript_compatibility_counts/ -m '+str(num_ec)+' -t ./Trapnell_TCC.dat -d ./Trapnell_TCC_distribution.dat') +# Process the TCC distribution matrix to get pairwise distributions print('Generating pairwise distances...') - os.system('python get_pairwise_distances.py ./Trapnell_TCC_distribution.dat ./Trapnell_TCC_pairwise_distance.dat '+str(num_proc))