From a90c3fe91bca730cee530085f5e4989b66a367d0 Mon Sep 17 00:00:00 2001 From: Anderson Date: Thu, 14 Mar 2019 15:03:34 -0500 Subject: [PATCH] update --- nn_classifier.R | 5 ++--- pipeline.sh | 34 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/nn_classifier.R b/nn_classifier.R index 5f712d1..1860f03 100644 --- a/nn_classifier.R +++ b/nn_classifier.R @@ -34,7 +34,7 @@ taxa <- colnames(pdm) for (i in 1:length(taxa)) { #Check if clade designation is present, skip if so defSplit = strsplit(taxa[i],"|", fixed=TRUE) - if(length(defSplit[[1]]) >=min(col_annotated)) { #Assumption! One delimiter (JC: can we make this more robust?) + if(length(defSplit[[1]]) >=col_annotated[1]) { #Assumption! One delimiter (JC: can we make this more robust?) next } @@ -42,8 +42,7 @@ for (i in 1:length(taxa)) { orderedList = sort(pdm[,i]) for (j in 1:length(orderedList)) { compSplit = strsplit(names(orderedList[j]),"|", fixed=TRUE) - - if(length(compSplit[[1]]) >=min(col_annotated)) { + if(length(compSplit[[1]]) >=col_annotated[1]) { out_str=taxa[i] for (ca in col_annotated){ # print out each column of annotations out_str=paste(out_str,compSplit[[1]][ca], sep='\t') diff --git a/pipeline.sh b/pipeline.sh index bd81e5a..76157c0 100755 --- a/pipeline.sh +++ b/pipeline.sh @@ -13,12 +13,12 @@ BASENAME=$(basename $1) REFERENCE=sample_data/reference.fa # Connect your programs here, can use full path names -BLASTN=~/bin/blastn -MAKEBLASTDB=~/bin/makeblastdb -SMOF=~/bin/smof -MAFFT=`which mafft` -FASTTREE=~/bin/FastTree -MICHAEL=nn_classifier.R +BLASTN=/usr/local/bin/blastn +MAKEBLASTDB=/usr/local/bin/makeblastdb +SMOF=/usr/local/bin/smof.py +MAFFT=/usr/local/bin/mafft +FASTTREE=/usr/local/bin/FastTreeMP +NN_CLASS=nn_classifier.R # Create your Blast Database ${MAKEBLASTDB} -in ${REFERENCE} -parse_seqids -dbtype nucl @@ -58,7 +58,7 @@ do echo "${SEG}" if [ -s ${BASENAME}_${SEG}.ids ] then - ${MAFFT} --auto ${BASENAME}_${SEG}.fa > ${BASENAME}_${SEG}_aln.fa + ${MAFFT} --thread -1 --auto ${BASENAME}_${SEG}.fa > ${BASENAME}_${SEG}_aln.fa ${FASTTREE} -nt ${BASENAME}_${SEG}_aln.fa > ${BASENAME}_${SEG}.tre fi done @@ -68,16 +68,16 @@ touch ${BASENAME}_Final_Output.txt rm ${BASENAME}_Final_Output.txt touch ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_H1.ids ] && Rscript ${MICHAEL} ${BASENAME}_H1.tre 5 6 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_H3.ids ] && Rscript ${MICHAEL} ${BASENAME}_H3.tre 5 6 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_N1.ids ] && Rscript ${MICHAEL} ${BASENAME}_N1.tre 5 7 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_N2.ids ] && Rscript ${MICHAEL} ${BASENAME}_N2.tre 5 7 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_PB2.ids ] && Rscript ${MICHAEL} ${BASENAME}_PB2.tre 5 8 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_PB1.ids ] && Rscript ${MICHAEL} ${BASENAME}_PB1.tre 5 8 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_PA.ids ] && Rscript ${MICHAEL} ${BASENAME}_PA.tre 5 8 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_NP.ids ] && Rscript ${MICHAEL} ${BASENAME}_NP.tre 5 8 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_M.ids ] && Rscript ${MICHAEL} ${BASENAME}_M.tre 5 8 >> ${BASENAME}_Final_Output.txt -[ -s ${BASENAME}_NS.ids ] && Rscript ${MICHAEL} ${BASENAME}_NS.tre 5 8 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_H1.ids ] && Rscript ${NN_CLASS} ${BASENAME}_H1.tre 4 7 8 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_H3.ids ] && Rscript ${NN_CLASS} ${BASENAME}_H3.tre 4 7 8 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_N1.ids ] && Rscript ${NN_CLASS} ${BASENAME}_N1.tre 5 1 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_N2.ids ] && Rscript ${NN_CLASS} ${BASENAME}_N2.tre 5 1 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_PB2.ids ] && Rscript ${NN_CLASS} ${BASENAME}_PB2.tre 5 1 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_PB1.ids ] && Rscript ${NN_CLASS} ${BASENAME}_PB1.tre 5 1 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_PA.ids ] && Rscript ${NN_CLASS} ${BASENAME}_PA.tre 5 1 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_NP.ids ] && Rscript ${NN_CLASS} ${BASENAME}_NP.tre 5 1 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_M.ids ] && Rscript ${NN_CLASS} ${BASENAME}_M.tre 5 1 >> ${BASENAME}_Final_Output.txt +[ -s ${BASENAME}_NS.ids ] && Rscript ${NN_CLASS} ${BASENAME}_NS.tre 5 1 >> ${BASENAME}_Final_Output.txt echo "==== Final results in ${BASENAME}_Final_Output.txt" echo "Tree files are listed below: "