Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Anderson authored and Anderson committed Mar 14, 2019
1 parent 07492bb commit a90c3fe
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 20 deletions.
5 changes: 2 additions & 3 deletions nn_classifier.R
Expand Up @@ -34,16 +34,15 @@ taxa <- colnames(pdm)
for (i in 1:length(taxa)) {
#Check if clade designation is present, skip if so
defSplit = strsplit(taxa[i],"|", fixed=TRUE)
if(length(defSplit[[1]]) >=min(col_annotated)) { #Assumption! One delimiter (JC: can we make this more robust?)
if(length(defSplit[[1]]) >=col_annotated[1]) { #Assumption! One delimiter (JC: can we make this more robust?)
next
}

#Find the nearest neighbor with a clade label otherwise. Starting at index 1 in case of a 100% identity label match
orderedList = sort(pdm[,i])
for (j in 1:length(orderedList)) {
compSplit = strsplit(names(orderedList[j]),"|", fixed=TRUE)

if(length(compSplit[[1]]) >=min(col_annotated)) {
if(length(compSplit[[1]]) >=col_annotated[1]) {
out_str=taxa[i]
for (ca in col_annotated){ # print out each column of annotations
out_str=paste(out_str,compSplit[[1]][ca], sep='\t')
Expand Down
34 changes: 17 additions & 17 deletions pipeline.sh
Expand Up @@ -13,12 +13,12 @@ BASENAME=$(basename $1)
REFERENCE=sample_data/reference.fa

# Connect your programs here, can use full path names
BLASTN=~/bin/blastn
MAKEBLASTDB=~/bin/makeblastdb
SMOF=~/bin/smof
MAFFT=`which mafft`
FASTTREE=~/bin/FastTree
MICHAEL=nn_classifier.R
BLASTN=/usr/local/bin/blastn
MAKEBLASTDB=/usr/local/bin/makeblastdb
SMOF=/usr/local/bin/smof.py
MAFFT=/usr/local/bin/mafft
FASTTREE=/usr/local/bin/FastTreeMP
NN_CLASS=nn_classifier.R

# Create your Blast Database
${MAKEBLASTDB} -in ${REFERENCE} -parse_seqids -dbtype nucl
Expand Down Expand Up @@ -58,7 +58,7 @@ do
echo "${SEG}"
if [ -s ${BASENAME}_${SEG}.ids ]
then
${MAFFT} --auto ${BASENAME}_${SEG}.fa > ${BASENAME}_${SEG}_aln.fa
${MAFFT} --thread -1 --auto ${BASENAME}_${SEG}.fa > ${BASENAME}_${SEG}_aln.fa
${FASTTREE} -nt ${BASENAME}_${SEG}_aln.fa > ${BASENAME}_${SEG}.tre
fi
done
Expand All @@ -68,16 +68,16 @@ touch ${BASENAME}_Final_Output.txt
rm ${BASENAME}_Final_Output.txt
touch ${BASENAME}_Final_Output.txt

[ -s ${BASENAME}_H1.ids ] && Rscript ${MICHAEL} ${BASENAME}_H1.tre 5 6 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_H3.ids ] && Rscript ${MICHAEL} ${BASENAME}_H3.tre 5 6 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_N1.ids ] && Rscript ${MICHAEL} ${BASENAME}_N1.tre 5 7 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_N2.ids ] && Rscript ${MICHAEL} ${BASENAME}_N2.tre 5 7 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_PB2.ids ] && Rscript ${MICHAEL} ${BASENAME}_PB2.tre 5 8 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_PB1.ids ] && Rscript ${MICHAEL} ${BASENAME}_PB1.tre 5 8 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_PA.ids ] && Rscript ${MICHAEL} ${BASENAME}_PA.tre 5 8 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_NP.ids ] && Rscript ${MICHAEL} ${BASENAME}_NP.tre 5 8 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_M.ids ] && Rscript ${MICHAEL} ${BASENAME}_M.tre 5 8 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_NS.ids ] && Rscript ${MICHAEL} ${BASENAME}_NS.tre 5 8 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_H1.ids ] && Rscript ${NN_CLASS} ${BASENAME}_H1.tre 4 7 8 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_H3.ids ] && Rscript ${NN_CLASS} ${BASENAME}_H3.tre 4 7 8 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_N1.ids ] && Rscript ${NN_CLASS} ${BASENAME}_N1.tre 5 1 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_N2.ids ] && Rscript ${NN_CLASS} ${BASENAME}_N2.tre 5 1 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_PB2.ids ] && Rscript ${NN_CLASS} ${BASENAME}_PB2.tre 5 1 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_PB1.ids ] && Rscript ${NN_CLASS} ${BASENAME}_PB1.tre 5 1 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_PA.ids ] && Rscript ${NN_CLASS} ${BASENAME}_PA.tre 5 1 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_NP.ids ] && Rscript ${NN_CLASS} ${BASENAME}_NP.tre 5 1 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_M.ids ] && Rscript ${NN_CLASS} ${BASENAME}_M.tre 5 1 >> ${BASENAME}_Final_Output.txt
[ -s ${BASENAME}_NS.ids ] && Rscript ${NN_CLASS} ${BASENAME}_NS.tre 5 1 >> ${BASENAME}_Final_Output.txt

echo "==== Final results in ${BASENAME}_Final_Output.txt"
echo "Tree files are listed below: "
Expand Down

0 comments on commit a90c3fe

Please sign in to comment.