Skip to content

Commit

Permalink
scripts for iteratively running the old unitigger
Browse files Browse the repository at this point in the history
  • Loading branch information
james committed Jan 28, 2014
1 parent e8a7565 commit 27e475e
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 5 deletions.
5 changes: 5 additions & 0 deletions consensus.sh
@@ -0,0 +1,5 @@
#!/bin/bash
#
# SGE array-job worker: runs utgcns against cns.gkpStore / mod.tigstore.03 for
# one task. The array task id is zero-padded to three digits and passed, after
# the literal `1`, as the positional arguments of utgcns (presumably tigStore
# version and partition -- confirm against the utgcns usage text).
#
# Required environment:
#   SGE_TASK_ID  numeric task id of the array job

# With an unset task id, printf would silently produce "000" and the job would
# run on a bogus partition; refuse to run instead.
: "${SGE_TASK_ID:?SGE_TASK_ID is not set; submit this script as an SGE array job}"

# printf also fails on a non-numeric value (e.g. "undefined"); abort then too.
id=$(printf '%03d' "$SGE_TASK_ID") || exit 1

/bluearc/home/schatz/gurtowsk/sources/wgs.svn/Linux-amd64/bin/utgcns -g cns.gkpStore -t mod.tigstore.03 1 "$id" -V
10 changes: 5 additions & 5 deletions correct.sh
Expand Up @@ -9,10 +9,10 @@ source ~/.bashrc
##SET THE FOLLOWING PARAMETERS

#path to the pb_correct.py script in the ectools repo
CORRECT_SCRIPT=/bluearc/home/schatz/gurtowsk/workspace/ectools/pb_correct.py
CORRECT_SCRIPT=/path/to/ectools/pb_correct.py

#path to the script that pre-filters the delta file
PRE_DELTA_FILTER_SCRIPT=/bluearc/home/schatz/gurtowsk/workspace/ectools/pre_delta_filter.py
PRE_DELTA_FILTER_SCRIPT=/path/to/ectools/pre_delta_filter.py

#smallest alignment allowed, filter out alignments smaller than this
MIN_ALIGNMENT_LEN=200
Expand All @@ -25,7 +25,7 @@ WIGGLE_PCT=0.05
CONTAINED_PCT_ID=0.80

#path to high identity unitigs
UNITIG_FILE=/seq/schatz/a.thaliana/xiwang/james_workspace/nucmer-round-sim/misim.utg.g10frg.fa
UNITIG_FILE=/path/to/unitigs.fa

#Trim out regions with identity lower than this
CLR_PCT_ID=0.96
Expand All @@ -40,15 +40,15 @@ FILE=p${suffix}

ORIGINAL_DIR=`pwd`

Move to sge temp storage
#Move to sge temp storage
if [[ $TMPDIR ]]
then
cd $TMPDIR
fi

cp ${ORIGINAL_DIR}/${FILE} .

nucmer --maxmatch -l 11 -b 2000 -g 1000 -p ${FILE} ${FILE} ${UNITIG_FILE}
nucmer --maxmatch -l 11 -b 10000 -g 1000 -p ${FILE} ${FILE} ${UNITIG_FILE}

cp ${FILE}.delta ${ORIGINAL_DIR}

Expand Down
33 changes: 33 additions & 0 deletions tigiterate.sh
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Iteratively re-runs the old unitigger. Each round:
#   1. reduces the current unitigs.layout to its "unitig" lines and the tail of
#      its "FRG" lines (first four fields dropped),
#   2. marks every fragment of a unitig with fewer than three fragments as
#      deleted in gkpStore.modified,
#   3. rebuilds unitigs into mod.tigstore.<round>, dumps the new layout and
#      length statistics,
#   4. re-points the unitigs.layout symlink at the new layout for the next round.
#
# The round aborts on the first failing assembler step so that a broken run is
# never fed into the following rounds (gatekeeper edits accumulate in
# gkpStore.modified).
#
# Expected in the working directory before starting:
#symlink unitigs.layout
#symlink ovlStore
#cp gkpStore to gkpStore.modified
#
# Note: the zero-padded range {01..09} needs bash >= 4.

GENOME_SIZE=12000000
FRAGS_PER_PARTITION=10
ERROR_RATE=0.02

# Print a message to stderr and stop the whole iteration.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

for i in {01..09}
do
  [[ -e unitigs.layout ]] || die "round ${i}: unitigs.layout is missing"

  # Keep "unitig ..." lines as-is; for FRG lines blank the first four fields so
  # the line starts with what followed them. Collapse adjacent duplicates.
  awk '{if($1=="FRG"){$1=$2=$3=$4=""; print $0}else if($1=="unitig"){print}}' unitigs.layout \
    | sed "s/^\s*//" | uniq > "unitigs.layout.${i}.clean"

  # Turn the file into one line per unitig listing the first field of each of
  # its fragment lines, keep unitigs with fewer than three entries, and emit
  # one gatekeeper edit command per remaining entry.
  awk '{print $1}' "unitigs.layout.${i}.clean" \
    | sed "s/unitig/>/g" | tr '\n' ' ' | tr '>' '\n' \
    | awk 'NF < 3' | tr ' ' '\n' | awk 'NF >0' \
    | awk '{ print "frg iid "$1" isdeleted t" }' > "bad.${i}"

  # Disabled alternative selection (by unitig length), kept for reference:
  #python ~/workspace/ectools/filter.py <(/bluearc/data/schatz/mschatz/devel/bin/tig_length.pl unitigs.layout | awk '$2 < 100000' | awk '{ print $1}') <(less unitigs.layout.${i}.clean | sed "s/unitig />/g" | awk '{print $1}' | tr '\n' ' ' | tr '>' '\n' | awk 'NF > 0') | awk '{ for(i=2;i<=NF;i++){print $i}}' | awk '{print "frg iid "$1" isdeleted t" }' > bad.${i}

  /bluearc/home/schatz/gurtowsk/sources/wgs.svn/Linux-amd64/bin/gatekeeper --edit "bad.${i}" gkpStore.modified &> "mod.log.${i}" \
    || die "round ${i}: gatekeeper --edit failed (see mod.log.${i})"

  /bluearc/home/schatz/gurtowsk/sources/wgs.svn.utgdevel/Linux-amd64/bin/unitigger -I ovlStore -F gkpStore.modified -T "mod.tigstore.${i}" -B "$FRAGS_PER_PARTITION" -e "$ERROR_RATE" -k -d 1 -x 5 -z 10 -j 5 -U 1 -o "utg.${i}" &> "log.${i}" \
    || die "round ${i}: unitigger failed (see log.${i})"

  /bluearc/home/schatz/gurtowsk/sources/wgs.svn/Linux-amd64/bin/tigStore -g gkpStore.modified -t "mod.tigstore.${i}" 1 -d layout -U > "unitigs.layout.${i}" \
    || die "round ${i}: tigStore layout dump failed"

  # An empty dump would make the next round operate on nothing.
  [[ -s "unitigs.layout.${i}" ]] || die "round ${i}: unitigs.layout.${i} is empty"

  # Reporting only: sort by the second column (descending, numeric) and
  # summarise with stats.
  /bluearc/data/schatz/mschatz/devel/bin/tig_length.pl "unitigs.layout.${i}" | sort -n -r -k2,2 > "unitigs.layout.${i}.lens"

  stats -f 2 -n50 "$GENOME_SIZE" "unitigs.layout.${i}.lens" | tee "unitigs.layout.${i}.stats"

  # Replace the symlink in one step so the next round reads this round's layout.
  ln -sfn "unitigs.layout.${i}" unitigs.layout \
    || die "round ${i}: could not re-point unitigs.layout"

done

0 comments on commit 27e475e

Please sign in to comment.