Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
the public release tgz of velvet contains a one line difference in th…
…e test fasta swapping NNN for AAA, and also a complete directory tree of MetaVelvet-v0.3.1
- Loading branch information
Showing
71 changed files
with
27,425 additions
and
1 deletion.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Build configuration for the meta-velveth / meta-velvetg binaries.

# Compiler and flag groups.  OPT is passed when building the objects under
# obj/, DEBUG when building the objects under obj/dbg/.
CC = gcc
CFLAGS = -Wall
DEBUG = -g
LDFLAGS = -lm
OPT = -O3
# Compile-time parameters; they reach the C sources as -D macros via DEF.
MAXKMERLENGTH=31
CATEGORIES=2
DEF = -D MAXKMERLENGTH=$(MAXKMERLENGTH) -D CATEGORIES=$(CATEGORIES)

# Bundled zlib.  Z_LIB_FILES is a shell glob over its object files and is
# appended to every link command.
Z_LIB_DIR=../../third-party/zlib-1.2.3
Z_LIB_FILES=$(Z_LIB_DIR)/*.o

# Mac OS users: uncomment the following lines
# Z_LIB_FILES=
# LDFLAGS = -lm -lz
# CFLAGS = -Wall -m64

# Sparc/Solaris users: uncomment the following line
# CFLAGS = -Wall -m64

# OBJ lists every object compiled from src/*.c; OBJDBG is the same list with
# the obj/ prefix rewritten to obj/dbg/.
OBJ = obj/tightString.o obj/run.o obj/splay.o obj/splayTable.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/shortReadPairs.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/crc.o obj/utility.o obj/kmer.o obj/scaffold.o
OBJDBG = $(subst obj,obj/dbg,$(OBJ))
|
||
# Housekeeping targets.  None of them names a real file, so declare them
# phony to keep a stray file called e.g. "clean" from disabling the rule.
.PHONY: default clean cleanobj zlib

# Default build: start from clean objects, build zlib, compile, then link
# both binaries.
default : cleanobj zlib obj meta-velveth meta-velvetg

# Remove objects and every binary the link rules can produce (including
# velvetg_de from the colorspace builds), then clean the bundled zlib.
clean :
	-rm -f obj/*.o obj/dbg/*.o ./meta-velvet* ./velvetg_de
	cd $(Z_LIB_DIR) && $(MAKE) clean

# Remove compiled objects only; the binaries are kept.
cleanobj:
	-rm -f obj/*.o obj/dbg/*.o

# Configure and build the bundled zlib.  The steps are chained with && so a
# failed cd/configure/make stops the recipe instead of running the remaining
# commands in the wrong directory.  minigzip.o and example.o are removed
# afterwards, presumably because Z_LIB_FILES globs every *.o in that
# directory and these two belong to zlib's demo programs.
zlib :
	cd $(Z_LIB_DIR) && ./configure && $(MAKE) && rm -f minigzip.o example.o
|
||
# Link rules.
#
# Each binary is linked from a fixed subset of the compiled objects plus the
# bundled zlib objects.  $(LDFLAGS) carries the libraries (-lm, optionally
# -lz), so it is placed AFTER the object files: linkers that resolve symbols
# in command-line order drop a library that is named before the objects that
# need it.
.PHONY: debug color colordebug

# Source base names that make up each binary (link order preserved).
VELVETH_NAMES = tightString run recycleBin splay splayTable readSet crc utility kmer
VELVETG_NAMES = tightString graph run2 fibHeap fib concatenatedGraph passageMarker graphStats correctedGraph dfib dfibHeap recycleBin readSet shortReadPairs scaffold locallyCorrectedGraph graphReConstruction roadMap preGraph preGraphConstruction concatenatedPreGraph readCoherentGraph utility kmer

# The same lists as object paths, optimised (obj/) and debug (obj/dbg/).
VELVETH_OBJ = $(VELVETH_NAMES:%=obj/%.o)
VELVETG_OBJ = $(VELVETG_NAMES:%=obj/%.o)
VELVETH_OBJDBG = $(VELVETH_NAMES:%=obj/dbg/%.o)
VELVETG_OBJDBG = $(VELVETG_NAMES:%=obj/dbg/%.o)

meta-velveth : obj
	$(CC) $(CFLAGS) $(OPT) -o meta-velveth $(VELVETH_OBJ) $(Z_LIB_FILES) $(LDFLAGS)

meta-velvetg : obj
	$(CC) $(CFLAGS) $(OPT) -o meta-velvetg $(VELVETG_OBJ) $(Z_LIB_FILES) $(LDFLAGS)

# Debug build: same binaries, linked from the -g objects under obj/dbg.
debug : cleanobj obj/dbg
	$(CC) $(CFLAGS) $(DEBUG) -o meta-velveth $(VELVETH_OBJDBG) $(Z_LIB_FILES) $(LDFLAGS)
	$(CC) $(CFLAGS) $(DEBUG) -o meta-velvetg $(VELVETG_OBJDBG) $(Z_LIB_FILES) $(LDFLAGS)

# Colorspace build (objects compiled with -D COLOR by obj_de).  The output
# names meta-velveth_de / velvetg_de are kept exactly as they were.
color : cleanobj obj_de
	$(CC) $(CFLAGS) $(OPT) -o meta-velveth_de $(VELVETH_OBJ) $(Z_LIB_FILES) $(LDFLAGS)
	$(CC) $(CFLAGS) $(OPT) -o velvetg_de $(VELVETG_OBJ) $(Z_LIB_FILES) $(LDFLAGS)

# Colorspace debug build.
colordebug : cleanobj obj/dbg_de
	$(CC) $(CFLAGS) $(DEBUG) -o meta-velveth_de $(VELVETH_OBJDBG) $(Z_LIB_FILES) $(LDFLAGS)
	$(CC) $(CFLAGS) $(DEBUG) -o velvetg_de $(VELVETG_OBJDBG) $(Z_LIB_FILES) $(LDFLAGS)
|
||
# Aggregate compile targets.  They group prerequisites and create
# directories; none of them is a file that make should timestamp-check
# ("obj" and "obj/dbg" are directories), so they are declared phony.
.PHONY: objdir obj obj_de obj/dbgdir obj/dbg obj/dbg_de

objdir:
	mkdir -p obj

# Optimised objects.
obj: zlib objdir $(OBJ)

# Optimised colorspace objects: -D COLOR is appended to DEF for this target
# and, being target-specific, for the prerequisites built on its behalf.
obj_de: override DEF := $(DEF) -D COLOR
obj_de: zlib cleanobj objdir $(OBJ)

obj/dbgdir:
	mkdir -p obj/dbg

# Debug objects.
obj/dbg: zlib obj/dbgdir $(OBJDBG)

# Debug colorspace objects.
obj/dbg_de: override DEF := $(DEF) -D COLOR
obj/dbg_de: zlib cleanobj obj/dbgdir $(OBJDBG)

# Compile one source file.  $< is the first prerequisite (the .c file);
# $? would expand to whichever prerequisites are newer than the target and
# stops being "the source file" as soon as a rule gains a second
# prerequisite such as a header.
obj/%.o: src/%.c
	$(CC) $(CFLAGS) $(OPT) $(DEF) -c $< -o $@

obj/dbg/%.o: src/%.c
	$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $< -o $@
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
README.TXT | ||
|
||
VELVET SOURCE | ||
March 28 2008 | ||
Daniel Zerbino | ||
|
||
NOTE: The PDF manual in this directory contains all the information contained | ||
in this text file, plus much more! | ||
|
||
> SUMMARY | ||
* A/ REQUIREMENTS | ||
* B/ COMPILING INSTRUCTIONS | ||
* C/ WHERE IS THE MANUAL? | ||
|
||
---------------------------------------------------------------------------------- | ||
A/ REQUIREMENTS | ||
|
||
Velvet should function on any standard 64bit Linux environment with | ||
gcc. A good amount of physical memory (12GB to start with, more is no luxury) | ||
is recommended. | ||
|
||
---------------------------------------------------------------------------------- | ||
B/ COMPILING INSTRUCTIONS | ||
|
||
Normally, with a GNU environment, just type: | ||
|
||
> make | ||
|
||
For colorspace Velvet replace that command with | ||
|
||
> make color | ||
|
||
Otherwise compile each *.c file separately, then execute the default | ||
instructions at the top of Makefile. | ||
|
||
---------------------------------------------------------------------------------- | ||
C/ WHERE IS THE MANUAL? | ||
|
||
If you cannot find the PDF manual in the source directory (probably because | ||
you downloaded Velvet through git), you can simply compile it: | ||
|
||
> make doc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,243 @@ | ||
#!/usr/bin/env python2.5 | ||
# -*- coding: utf-8 -*- | ||
|
||
import sys | ||
import math | ||
import random | ||
|
||
|
||
# Define functions | ||
def importStats(fin_stats):
    """Parse a tab-separated stats table into a dict of column lists.

    The first line is the header; every header field becomes a key whose
    value is the list of that column's entries, in file order.  Fields at
    column indices 0, 1, 2, 3, 9, 10 and 11 are converted with int(), all
    other fields with float().

    fin_stats -- an open, line-iterable text file object.

    Returns the dict (empty when the file has no lines).  Blank lines after
    the header are skipped; a malformed number still raises ValueError.
    """
    intColumns = (0, 1, 2, 3, 9, 10, 11)
    dicStats = {}
    listHeader = []

    for line in fin_stats:
        listField = line.rstrip("\r\n").split("\t")

        if len(dicStats) == 0:
            # First line: column names.
            listHeader = listField
            for header in listHeader:
                dicStats[header] = []
            continue

        if listField == [""]:
            # A blank line (typically a trailing one) carries no record;
            # feeding it to int() would abort the whole import.
            continue

        for i, field in enumerate(listField):
            if i in intColumns:
                stats = int(field)
            else:
                stats = float(field)
            dicStats[listHeader[i]].append(stats)

    return dicStats
|
||
|
||
def weightedHisto(dicStats, xMin, xMax, binWidth):
    """Build a length-weighted histogram of the "short1_cov" column.

    Bins start at xMin, xMin + binWidth, ... (all bin starts below xMax) and
    are keyed by their start value.  Each entry whose coverage lies in
    [xMin, xMax) adds its "lgth" value to the bin that contains the
    coverage; entries outside that range are ignored.

    dicStats -- dict holding the parallel lists "short1_cov" and "lgth".
    xMin, xMax, binWidth -- integers describing the bins.

    Returns a dict {bin start: summed length}.
    """
    dicHisto = {}
    for x in range(xMin, xMax, binWidth):
        dicHisto[x] = 0

    for cov, lgth in zip(dicStats["short1_cov"], dicStats["lgth"]):
        # Written as a positive range test so that NaN is skipped as well.
        if not (xMin <= cov < xMax):
            continue
        # Locate the bin arithmetically rather than scanning every bin.
        x = xMin + int((cov - xMin) // binWidth) * binWidth
        dicHisto[x] += lgth

    return dicHisto
|
||
|
||
def smoothingHisto(dicHisto, xMin, xMax, binWidth, widthMovAve):
    """Smooth a histogram with a moving average over widthMovAve bins.

    dicHisto -- dict {bin start: value} with a key for every bin start in
                range(xMin, xMax, binWidth).
    widthMovAve -- number of consecutive bins averaged together.

    Returns a dict {bin start: mean of the window}.  A window is keyed by
    the start of the bin that lies (widthMovAve - 1) // 2 bins before the
    window's last bin, so the result has widthMovAve - 1 fewer bins than
    the input.
    """
    dicSmoothHisto = {}
    listMovAve = []
    # Floor division keeps the offset, and therefore the keys, integral on
    # every Python version.
    halfWidth = (widthMovAve - 1) // 2

    for x in range(xMin, xMax, binWidth):
        listMovAve.append(dicHisto[x])
        if len(listMovAve) < widthMovAve:
            continue  # window not full yet
        dicSmoothHisto[x - binWidth * halfWidth] = \
            sum(listMovAve) / float(widthMovAve)
        listMovAve.pop(0)  # slide the window by one bin

    return dicSmoothHisto
|
||
|
||
def printHisto(dicHisto, xMin, xMax, binWidth):
    """Write a text bar chart of the histogram to standard output.

    One row is written per bin start in range(xMin, xMax, binWidth): the
    bin start, a tab, then "=" marks.  The number of marks is the bin value
    divided by 20000, rounded, minus one (no marks when that is <= 0).  A
    blank line follows the last row.

    Output goes through sys.stdout.write so the function runs on Python 2
    and Python 3 alike.  Marks are separated by single spaces and a row
    with marks ends in " \\n" -- the byte layout that Python 2's
    comma-terminated print statements produce -- so the text is unchanged.
    """
    for x in range(xMin, xMax, binWidth):
        lenBar = int(round((dicHisto[x] / 20000), 0)) - 1
        listMark = ["="] * lenBar  # empty when lenBar <= 0
        if listMark:
            sys.stdout.write(str(x) + "\t" + " ".join(listMark) + " \n")
        else:
            sys.stdout.write(str(x) + "\t\n")
    sys.stdout.write("\n")
|
||
|
||
def setXMax(xMax, binWidth):
    """Round xMax down to a multiple of binWidth and return it as an int."""
    numWholeBin = math.floor(xMax / binWidth)
    return int(numWholeBin * binWidth)
|
||
|
||
def getFirstXMax(dicStats, binWidth, thresConLen):
    """Choose the initial upper bound of the coverage histogram.

    Among the entries whose "lgth" is at least thresConLen, take the
    second-highest "short1_cov" value (0 when fewer than two such entries
    have positive coverage), round it down to a multiple of binWidth with
    setXMax() and add five bins of margin.

    dicStats -- dict holding the parallel lists "lgth" and "short1_cov".

    Returns the bound as an int.
    """
    maxCov = 0
    subMaxCov = 0

    for lgth, cov in zip(dicStats["lgth"], dicStats["short1_cov"]):
        if lgth < thresConLen:
            continue
        if cov > maxCov:
            subMaxCov = maxCov
            maxCov = cov
        elif cov > subMaxCov:
            # A value between the two current leaders becomes the new
            # runner-up; without this branch the result depends on the
            # order of the rows.
            subMaxCov = cov

    return setXMax(subMaxCov, binWidth) + binWidth * 5
|
||
|
||
def getN50(tupleConLen):
    """Return the N50 of a collection of lengths.

    The lengths are visited from longest to shortest; the N50 is the first
    length at which the running sum reaches at least half of the total.

    tupleConLen -- iterable of non-negative lengths.

    Returns the N50, or -1 when the collection is empty.
    """
    totalLen = sum(tupleConLen)
    sumLen = 0

    for conLen in sorted(tupleConLen, reverse=True):
        sumLen += conLen
        # Compare doubled sums instead of dividing: halving an odd integer
        # total truncates on Python 2 and lets the test pass one contig
        # too early.
        if sumLen * 2 >= totalLen:
            return conLen

    return -1
|
||
|
||
def setWidthByXMax(xMax):
    """Pick [binWidth, widthMovAve] for a histogram whose upper bound is xMax.

    The narrowest matching band wins: up to 100 -> [1, 1], up to 120 ->
    [2, 3], up to 300 -> [4, 3], above 300 -> [6, 5].  [0, 0] is returned
    when no comparison holds.
    """
    if xMax <= 100:
        return [1, 1]
    if xMax <= 120:
        return [2, 3]
    if xMax <= 300:
        return [4, 3]
    if xMax > 300:
        return [6, 5]
    return [0, 0]
|
||
|
||
# Scan the histogram from high coverage towards low coverage and record
# peak positions in listPeakPandS.
#
#   dicHisto      -- dict {bin start: height}; must hold every bin start
#                    visited by the loop below.
#   xMin, xMax    -- scan bounds; bins xMax - binWidth down to xMin are read.
#   binWidth      -- distance between bin starts.
#   thresHeight   -- minimum height a bin needs to count towards the start
#                    of a peak.
#   listPeakPandS -- two-element list; a slot equal to -1 is "not found yet".
#                    The list is modified in place.
#
# Returns listPeakPandS: as soon as its second slot is filled, otherwise
# after the scan has reached xMin.
#
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation.  Confirm it against the upstream file, in particular
# the level of the trailing "beforeHeight = dicHisto[x]" statement.
def detectPeakPandS(dicHisto, xMin, xMax, binWidth,
                    thresHeight, listPeakPandS):
    # Number of qualifying bins needed to open a peak / falling bins needed
    # to close it.
    countIncrease = 0; thresIncrease = 3
    countDecrease = 0; thresDecrease = 3
    beforeHeight = -1  # -1 marks "no bin seen yet"
    flagPeakStart = False
    peakHeight = 0; peakCov = 0

    # Walk the bins right to left.
    for x in range(xMax - binWidth, xMin - binWidth, -1 * binWidth):
        # The first bin only seeds beforeHeight.
        if beforeHeight == -1:
            beforeHeight = dicHisto[x]
            continue

        if not flagPeakStart:
            # A bin counts when it reaches thresHeight and is not lower than
            # the previous bin.  The counter is only cleared once a peak is
            # opened, not when a bin fails the test.
            if dicHisto[x] >= thresHeight:
                if dicHisto[x] >= beforeHeight:
                    countIncrease += 1
                    if countIncrease >= thresIncrease:
                        countIncrease = 0
                        flagPeakStart = True
            beforeHeight = dicHisto[x]

        if flagPeakStart:
            if dicHisto[x] >= peakHeight:
                # New highest bin of the current peak.
                peakHeight = dicHisto[x]
                peakCov = x
            else:
                countDecrease += 1
                if countDecrease >= thresDecrease:
                    # Peak closed: store its position in the first free slot.
                    for i in range(2):
                        if listPeakPandS[i] == -1:
                            # Half a bin is added to the bin start, i.e. the
                            # centre of the bin is reported.
                            tmpBias = float(binWidth) / 2
                            listPeakPandS[i] = peakCov + tmpBias
                            peakHeight = 0; peakCov = 0
                            break
                    # Both slots filled: nothing more to look for.
                    if listPeakPandS[1] != -1:
                        return listPeakPandS
                    # Otherwise start looking for the next peak.
                    countDecrease = 0
                    flagPeakStart = False

    return listPeakPandS
|
||
|
||
|
||
# ---------- Main function ---------- | ||
|
||
# Import stats file | ||
# Script body: read the stats table named by the first command-line
# argument, then repeatedly look for coverage peaks, each round restricting
# the histogram to coverages below the peak found in the previous round.
# The list of peaks is printed once no further peak is found.
#
# NOTE(review): the nesting of the loop body was reconstructed from a
# listing that had lost its indentation; confirm against the upstream file.
# NOTE(review): fin_stats is never closed explicitly.
fin_stats = open(sys.argv[1], "r")
dicStats = importStats(fin_stats)

# Make weighted histogram
listPeak = []             # peaks found so far, highest coverage first
xMin = 0
xMax = 1000
binWidth = 4
widthMovAve = 5
listPeakPandS = [-1, -1]  # [previous peak, next peak]; -1 = not found
N50 = 0
thresHeight = 0
thresConLen = 0

while True:
    # Get N50
    # (first round only; thresConLen is set to five times the N50)
    if len(listPeak) == 0:
        N50 = getN50(tuple(dicStats["lgth"]))
        print "N50 : " + str(N50)
        thresConLen = N50 * 5

    # Get first xMax
    if len(listPeak) == 0:
        xMax = getFirstXMax(dicStats, binWidth, thresConLen)
        print "First xMax : " + str(xMax)

    # Set width and xMax
    # (bin width and smoothing window depend on the current xMax; xMax is
    # then rounded down to a multiple of the chosen bin width)
    listWidth = setWidthByXMax(xMax)
    binWidth = listWidth[0]; widthMovAve = listWidth[1]
    xMax = setXMax(xMax, binWidth)

    # Make weighted and smoothed histogram
    xMin = 0
    dicHisto = weightedHisto(dicStats, xMin, xMax, binWidth)
    dicSmoothHisto = smoothingHisto(dicHisto, xMin, xMax,
                                    binWidth, widthMovAve)
    # Shrink the range to the bins for which a smoothed value exists.
    xMin += binWidth * ((widthMovAve - 1) / 2)
    xMax -= binWidth * ((widthMovAve - 1) / 2)

    # Get thresHeight
    # (first round only: the height of the right-most smoothed bin)
    if len(listPeak) == 0:
        thresHeight = dicSmoothHisto[xMax - binWidth]
        print "Thres Height : " + str(thresHeight)

    # Print histogram
    if len(listPeak) == 0:
        printHisto(dicSmoothHisto, xMin, xMax, binWidth)

    # Detect (primary and) secondary peak
    listPeakPandS = detectPeakPandS(dicSmoothHisto, xMin, xMax, binWidth,
                                    thresHeight, listPeakPandS)

    # Record peak
    # (the first round records both slots, later rounds only the new one)
    if len(listPeak) == 0:
        listPeak.append(listPeakPandS[0])
    listPeak.append(listPeakPandS[1])

    # When couldn't detect secondary peak, break
    # (the -1 placeholder appended just above is dropped before printing)
    if listPeakPandS[1] == -1:
        listPeak.pop(-1)
        print listPeak
        break

    # Prepare for next peak
    # (the peak just found becomes the upper bound of the next scan)
    listPeakPandS[0] = listPeakPandS[1]
    listPeakPandS[1] = -1
    xMax = listPeakPandS[0]
33 changes: 33 additions & 0 deletions
33
contrib/MetaVelvet-v0.3.1/shuffleAndConvertSequences_fastq2fasta.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#!/usr/bin/env python2.5 | ||
# -*- coding: utf-8 -*- | ||
|
||
import sys | ||
import re | ||
|
||
# Interleave two FASTQ files into one FASTA file.
#
#   argv[1] -- forward-read FASTQ file
#   argv[2] -- reverse-read FASTQ file
#   argv[3] -- output FASTA file
#
# One four-line record is read from each input in turn.  For every pair the
# output receives the forward header (leading "@" replaced by ">", "/1"
# appended), the forward sequence, then the reverse header ("/2" appended)
# and the reverse sequence.  The "+" and quality lines are dropped.
#
# Processing stops at the first record that cannot be read in full from
# either input.  A pair is written only once both mates have been read, so
# a truncated input never leaves an unpaired forward read at the end of the
# output.  All files are closed on exit, including after an error.
listFile = []
try:
    for path, mode in ((sys.argv[1], "r"), (sys.argv[2], "r"),
                       (sys.argv[3], "w")):
        listFile.append(open(path, mode))
    fin_fw, fin_bw, fout = listFile

    flagEnd = False
    while not flagEnd:
        listPair = []
        for fin, suffix in ((fin_fw, "/1"), (fin_bw, "/2")):
            # Header, sequence, "+" line, quality line.
            listFastq = [fin.readline() for j in range(4)]
            if "" in listFastq:
                # readline() returns "" only at end of file: the record is
                # missing or incomplete.
                flagEnd = True
                break
            listPair.append(">" + re.sub("^@|\n", "", listFastq[0])
                            + suffix + "\n")
            listPair.append(listFastq[1])

        if not flagEnd:
            fout.writelines(listPair)
finally:
    for f in listFile:
        f.close()
|
||
|
Oops, something went wrong.