-
Notifications
You must be signed in to change notification settings - Fork 2
/
mcontig.py
67 lines (55 loc) · 1.2 KB
/
mcontig.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#! /usr/bin/python
# Program: mContig.py
# Function: Cuts a scaffold into contigs and writes them to a multi-FASTA file
# Description:
# Author: Diego Mariano
# Version: 1
from Bio import SeqIO
import sys
# Usage text printed for -h/--help and when no input file is given.
USAGE = "HELP: Use 'python mcontig.py [name_file].fasta'"

# The result is always written to this file in the current directory.
OUTPUT_PATH = 'm.fasta'


def split_contigs(sequence):
    """Split a scaffold sequence into contigs at every run of 'N'/'n'.

    Args:
        sequence: the scaffold as a plain string.

    Returns:
        A list with the contigs in their original order. Gap characters are
        never part of a contig, and empty pieces (produced by a leading gap,
        a trailing gap, or consecutive gap characters) are dropped, so a
        scaffold that starts with 'N' no longer yields an empty first contig.
    """
    # Lower-case 'n' is a gap as well; normalising it to 'N' only touches gap
    # characters, so the contig contents are left exactly as they were.
    pieces = sequence.replace('n', 'N').split('N')
    return [piece for piece in pieces if piece]


def write_contigs(contigs, path):
    """Write the contigs to *path* as FASTA records contig_1, contig_2, ...

    Records are separated by a single newline and, as in the original
    output format, no newline is written after the last sequence.

    Args:
        contigs: iterable of contig strings.
        path: name of the file to create (overwritten if it exists).
    """
    records = [
        ">contig_%d\n%s" % (number, contig)
        for number, contig in enumerate(contigs, 1)
    ]
    # The context manager closes the file even if the write fails.
    with open(path, 'w') as out:
        out.write("\n".join(records))


def main(argv):
    """Command-line entry point.

    Reads the FASTA file named by argv[1], splits its scaffold into contigs,
    writes them to OUTPUT_PATH and prints the number of contigs written.

    Args:
        argv: the argument list, normally sys.argv.
    """
    # Help: shown when the file name is missing or help is requested.
    try:
        p1 = argv[1]
    except IndexError:
        print(USAGE)
        sys.exit()
    if p1 in ("-h", "--help"):
        print(USAGE)
        sys.exit()

    # Read the sequence with Biopython.
    # NOTE(review): when the file holds several records only the last one is
    # processed (same as before) - confirm that single-scaffold input is the
    # intended use.
    seq_final = None
    for record in SeqIO.parse(p1, "fasta"):
        seq_final = str(record.seq)
    if seq_final is None:
        # Previously this case crashed with a NameError further down.
        sys.exit("ERROR: no FASTA record found in '%s'" % p1)

    contigs = split_contigs(seq_final)
    write_contigs(contigs, OUTPUT_PATH)
    print(len(contigs))


if __name__ == "__main__":
    main(sys.argv)