Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 61 lines (48 sloc) 1.56 KB
from collections import defaultdict
from itertools import chain
import FloydWarshall
import random
def kmers_in_word(w):
return [w[i:i + 4] for i in range(len(w) - 4)]
def random_path(d):
result = []
items = d.keys()
while len(items) > 0:
x = random.choice(items)
result.append(x)
items = list(d[x])
return result
def branch(d, s, root, level=0):
if level < 3:
root[0].append(s)
root[1].extend((s, edge, next(k for k in kmers[s] if edge in k)) for edge in d[s])
for edge in d[s]:
branch(d, edge, root, level+1)
def words_sharing_kmer(word, d):
return chain(d[k] for k in kmers_in_word(word))
if __name__ == '__main__':
with open("corncob_lowercase.txt") as f:
s = (line.strip() for line in f)
word_list = [w for w in s if len(w) > 3]
kmers = defaultdict(list)
for word in word_list:
for k in kmers_in_word(word):
kmers[k].append(word)
debruijn = defaultdict(set)
for kmer, words in kmers.iteritems():
for word in words:
for k in kmers_in_word(word):
if kmer[1:4] == k[0:3]:
debruijn[kmer].add(k)
print len(debruijn)
# print debruijn
# p = random_path(debruijn)
# while len(p) < 10:
# p = random_path(debruijn)
# print p
# for i in range(len(p)-1):
# print [k for k in kmers[p[i]] if p[i+1] in k]
tree = ([],[])
branch(debruijn, 'rati', tree)
for edge in tree[1]:
print ",".join(edge)