Permalink
Browse files

cythonize fasta

--HG--
rename : fasta.cython.pyx => fasta_cython.pyx
rename : fasta.python.py => fasta_python.py
  • Loading branch information...
1 parent 65fa625 commit 2f74bc40516c7208132893b15049c66ce20b7082 @robertwb robertwb committed Feb 11, 2009
Showing with 110 additions and 79 deletions.
  1. +0 −79 fasta.python.py
  2. +110 −0 fasta_cython.pyx
  3. 0 fasta.cython.pyx → fasta_python.py
View
@@ -1,79 +0,0 @@
-# The Computer Language Benchmarks Game
-# http://shootout.alioth.debian.org/
-#
-# modified by Ian Osgood
-# modified again by Heinrich Acker
-
-import sys, bisect
-
-alu = (
- 'GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG'
- 'GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA'
- 'CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT'
- 'ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA'
- 'GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG'
- 'AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC'
- 'AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA')
-
-iub = zip('acgtBDHKMNRSVWY', [0.27, 0.12, 0.12, 0.27] + [0.02]*11)
-
-homosapiens = [
- ('a', 0.3029549426680),
- ('c', 0.1979883004921),
- ('g', 0.1975473066391),
- ('t', 0.3015094502008),
-]
-
-
-def genRandom(lim, ia = 3877, ic = 29573, im = 139968):
- seed = 42
- imf = float(im)
- while 1:
- seed = (seed * ia + ic) % im
- yield lim * seed / imf
-
-Random = genRandom(1.)
-
-def makeCumulative(table):
- P = []
- C = []
- prob = 0.
- for char, p in table:
- prob += p
- P += [prob]
- C += [char]
- return (P, C)
-
-def repeatFasta(src, n):
- width = 60
- r = len(src)
- s = src + src + src[:n % r]
- for j in xrange(n // width):
- i = j*width % r
- print s[i:i+width]
- if n % width:
- print s[-(n % width):]
-
-def randomFasta(table, n):
- width = 60
- r = xrange(width)
- gR = Random.next
- bb = bisect.bisect
- jn = ''.join
- probs, chars = makeCumulative(table)
- for j in xrange(n // width):
- print jn([chars[bb(probs, gR())] for i in r])
- if n % width:
- print jn([chars[bb(probs, gR())] for i in xrange(n % width)])
-
-
-n = int(sys.argv[1])
-
-print '>ONE Homo sapiens alu'
-repeatFasta(alu, n*2)
-
-print '>TWO IUB ambiguity codes'
-randomFasta(iub, n*3)
-
-print '>THREE Homo sapiens frequency'
-randomFasta(homosapiens, n*5)
View
@@ -0,0 +1,110 @@
+# The Computer Language Benchmarks Game
+# http://shootout.alioth.debian.org/
+#
+# modified by Ian Osgood
+# modified again by Heinrich Acker
+
+cdef extern from *:
+ void memcpy(void *, void *, int)
+
+import sys
+
+alu = (
+ 'GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG'
+ 'GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA'
+ 'CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT'
+ 'ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA'
+ 'GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG'
+ 'AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC'
+ 'AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA')
+
+iub = zip('acgtBDHKMNRSVWY', [0.27, 0.12, 0.12, 0.27] + [0.02]*11)
+
+homosapiens = [
+ ('a', 0.3029549426680),
+ ('c', 0.1979883004921),
+ ('g', 0.1975473066391),
+ ('t', 0.3015094502008),
+]
+
+cdef unsigned long seed = 42
+cdef inline float pseudo_random(float lim):
+ cdef unsigned long ia = 3877, ic = 29573, im = 139968
+ cdef float imf = im
+ global seed
+ seed = (seed * ia + ic) % im
+ return lim * <long>seed / imf
+
+def makeCumulative(table):
+ P = []
+ C = []
+ prob = 0.
+ for char, p in table:
+ prob += p
+ P += [prob]
+ C += [char]
+ return (P, C)
+
+def repeatFasta(src, int n):
+ cdef int i, j, r = len(src)
+ cdef int width = 60
+ s = src * 2
+ line = ' ' * width
+ cdef char* ss = s
+ cdef char* buf = line
+ i = 0
+ for j in xrange(n // width):
+ memcpy(buf, ss + i, width)
+ print line
+ i += width
+ i -= r * (i>r)
+ if n % width:
+ memcpy(buf, ss + i, width)
+ print line[:n % width]
+
+def randomFasta(table, int n):
+
+ probs, chars = makeCumulative(table)
+ cdef int width = 60
+ cdef float cprobs[15]
+ cdef char cchars[15]
+ cdef int i, j, k
+ cdef float r
+ line = ' ' * width
+ # buf is a pointer to the contents of line
+ cdef char* buf = line
+ cdef int bisect
+ cdef float bisect_prob
+ for i in range(len(table)):
+ cprobs[i] = probs[i]
+ cchars[i] = ord(chars[i])
+
+ for j in xrange(n // width):
+ for i in range(width):
+ r = pseudo_random(1.0)
+ k = (r >= cprobs[0]) + (r >= cprobs[1]) + (r >= cprobs[2])
+ while cprobs[k] < r:
+ k += 1
+ buf[i] = cchars[k]
+ print line
+
+ if n % width:
+ for i in range(n % width):
+ r = pseudo_random(1.0)
+ k = 0
+ while cprobs[k] < r:
+ k += 1
+ buf[i] = cchars[k]
+ print line[:n % width]
+
+
+n = 100 #int(sys.argv[1])
+
+print '>ONE Homo sapiens alu'
+repeatFasta(alu, n*2)
+
+print '>TWO IUB ambiguity codes'
+randomFasta(iub, n*3)
+
+print '>THREE Homo sapiens frequency'
+randomFasta(homosapiens, n*5)
File renamed without changes.

0 comments on commit 2f74bc4

Please sign in to comment.