/
fasta.jruby-5.jruby
119 lines (103 loc) · 2.53 KB
/
fasta.jruby-5.jruby
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# The Computer Language Benchmarks Game
# http://shootout.alioth.debian.org/
# Contributed by Sokolov Yura
# Modified by Rick Branson, Andy Fingerhut
$last = 42.0
GR_IM = 139968.0
GR_IA = 3877.0
GR_IC = 29573.0
alu =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
iub = [
["a", 0.27],
["c", 0.12],
["g", 0.12],
["t", 0.27],
["B", 0.02],
["D", 0.02],
["H", 0.02],
["K", 0.02],
["M", 0.02],
["N", 0.02],
["R", 0.02],
["S", 0.02],
["V", 0.02],
["W", 0.02],
["Y", 0.02],
]
homosapiens = [
["a", 0.3029549426680],
["c", 0.1979883004921],
["g", 0.1975473066391],
["t", 0.3015094502008],
]
def generate_rand_finder(tbl)
rb = "lambda do |n| \n"
tbl.each do |va, vb|
rb += "return #{va.inspect} if #{vb.inspect} > n\n"
end
rb += "end\n"
eval rb
end
def make_repeat_fasta(id, desc, src, n)
puts ">#{id} #{desc}"
v = nil
width = 60
l = src.length
s = src * (((width + l - 1) / l) + 1)
i = 0
p = []
p[i] = s.slice(i,width)
i = (i + width) % l
while i != 0 do
p[i] = s[i,width]
i = (i + width) % l
end
i = 0
printed = 0
while printed <= (n - width) do
puts "#{p[i]}"
printed += width
i = (i + width) % l
end
if printed < n
puts "#{p[i].slice(0, n-printed)}"
end
end
def make_random_fasta(id, desc, table, n)
puts ">#{id} #{desc}"
rand, v = nil,nil
width = 60
chunk = 1 * width
prob = 0.0
rwidth = (1..width)
table.each{|v| v[1]= (prob += v[1])}
f = generate_rand_finder(table)
if RUBY_PLATFORM == "java"
collector = lambda do |x|
rand = ($last = ($last * GR_IA + GR_IC) % GR_IM) / GR_IM
table.find { |va, vb| vb > rand }[0]
end
else
collector = lambda do |x|
rand = ($last = ($last * GR_IA + GR_IC) % GR_IM) / GR_IM
f.call(rand)
end
end
for i in 1..(n/width)
puts rwidth.collect(&collector).join
end
if n%width != 0
puts (1..(n%width)).collect(&collector).join
end
end
n = (ARGV[0] or 27).to_i
make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)
make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)
make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)