Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
efb4dbd
commit ef91e3c
Showing
3 changed files
with
395 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,316 @@ | ||
; The Computer Language Benchmarks Game | ||
;; http://benchmarksgame.alioth.debian.org/ | ||
;; | ||
;; ported from Scala #2 | ||
;; contributed by Alex Miller | ||
|
||
(ns alioth.knucleotide | ||
(:gen-class) | ||
(:require [clojure.string :as s]) | ||
(:import [java.util.concurrent Executors Future] | ||
[java.io InputStream] | ||
[clojure.lang Numbers] | ||
[java.nio.channels Channels ReadableByteChannel] | ||
[java.nio ByteBuffer])) | ||
|
||
(set! *warn-on-reflection* true) | ||
(set! *unchecked-math* true) | ||
|
||
(definterface IBits | ||
(add2 [^long b]) | ||
(addLots [bs]) | ||
(scan [^long n ^long offset]) | ||
(^long getSize [])) | ||
|
||
(deftype Bits [^ints data ;; array of ints, each of which has 16 2-bit slots | ||
^:unsynchronized-mutable ^long size ;; number of bits in Bits | ||
^:unsynchronized-mutable ^long index ;; write index in data | ||
^:unsynchronized-mutable ^long n] ;; bit offset to write at current index | ||
IBits | ||
(add2 [_ b] | ||
(let [data ^ints data] | ||
(set! size (inc size)) | ||
(when (> n 30) | ||
(set! index (inc index)) | ||
(set! n 0)) | ||
(aset data index (bit-or (aget ^ints data index) | ||
(Numbers/shiftLeftInt (bit-and b 0x3) n))) | ||
(set! n (+ n 2)) | ||
nil)) | ||
(addLots [this bs] | ||
(let [data ^ints data | ||
bs ^Bits bs] | ||
(if (or (= n 0) (> n 30)) | ||
(do | ||
(when (> n 30) | ||
(set! index (inc index)) | ||
(set! n 0)) | ||
(System/arraycopy ^ints (.-data bs) 0 data index (.-index bs)) | ||
(set! index (+ index (.-index bs))) | ||
(when (and (> index 0) (= n 0)) | ||
(set! index (dec index)) | ||
(set! n 32))) | ||
(do | ||
(loop [i 0] | ||
(when (< i (.-index bs)) | ||
(let [j (aget ^ints (.-data bs) i)] | ||
(aset data index (bit-or (aget data index) (Numbers/shiftLeftInt j n))) | ||
(set! index (inc index)) | ||
(aset data index (bit-or (aget data index) (Numbers/unsignedShiftRightInt j (- 32 n)))) | ||
(recur (inc i))))))) | ||
(set! size (+ size (* 16 ^long (.-index bs)))) | ||
(let [bsn (.-n bs)] | ||
(when (> bsn 0) | ||
(loop [n bsn | ||
i (aget ^ints (.-data bs) (.-index bs))] | ||
(when (> n 0) | ||
(.add2 this i) | ||
(recur (long (- n 2)) | ||
(long (Numbers/unsignedShiftRightInt i 2))))))) | ||
nil)) | ||
(scan [_ n offset] | ||
(let [data ^ints data | ||
mask (dec (bit-shift-left 1 (* n 2))) | ||
scanned (long-array (quot size n))] | ||
(loop [i (rem offset 16) | ||
j (quot offset 16) | ||
scan-index 0] | ||
(if (<= (+ (* j 16) i n) size) | ||
(if (<= (+ i n) 16) | ||
(let [l (bit-and (unsigned-bit-shift-right (aget data j) (* 2 i)) mask) | ||
newi (+ i n)] | ||
(aset scanned scan-index l) | ||
(if (>= newi 16) | ||
(recur (- newi 16) (inc j) (inc scan-index)) | ||
(recur newi j (inc scan-index)))) | ||
;;(bit-and (aget data j) 0xFFFFFFFF) | ||
(let [l (bit-and (bit-or (Numbers/unsignedShiftRightInt (aget data j) (int (* 2 i))) | ||
(bit-shift-left (aget data (inc j)) (* 2 (- 16 i)))) | ||
mask) | ||
newj (inc j) | ||
newi (+ i (- n 16))] | ||
(aset scanned scan-index l) | ||
(if (>= newi 16) | ||
(recur (- newi 16) (inc newj) (inc scan-index)) | ||
(recur newi newj (inc scan-index))))) | ||
(do | ||
(loop [pad-index scan-index] | ||
(if (< pad-index (alength scanned)) | ||
(do (aset scanned scan-index -1) | ||
(recur (inc pad-index))) | ||
scanned))))))) | ||
(getSize [_] size)) | ||
|
||
(def table (let [arr (long-array 256)] | ||
(loop [i 0] | ||
(if (< i 256) | ||
(do | ||
(aset arr i (case i | ||
(97 65) 0 | ||
(116 84) 1 | ||
(103 71) 2 | ||
(99 67) 3 | ||
-1)) | ||
(recur (inc i))) | ||
arr)))) | ||
|
||
(defn r ^long [^ReadableByteChannel ch ^ByteBuffer d] | ||
(.clear d) | ||
(.read ch d)) | ||
|
||
(defn add-all [bitsv] | ||
(let [total-size (inc ^long (/ ^long (reduce (fn ^long [a b] (+ ^long a ^long (.getSize ^IBits b))) 0 bitsv) 16)) | ||
all ^IBits (->Bits (int-array total-size) 0 0 0)] | ||
(doseq [^IBits bits bitsv :when (pos? (.getSize bits))] | ||
(.addLots all bits)) | ||
all)) | ||
|
||
(defn readb [^ReadableByteChannel ch ^ByteBuffer data ^long n ^long i] | ||
(let [table ^longs table] | ||
(loop [i i | ||
n n | ||
bits (->Bits (int-array (inc (quot (- n i) 4))) 0 0 0) | ||
bitsv []] | ||
(if (< i n) | ||
(let [b (long (.get data i))] | ||
(when (false? (= b (long 10))) ;; if not newline | ||
(.add2 ^IBits bits (aget table (bit-and b 0xFF)))) | ||
(recur (inc i) n bits bitsv)) | ||
(if (<= n 0) | ||
(add-all bitsv) | ||
(let [n (r ch data)] | ||
(recur 0 (long n) (->Bits (int-array (inc (quot n 4))) 0 0 0) (conj bitsv bits)))))))) | ||
|
||
(defn skip [^ReadableByteChannel ch ^ByteBuffer data ^long n ^long i] | ||
(loop [i i | ||
n n] | ||
(if (< i n) | ||
(if (= (long (.get data i)) 10) ;; \newline | ||
(if (= (inc i) n) | ||
(readb ch data (r ch data) 0) | ||
(readb ch data n i)) | ||
(recur (inc i) n)) | ||
(recur 0 (r ch data))))) | ||
|
||
(defn loadf [^String target] | ||
(let [ch ^ReadableByteChannel (Channels/newChannel System/in) | ||
tb ^bytes (.getBytes target) | ||
tlen (long (alength tb)) | ||
tl ^longs (long-array (inc tlen)) ;; pad by 1 space for match termination | ||
data ^ByteBuffer (ByteBuffer/allocateDirect 1048576) ;; 1024 * 1024 | ||
n (long (r ch data))] | ||
(doseq [i (range tlen)] | ||
(aset tl i (long (aget tb i)))) | ||
(loop [i (long 0) | ||
need (long 0) | ||
match (long (aget tl 0)) | ||
n (long n)] | ||
|
||
(if (< i n) | ||
(if (< need tlen) | ||
(if (= (long (.get data i)) match) | ||
(recur (inc i) (inc need) (aget tl (inc need)) n) | ||
(recur (inc i) 0 (long (aget tl 0)) n)) | ||
(if (= (inc i) n) | ||
(skip ch data (r ch data) 0) | ||
(skip ch data n i))) | ||
(recur 0 need match (r ch data)))))) | ||
|
||
(definterface IDnaHash | ||
(^alioth.knucleotide.IDnaHash add [^long key ^long count]) | ||
(^long get [^long key]) | ||
(^void printSorted [])) | ||
|
||
(defmacro hc [l size] | ||
`(bit-and (+ ~l (bit-shift-right ~l 17)) (dec ~size))) | ||
|
||
(defmacro nx [i size] | ||
`(bit-and (inc ~i) (dec ~size))) | ||
|
||
(def ^:constant decode ["A" "T" "G" "C"]) | ||
(defn l2s [^long l ^long n] | ||
(if (<= n 0) | ||
"" | ||
(str (decode (bit-and l 0x3)) (l2s (unsigned-bit-shift-right l 2) (dec n))))) | ||
|
||
(deftype DnaHash [^long z | ||
^long size | ||
^:unsynchronized-mutable ^long n | ||
^longs keys | ||
^longs counters] | ||
IDnaHash | ||
(add [this key count] | ||
(let [size size | ||
keys ^longs keys | ||
counters ^longs counters | ||
index (int (hc key size))] | ||
(cond | ||
;; new key | ||
(= (aget counters index) 0) | ||
(do | ||
(aset keys index key) | ||
(aset counters index count) | ||
(set! n (inc n)) | ||
this) | ||
|
||
;; existing key | ||
(= (aget keys index) key) | ||
(do | ||
(aset counters index (+ (aget counters index) count)) | ||
this) | ||
|
||
;; rehash | ||
(> (* 6 n) size) | ||
(let [newsize (* size 64) | ||
newhash (DnaHash. z newsize 0 (long-array newsize) (long-array newsize))] | ||
(loop [i 0] | ||
(if (< i size) | ||
(let [ci (aget counters i)] | ||
(when (> ci 0) | ||
(.add newhash (aget keys i) ci)) | ||
(recur (inc i))) | ||
(do | ||
(.add newhash key 1) | ||
newhash)))) | ||
|
||
true | ||
(loop [i (nx index size)] | ||
(let [ii (int i)] | ||
(if (or (= 0 (aget counters ii)) | ||
(= key (aget keys ii))) | ||
(if (= (aget counters i) 0) | ||
(do | ||
(aset keys ii key) | ||
(aset counters ii count) | ||
(set! n (inc n)) | ||
this) | ||
(do | ||
(aset counters ii (+ (aget counters ii) count)) | ||
this)) | ||
(recur (nx i size)))))))) | ||
(get [_ key] | ||
(loop [i (hc key size)] | ||
(if (and (pos? (aget counters i)) (not= key (aget keys i))) | ||
(recur i) | ||
(aget counters i)))) | ||
(printSorted [this] | ||
(let [tcounts (long (loop [idx 0 acc 0] | ||
(if (< idx (alength counters)) | ||
(recur (inc idx) (+ acc (aget counters idx))) | ||
acc))) | ||
factor (/ 100.0 tcounts) | ||
freqs (loop [i 0 | ||
acc (transient [])] | ||
(if (< i (alength counters)) | ||
(let [c (* factor (aget counters i)) | ||
k (l2s (aget keys i) z)] | ||
(recur (inc i) (if (> c 0) (conj! acc [c k]) acc))) | ||
(persistent! acc))) | ||
s (reverse (sort freqs))] | ||
(doseq [[freq label] s] | ||
(printf "%s %.3f\n" label freq)) | ||
(println)))) | ||
|
||
(defn prints [^IDnaHash d ^String s] | ||
(let [bs (.getBytes s) | ||
mapped (map (fn [^long b] (aget ^longs table (bit-and b 0xFF))) bs) | ||
k (reduce (fn [^long acc ^long b] (+ (* 4 acc) b)) 0 (reverse mapped))] | ||
(format "%d\t%s" (.get d k) s))) | ||
|
||
(defn addToHash [^IBits data ^IDnaHash h ^long n ^long offset] | ||
(let [s ^longs (.scan data n offset) | ||
c (alength s)] | ||
(loop [h h i 0] | ||
(if (< i c) | ||
(let [si (aget s i)] | ||
(if (< si 0) | ||
h | ||
(recur (.add h si 1) (inc i)))) | ||
h)))) | ||
|
||
(defn -main [& args] | ||
(let [sizes [1 2 3 4 6 12 18] | ||
sequence "GGTATTTTAATTTATAGT" | ||
data (loadf ">THREE") | ||
tasks (doall | ||
(map (fn [^long n] | ||
(fn [] | ||
(loop [h (DnaHash. n 512 0 (long-array 512) (long-array 512)) | ||
i 0] | ||
(if (< i n) | ||
(recur (addToHash data h n i) (inc i)) | ||
h)))) | ||
sizes)) | ||
processors (.. Runtime getRuntime availableProcessors) | ||
pool (Executors/newFixedThreadPool processors) | ||
[f1 f2 :as futures] (.invokeAll pool tasks)] | ||
(.printSorted ^DnaHash @f1) | ||
(.printSorted ^DnaHash @f2) | ||
|
||
(loop [[f & fs] (drop 2 futures) | ||
[s & ss] (drop 2 sizes)] | ||
(when f | ||
(println (prints @f (subs sequence 0 s))) | ||
(recur fs ss))) | ||
(.shutdown pool) | ||
(shutdown-agents))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.