diff --git a/fasta/README b/fasta/README index cf0bcfe..09afaaa 100644 --- a/fasta/README +++ b/fasta/README @@ -7,3 +7,21 @@ them both. I also got a little faster results using byte-based output (clj-6) rather than char/String-based output (clj-1 through clj-5, and clj-7). + + +On Feb 20 2013, the fastest Clojure program for the 4-core 64-bit +machine was fasta.clojure-5.clojure, which is identical to my +fasta.clj-8.clj except for the URL in the second comment line. + +The fastest Java program was fasta.java-4.java. + + +The fastest Clojure program for the fasta-redux problem was +fasta.clojure-4.clojure, which is very similar to my fasta.clj-11.clj, +but there are many small differences. I didn't notice any that +should account for a difference in speed, but it wasn't obvious either way. + +The fastest Java program for the fasta-redux problem was +fastaredux.java-3.java, which is identical to the file I copied and +called fasta.java-3.java except for the class name and the comment line +with the URL. diff --git a/fasta/clj-compile.sh b/fasta/clj-compile.sh index f2f4ab9..597e636 100755 --- a/fasta/clj-compile.sh +++ b/fasta/clj-compile.sh @@ -12,6 +12,6 @@ source ../env.sh "${RM}" -fr "${CLJ_OBJ_DIR}" mkdir -p "${CLJ_OBJ_DIR}" -"${CP}" fasta.clj-11.clj "${CLJ_OBJ_DIR}/fasta.clj" +"${CP}" fasta.clj-8.clj "${CLJ_OBJ_DIR}/fasta.clj" "${JAVA}" "-Dclojure.compile.path=${PS_CLJ_OBJ_DIR}" -classpath "${PS_FULL_CLJ_CLASSPATH}" clojure.lang.Compile fasta diff --git a/fasta/fasta.java-4.java b/fasta/fasta.java-4.java new file mode 100644 index 0000000..aca7fc6 --- /dev/null +++ b/fasta/fasta.java-4.java @@ -0,0 +1,213 @@ +/* + * The Computer Language Benchmarks Game + * http://benchmarksgame.alioth.debian.org/ + * + * modified by Mehmet D. 
AKIN
+ * modified by Rikard Mustajärvi
+ */
+
+import java.io.IOException;
+import java.io.OutputStream;
+/** Writes three sequence records (">id desc" header, then 60-byte lines) to System.out; see main. */
+class fasta {
+    static final int IM = 139968;  // modulus used by FloatProbFreq.random
+    static final int IA = 3877;    // multiplier used by FloatProbFreq.random
+    static final int IC = 29573;   // increment used by FloatProbFreq.random
+
+    static final int LINE_LENGTH = 60;  // sequence bytes per output line, not counting '\n'
+    static final int BUFFER_SIZE = (LINE_LENGTH + 1)*1024; // add 1 for '\n'
+
+    // Sequence that makeRepeatFasta cycles through; main passes it as the 'alu' argument
+    public static String ALU =
+            "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
+            + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
+            + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
+            + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
+            + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
+            + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
+            + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
+
+    private static final FloatProbFreq IUB = new FloatProbFreq(
+        new byte[]{ // symbols; entry i pairs with probability i below
+            'a', 'c', 'g', 't',
+            'B', 'D', 'H', 'K',
+            'M', 'N', 'R', 'S',
+            'V', 'W', 'Y'},
+        new double[]{ // per-symbol probabilities; the constructor turns them into running totals
+            0.27, 0.12, 0.12, 0.27,
+            0.02, 0.02, 0.02, 0.02,
+            0.02, 0.02, 0.02, 0.02,
+            0.02, 0.02, 0.02,
+        }
+    );
+
+    private static final FloatProbFreq HOMO_SAPIENS = new FloatProbFreq(
+        new byte[]{ // symbols; entry i pairs with probability i below
+            'a',
+            'c',
+            'g',
+            't'},
+        new double[]{ // per-symbol probabilities; the constructor turns them into running totals
+            0.3029549426680d,
+            0.1979883004921d,
+            0.1975473066391d,
+            0.3015094502008d}
+    );
+    /** Writes the ">id desc" header, then nChars symbols selected by fpf, LINE_LENGTH per line. */
+    static final void makeRandomFasta(String id, String desc,
+            FloatProbFreq fpf, int nChars, OutputStream writer)
+        throws IOException
+    {
+        final int LINE_LENGTH = fasta.LINE_LENGTH;
+        final int BUFFER_SIZE = fasta.BUFFER_SIZE;
+        byte[] buffer = new byte[BUFFER_SIZE];
+
+        if (buffer.length % (LINE_LENGTH + 1) != 0) { // the full-buffer test in the loop needs whole lines to fill the buffer exactly
+            throw new IllegalStateException(
+                "buffer size must be a multiple of " +
+                "line length (including line break)");
+        }
+
+        String descStr = ">" + id + " " + desc + '\n';
+        writer.write(descStr.getBytes()); // NOTE(review): no charset argument, so the encoding follows the JVM default
+
+        int bufferIndex = 0;
+        while (nChars > 0) {
+            int chunkSize; // symbols on this line: a full line, or whatever is left
+            if (nChars >= LINE_LENGTH) {
+                chunkSize = LINE_LENGTH;
+            } else {
+                chunkSize = nChars;
+            }
+
+            if (bufferIndex == BUFFER_SIZE) { // buffer full: write it out and start again at index 0
+                writer.write(buffer, 0, bufferIndex);
+                bufferIndex = 0;
+            }
+
+            bufferIndex = fpf
+                .selectRandomIntoBuffer(buffer, bufferIndex, chunkSize);
+            buffer[bufferIndex++] = '\n';
+
+            nChars -= chunkSize;
+        }
+
+        writer.write(buffer, 0, bufferIndex); // write whatever is still buffered
+    }
+    /** Writes the ">id desc" header, then nChars bytes taken cyclically from alu, LINE_LENGTH per line. */
+    static final void makeRepeatFasta(
+            String id, String desc, String alu,
+            int nChars, OutputStream writer) throws IOException
+    {
+        final byte[] aluBytes = alu.getBytes(); // NOTE(review): no charset argument, so the encoding follows the JVM default
+        int aluIndex = 0;
+
+        final int LINE_LENGTH = fasta.LINE_LENGTH;
+        final int BUFFER_SIZE = fasta.BUFFER_SIZE;
+        byte[] buffer = new byte[BUFFER_SIZE];
+
+        if (buffer.length % (LINE_LENGTH + 1) != 0) { // the full-buffer test in the loop needs whole lines to fill the buffer exactly
+            throw new IllegalStateException(
+                "buffer size must be a multiple " +
+                "of line length (including line break)");
+        }
+
+        String descStr = ">" + id + " " + desc + '\n';
+        writer.write(descStr.getBytes()); // NOTE(review): no charset argument, so the encoding follows the JVM default
+
+        int bufferIndex = 0;
+        while (nChars > 0) {
+            final int chunkSize; // bytes on this line: a full line, or whatever is left
+            if (nChars >= LINE_LENGTH) {
+                chunkSize = LINE_LENGTH;
+            } else {
+                chunkSize = nChars;
+            }
+
+            if (bufferIndex == BUFFER_SIZE) { // buffer full: write it out and start again at index 0
+                writer.write(buffer, 0, bufferIndex);
+                bufferIndex = 0;
+            }
+
+            for (int i = 0; i < chunkSize; i++) {
+                if (aluIndex == aluBytes.length) { // end of the source sequence: wrap to its start
+                    aluIndex = 0;
+                }
+
+                buffer[bufferIndex++] = aluBytes[aluIndex++];
+            }
+            buffer[bufferIndex++] = '\n';
+
+            nChars -= chunkSize;
+        }
+
+        writer.write(buffer, 0, bufferIndex); // write whatever is still buffered
+    }
+    /** Emits records ONE (2n bytes), TWO (3n) and THREE (5n); n is 1000 unless args[0] supplies it. */
+    public static void main(String[] args) throws IOException
+    {
+        int n = 1000;
+//        int n = 25000000;
+        if (args.length > 0) {
+            n = Integer.parseInt(args[0]);
+        }
+
+        OutputStream out = System.out;
+        makeRepeatFasta("ONE", "Homo sapiens alu", ALU, n * 2, out);
+        makeRandomFasta("TWO", "IUB ambiguity codes", IUB, n * 3, out);
+        makeRandomFasta("THREE", "Homo sapiens frequency", HOMO_SAPIENS, n * 5, out);
+        out.close(); // out is System.out, so this closes standard output
+    }
+    /** Symbols paired with cumulative probabilities; also owns the static generator state 'last'. */
+    public static final class FloatProbFreq {
+        static int last = 42; // generator state; static, so every instance advances the same sequence
+        final byte[] chars;
+        final float[] probs;
+
+        public FloatProbFreq(byte[] chars, double[] probs) {
+            this.chars = chars; // the caller's array is stored as-is, not copied
+            this.probs = new float[probs.length];
+            for (int i = 0; i < probs.length; i++) {
+                this.probs[i] = (float)probs[i];
+            }
+            makeCumulative(); // afterwards probs[i] is the running total through entry i
+        }
+
+        private final void makeCumulative() { // replaces each entry of probs with the sum of it and all earlier entries
+            double cp = 0.0; // summed in double, stored back as float
+            for (int i = 0; i < probs.length; i++) {
+                cp += probs[i];
+                probs[i] = (float)cp;
+            }
+        }
+        /** Stores nRandom selected symbols into buffer from bufferIndex on; returns the next free index. */
+        public final int selectRandomIntoBuffer(
+                byte[] buffer, int bufferIndex, final int nRandom) {
+            final byte[] chars = this.chars;
+            final float[] probs = this.probs;
+            final int len = probs.length;
+
+            outer:
+            for (int rIndex = 0; rIndex < nRandom; rIndex++) {
+                final float r = random(1.0f);
+                for (int i = 0; i < len; i++) {
+                    if (r < probs[i]) { // first running total above r picks the symbol
+                        buffer[bufferIndex++] = chars[i];
+                        continue outer;
+                    }
+                }
+
+                buffer[bufferIndex++] = chars[len-1]; // r was not below any running total: fall back to the last symbol
+            }
+
+            return bufferIndex;
+        }
+
+        // pseudo-random number generator: steps last = (last * IA + IC) % IM, then returns max * last * (1.0f / IM)
+        public static final float random(final float max) {
+            final float oneOverIM = (1.0f/ IM);
+            last = (last * IA + IC) % IM;
+            return max * last * oneOverIM;
+        }
+    }
+}
diff --git a/fasta/java-compile.sh b/fasta/java-compile.sh index cb8505f..ee0199e 100755 --- a/fasta/java-compile.sh +++ b/fasta/java-compile.sh @@ -4,7 +4,7 @@ source ../env.sh "${JAVA}" -version "${JAVAC}" -version -"${CP}" fasta.java-3.java fasta.java +"${CP}" fasta.java-4.java fasta.java mkdir -p "${JAVA_OBJ_DIR}" "${JAVAC}" -d "${JAVA_OBJ_DIR}" fasta.java "${RM}" fasta.java