Skip to content
This repository
Browse code

Update Clojure and Java programs to use for fasta benchmark

Made them match the current fastest ones for the 4-core 64-bit
hardware used by the Computer Language Benchmarks Game web site as of
Feb 20 2013.
  • Loading branch information...
commit 6d778a662392ed334c7690d1bce3bcdcde9e4701 1 parent 7f0272f
Andy Fingerhut authored February 20, 2013
18  fasta/README
@@ -7,3 +7,21 @@ them both.
7 7
 
8 8
 I also got slightly faster results using byte-based output (clj-6)
9 9
 rather than char/String-based output (clj-1 through clj-5, and clj-7).
  10
+
  11
+
  12
+On Feb 20 2013, the fastest Clojure program for the 4-core 64-bit
  13
+machine was fasta.clojure-5.clojure, which is identical to my
  14
+fasta.clj-8.clj except for the URL in the second comment line.
  15
+
  16
+The fastest Java program was fasta.java-4.java.
  17
+
  18
+
  19
+The fastest Clojure program for the fasta-redux problem was
  20
+fasta.clojure-4.clojure, which is very similar to my fasta.clj-11.clj,
  21
+but there are many small differences.  I didn't notice any changes
  22
+that would obviously account for a difference in speed.
  23
+
  24
+The fastest Java program for the fasta-redux problem was
  25
+fastaredux.java-3.java, which is identical to the file I copied and
  26
+called fasta.java-3.java except for the class name and comment line
  27
+with URL.
2  fasta/clj-compile.sh
@@ -12,6 +12,6 @@ source ../env.sh
12 12
 "${RM}" -fr "${CLJ_OBJ_DIR}"
13 13
 mkdir -p "${CLJ_OBJ_DIR}"
14 14
 
15  
-"${CP}" fasta.clj-11.clj "${CLJ_OBJ_DIR}/fasta.clj"
  15
+"${CP}" fasta.clj-8.clj "${CLJ_OBJ_DIR}/fasta.clj"
16 16
 
17 17
 "${JAVA}" "-Dclojure.compile.path=${PS_CLJ_OBJ_DIR}" -classpath "${PS_FULL_CLJ_CLASSPATH}" clojure.lang.Compile fasta
213  fasta/fasta.java-4.java
... ...
@@ -0,0 +1,213 @@
  1
+/*
  2
+ * The Computer Language Benchmarks Game
  3
+ * http://benchmarksgame.alioth.debian.org/
  4
+ *
  5
+ * modified by Mehmet D. AKIN
  6
+ * modified by Rikard Mustajärvi
  7
+ */
  8
+
  9
+import java.io.IOException;
  10
+import java.io.OutputStream;
  11
+
  12
/**
 * FASTA sequence generator for the Computer Language Benchmarks Game.
 *
 * <p>Writes three FASTA records to standard output: one built by cycling
 * through the fixed {@link #ALU} sequence, and two built by weighted random
 * selection from an alphabet. Sequence data is emitted in lines of
 * {@link #LINE_LENGTH} characters, each terminated by {@code '\n'}.
 */
class fasta {
    /** Modulus of the pseudo-random number generator. */
    static final int IM = 139968;
    /** Multiplier of the pseudo-random number generator. */
    static final int IA = 3877;
    /** Increment of the pseudo-random number generator. */
    static final int IC = 29573;

    /** Number of sequence characters per output line (excluding the line break). */
    static final int LINE_LENGTH = 60;
    /** Size of the output staging buffer: room for 1024 full lines. */
    static final int BUFFER_SIZE = (LINE_LENGTH + 1) * 1024; // add 1 for '\n'

    /**
     * Charset used to turn headers and the repeat sequence into bytes.
     * Stated explicitly so the output does not depend on the platform default.
     */
    private static final java.nio.charset.Charset OUTPUT_CHARSET =
            java.nio.charset.StandardCharsets.UTF_8;

    /** Sequence that {@link #main} repeats to produce the first record. */
    public static final String ALU =
              "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
            + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
            + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
            + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
            + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
            + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
            + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";

    // Weighted selection from alphabet
    private static final FloatProbFreq IUB = new FloatProbFreq(
            new byte[] {
                'a', 'c', 'g', 't',
                'B', 'D', 'H', 'K',
                'M', 'N', 'R', 'S',
                'V', 'W', 'Y'},
            new double[] {
                0.27, 0.12, 0.12, 0.27,
                0.02, 0.02, 0.02, 0.02,
                0.02, 0.02, 0.02, 0.02,
                0.02, 0.02, 0.02,
            });

    private static final FloatProbFreq HOMO_SAPIENS = new FloatProbFreq(
            new byte[] {
                'a',
                'c',
                'g',
                't'},
            new double[] {
                0.3029549426680d,
                0.1979883004921d,
                0.1975473066391d,
                0.3015094502008d});

    /** Writes the FASTA description line {@code ">id desc\n"} to {@code writer}. */
    private static void writeHeader(String id, String desc, OutputStream writer)
            throws IOException {
        String header = ">" + id + " " + desc + '\n';
        writer.write(header.getBytes(OUTPUT_CHARSET));
    }

    /**
     * Writes one FASTA record whose sequence is drawn at random from
     * {@code fpf}.
     *
     * @param id     record identifier placed after {@code '>'}
     * @param desc   free-text description following the identifier
     * @param fpf    weighted alphabet to draw characters from
     * @param nChars number of sequence characters to emit; values {@code <= 0}
     *               produce only the header line
     * @param writer destination stream; it is not closed by this method
     * @throws IOException if writing to {@code writer} fails
     */
    static final void makeRandomFasta(String id, String desc,
            FloatProbFreq fpf, int nChars, OutputStream writer)
            throws IOException {
        writeHeader(id, desc, writer);

        final byte[] buffer = new byte[BUFFER_SIZE];
        int bufferIndex = 0;
        while (nChars > 0) {
            final int chunkSize = Math.min(nChars, LINE_LENGTH);

            // Flush when the next line (plus its '\n') would not fit. This
            // does not rely on the buffer being an exact multiple of the
            // line size.
            if (bufferIndex + chunkSize + 1 > buffer.length) {
                writer.write(buffer, 0, bufferIndex);
                bufferIndex = 0;
            }

            bufferIndex = fpf.selectRandomIntoBuffer(buffer, bufferIndex, chunkSize);
            buffer[bufferIndex++] = '\n';

            nChars -= chunkSize;
        }

        writer.write(buffer, 0, bufferIndex);
    }

    /**
     * Writes one FASTA record whose sequence is {@code alu} repeated
     * cyclically until {@code nChars} characters have been emitted.
     *
     * @param id     record identifier placed after {@code '>'}
     * @param desc   free-text description following the identifier
     * @param alu    sequence to repeat; must be non-empty when
     *               {@code nChars > 0}
     * @param nChars number of sequence characters to emit; values {@code <= 0}
     *               produce only the header line
     * @param writer destination stream; it is not closed by this method
     * @throws IOException              if writing to {@code writer} fails
     * @throws IllegalArgumentException if {@code alu} is empty and
     *                                  {@code nChars > 0}
     */
    static final void makeRepeatFasta(
            String id, String desc, String alu,
            int nChars, OutputStream writer) throws IOException {
        final byte[] aluBytes = alu.getBytes(OUTPUT_CHARSET);
        // Reject an unusable source sequence before anything is written.
        if (nChars > 0 && aluBytes.length == 0) {
            throw new IllegalArgumentException(
                    "alu must not be empty when nChars > 0");
        }

        writeHeader(id, desc, writer);

        final byte[] buffer = new byte[BUFFER_SIZE];
        int bufferIndex = 0;
        int aluIndex = 0;
        while (nChars > 0) {
            final int chunkSize = Math.min(nChars, LINE_LENGTH);

            // Flush when the next line (plus its '\n') would not fit.
            if (bufferIndex + chunkSize + 1 > buffer.length) {
                writer.write(buffer, 0, bufferIndex);
                bufferIndex = 0;
            }

            for (int i = 0; i < chunkSize; i++) {
                // Wrap around to the start of the source sequence.
                if (aluIndex == aluBytes.length) {
                    aluIndex = 0;
                }
                buffer[bufferIndex++] = aluBytes[aluIndex++];
            }
            buffer[bufferIndex++] = '\n';

            nChars -= chunkSize;
        }

        writer.write(buffer, 0, bufferIndex);
    }

    /**
     * Program entry point. Emits records ONE, TWO and THREE with
     * {@code 2n}, {@code 3n} and {@code 5n} characters respectively.
     *
     * @param args optional first element: {@code n} as a decimal integer
     *             (defaults to 1000)
     * @throws IOException              if writing to standard output fails
     * @throws IllegalArgumentException if {@code n * 5} would overflow an
     *                                  {@code int}
     */
    public static void main(String[] args) throws IOException {
        int n = 1000;
        if (args.length > 0) {
            n = Integer.parseInt(args[0]);
        }
        // The largest multiplier below is 5; fail loudly instead of letting
        // the product wrap to a negative length.
        if (n > Integer.MAX_VALUE / 5) {
            throw new IllegalArgumentException("n is too large: " + n);
        }

        try (OutputStream out = System.out) {
            makeRepeatFasta("ONE", "Homo sapiens alu", ALU, n * 2, out);
            makeRandomFasta("TWO", "IUB ambiguity codes", IUB, n * 3, out);
            makeRandomFasta("THREE", "Homo sapiens frequency", HOMO_SAPIENS, n * 5, out);
        }
    }

    /**
     * An alphabet with per-character probabilities, stored as a cumulative
     * distribution in single precision, plus the generator used to sample it.
     */
    public static final class FloatProbFreq {
        /** Current generator state; shared by all instances. */
        static int last = 42;

        /** Reciprocal of the modulus, used to scale the state by multiplication. */
        private static final float ONE_OVER_IM = 1.0f / IM;

        final byte[] chars;
        /** Cumulative probabilities: {@code probs[i]} is the sum of weights 0..i. */
        final float[] probs;

        /**
         * Builds the cumulative distribution for the given alphabet.
         *
         * @param chars alphabet symbols
         * @param probs probability of each symbol, parallel to {@code chars}
         * @throws IllegalArgumentException if the arrays are empty or differ
         *                                  in length
         */
        public FloatProbFreq(byte[] chars, double[] probs) {
            if (chars.length == 0 || chars.length != probs.length) {
                throw new IllegalArgumentException(
                        "chars and probs must be non-empty and of equal length, got "
                        + chars.length + " and " + probs.length);
            }
            this.chars = chars.clone();
            this.probs = new float[probs.length];
            for (int i = 0; i < probs.length; i++) {
                this.probs[i] = (float) probs[i];
            }
            makeCumulative();
        }

        /** Replaces each weight with the running sum, accumulated in double. */
        private void makeCumulative() {
            double cp = 0.0;
            for (int i = 0; i < probs.length; i++) {
                cp += probs[i];
                probs[i] = (float) cp;
            }
        }

        /**
         * Appends {@code nRandom} randomly selected symbols to {@code buffer}.
         *
         * @param buffer      destination array
         * @param bufferIndex index at which to start writing
         * @param nRandom     number of symbols to append
         * @return the index just past the last symbol written
         */
        public final int selectRandomIntoBuffer(
                byte[] buffer, int bufferIndex, final int nRandom) {
            final byte[] chars = this.chars;
            final float[] probs = this.probs;
            final int lastSymbol = probs.length - 1;

            for (int rIndex = 0; rIndex < nRandom; rIndex++) {
                final float r = random(1.0f);
                // Pick the first symbol whose cumulative probability exceeds
                // r; the final symbol is the fallback if none does.
                int i = 0;
                while (i < lastSymbol && !(r < probs[i])) {
                    i++;
                }
                buffer[bufferIndex++] = chars[i];
            }

            return bufferIndex;
        }

        /**
         * Advances the generator ({@code last = (last * IA + IC) % IM}) and
         * returns the new state scaled by {@code max / IM}.
         *
         * @param max scale factor applied to the result
         * @return {@code max * last / IM} computed in single precision
         */
        public static final float random(final float max) {
            last = (last * IA + IC) % IM;
            return max * last * ONE_OVER_IM;
        }
    }
}
2  fasta/java-compile.sh
@@ -4,7 +4,7 @@ source ../env.sh
4 4
 
5 5
 "${JAVA}" -version
6 6
 "${JAVAC}" -version
7  
-"${CP}" fasta.java-3.java fasta.java
  7
+"${CP}" fasta.java-4.java fasta.java
8 8
 mkdir -p "${JAVA_OBJ_DIR}"
9 9
 "${JAVAC}" -d "${JAVA_OBJ_DIR}" fasta.java
10 10
 "${RM}" fasta.java

0 notes on commit 6d778a6

Please sign in to comment.
Something went wrong with that request. Please try again.