Skip to content

Commit

Permalink
Update Clojure and Java programs to use for fasta benchmark
Browse files Browse the repository at this point in the history
Made them match the current fastest ones for the 4-core 64-bit
hardware used by the Computer Language Benchmarks Game web site as of
Feb 20 2013.
  • Loading branch information
jafingerhut committed Feb 21, 2013
1 parent 7f0272f commit 6d778a6
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 2 deletions.
18 changes: 18 additions & 0 deletions fasta/README
Expand Up @@ -7,3 +7,21 @@ them both.


I also got slightly faster results using byte-based output (clj-6)
rather than char/String-based output (clj-1 through clj-5, and clj-7).


On Feb 20 2013, the fastest Clojure program for the 4-core 64-bit
machine was fasta.clojure-5.clojure, which is identical to my
fasta.clj-8.clj except for the URL in the second comment line.

The fastest Java program was fasta.java-4.java.


The fastest Clojure program for the fasta-redux problem was
fasta.clojure-4.clojure, which is very similar to my fasta.clj-11.clj,
but there are many small differences. I didn't notice any change
that would clearly account for a difference in speed, though it was not
obvious either way.

The fastest Java program for the fasta-redux problem was
fastaredux.java-3.java, which is identical to the file I copied and
called fasta.java-3.java except for the class name and comment line
with URL.
2 changes: 1 addition & 1 deletion fasta/clj-compile.sh
Expand Up @@ -12,6 +12,6 @@ source ../env.sh
"${RM}" -fr "${CLJ_OBJ_DIR}" "${RM}" -fr "${CLJ_OBJ_DIR}"
mkdir -p "${CLJ_OBJ_DIR}" mkdir -p "${CLJ_OBJ_DIR}"


"${CP}" fasta.clj-11.clj "${CLJ_OBJ_DIR}/fasta.clj" "${CP}" fasta.clj-8.clj "${CLJ_OBJ_DIR}/fasta.clj"


"${JAVA}" "-Dclojure.compile.path=${PS_CLJ_OBJ_DIR}" -classpath "${PS_FULL_CLJ_CLASSPATH}" clojure.lang.Compile fasta "${JAVA}" "-Dclojure.compile.path=${PS_CLJ_OBJ_DIR}" -classpath "${PS_FULL_CLJ_CLASSPATH}" clojure.lang.Compile fasta
213 changes: 213 additions & 0 deletions fasta/fasta.java-4.java
@@ -0,0 +1,213 @@
/*
* The Computer Language Benchmarks Game
* http://benchmarksgame.alioth.debian.org/
*
* modified by Mehmet D. AKIN
* modified by Rikard Mustajärvi
*/

import java.io.IOException;
import java.io.OutputStream;

/**
 * Writes three FASTA-formatted sequences to standard output: one built by
 * cycling through a fixed string ({@link #ALU}) and two built by weighted
 * random selection from an alphabet.
 *
 * <p>All random selection draws from a single linear congruential generator
 * whose state is the static field {@link FloatProbFreq#last}. That state is
 * shared and mutated without synchronization, so the generator methods must
 * not be called concurrently.
 */
class fasta {
    /** Modulus of the linear congruential generator in {@link FloatProbFreq#random}. */
    static final int IM = 139968;
    /** Multiplier of the linear congruential generator. */
    static final int IA = 3877;
    /** Increment of the linear congruential generator. */
    static final int IC = 29573;

    /** Number of sequence characters per output line (excluding the line break). */
    static final int LINE_LENGTH = 60;
    /** Output buffer size in bytes: room for 1024 full lines. */
    static final int BUFFER_SIZE = (LINE_LENGTH + 1) * 1024; // add 1 for '\n'

    /** Charset name used for every String-to-byte conversion, so output does not depend on the platform default. */
    private static final String OUTPUT_CHARSET = "US-ASCII";

    /**
     * Sequence that {@link #makeRepeatFasta} cycles through in {@link #main}.
     * Left non-final so the field's existing public interface is unchanged.
     */
    public static String ALU =
            "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
            + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
            + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
            + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
            + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
            + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
            + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";

    // Weighted selection from alphabet
    private static final FloatProbFreq IUB = new FloatProbFreq(
            new byte[]{
                'a', 'c', 'g', 't',
                'B', 'D', 'H', 'K',
                'M', 'N', 'R', 'S',
                'V', 'W', 'Y'},
            new double[]{
                0.27, 0.12, 0.12, 0.27,
                0.02, 0.02, 0.02, 0.02,
                0.02, 0.02, 0.02, 0.02,
                0.02, 0.02, 0.02,
            }
    );

    private static final FloatProbFreq HOMO_SAPIENS = new FloatProbFreq(
            new byte[]{
                'a',
                'c',
                'g',
                't'},
            new double[]{
                0.3029549426680d,
                0.1979883004921d,
                0.1975473066391d,
                0.3015094502008d}
    );

    /**
     * Allocates the line buffer shared by both generators.
     *
     * <p>The generators flush only when the buffer is exactly full, which
     * requires the buffer to hold a whole number of (line + '\n') records.
     */
    private static byte[] newLineBuffer() {
        final byte[] buffer = new byte[BUFFER_SIZE];
        if (buffer.length % (LINE_LENGTH + 1) != 0) {
            throw new IllegalStateException(
                    "buffer size must be a multiple of "
                    + "line length (including line break)");
        }
        return buffer;
    }

    /** Writes the header line {@code ">id desc\n"} to {@code writer}. */
    private static void writeHeader(String id, String desc, OutputStream writer)
            throws IOException {
        final String descStr = ">" + id + " " + desc + '\n';
        writer.write(descStr.getBytes(OUTPUT_CHARSET));
    }

    /**
     * Writes a header line followed by {@code nChars} randomly selected
     * characters, broken into lines of at most {@link #LINE_LENGTH}.
     *
     * @param id     sequence identifier placed after '>' in the header
     * @param desc   description placed after the identifier
     * @param fpf    weighted alphabet to draw characters from
     * @param nChars number of sequence characters to emit; values &lt;= 0 emit only the header
     * @param writer destination stream; not closed by this method
     * @throws IOException if writing to {@code writer} fails
     */
    static final void makeRandomFasta(String id, String desc,
            FloatProbFreq fpf, int nChars, OutputStream writer)
            throws IOException {
        final byte[] buffer = newLineBuffer();
        writeHeader(id, desc, writer);

        int bufferIndex = 0;
        while (nChars > 0) {
            final int chunkSize = Math.min(nChars, LINE_LENGTH);

            // Only the final line can be short, so the buffer fills exactly.
            if (bufferIndex == buffer.length) {
                writer.write(buffer, 0, bufferIndex);
                bufferIndex = 0;
            }

            bufferIndex = fpf.selectRandomIntoBuffer(buffer, bufferIndex, chunkSize);
            buffer[bufferIndex++] = '\n';

            nChars -= chunkSize;
        }

        writer.write(buffer, 0, bufferIndex);
    }

    /**
     * Writes a header line followed by {@code nChars} characters obtained by
     * cycling through {@code alu}, broken into lines of at most
     * {@link #LINE_LENGTH}.
     *
     * @param id     sequence identifier placed after '>' in the header
     * @param desc   description placed after the identifier
     * @param alu    sequence to repeat; encoded as US-ASCII
     * @param nChars number of sequence characters to emit; values &lt;= 0 emit only the header
     * @param writer destination stream; not closed by this method
     * @throws IllegalArgumentException if {@code alu} is empty while {@code nChars > 0}
     * @throws IOException if writing to {@code writer} fails
     */
    static final void makeRepeatFasta(
            String id, String desc, String alu,
            int nChars, OutputStream writer) throws IOException {
        final byte[] aluBytes = alu.getBytes(OUTPUT_CHARSET);
        if (nChars > 0 && aluBytes.length == 0) {
            // Fail before any output instead of indexing into an empty array.
            throw new IllegalArgumentException(
                    "alu must not be empty when nChars > 0 (nChars=" + nChars + ")");
        }
        int aluIndex = 0;

        final byte[] buffer = newLineBuffer();
        writeHeader(id, desc, writer);

        int bufferIndex = 0;
        while (nChars > 0) {
            final int chunkSize = Math.min(nChars, LINE_LENGTH);

            // Only the final line can be short, so the buffer fills exactly.
            if (bufferIndex == buffer.length) {
                writer.write(buffer, 0, bufferIndex);
                bufferIndex = 0;
            }

            for (int i = 0; i < chunkSize; i++) {
                if (aluIndex == aluBytes.length) {
                    aluIndex = 0; // wrap around to the start of the sequence
                }

                buffer[bufferIndex++] = aluBytes[aluIndex++];
            }
            buffer[bufferIndex++] = '\n';

            nChars -= chunkSize;
        }

        writer.write(buffer, 0, bufferIndex);
    }

    /**
     * Entry point. Emits the three sequences with lengths {@code 2n},
     * {@code 3n} and {@code 5n}, where {@code n} is the first command-line
     * argument (default 1000).
     *
     * @param args optional: {@code args[0]} is parsed as the integer {@code n}
     * @throws IOException if writing to standard output fails
     */
    public static void main(String[] args) throws IOException {
        int n = 1000;
        if (args.length > 0) {
            n = Integer.parseInt(args[0]);
        }

        OutputStream out = System.out;
        makeRepeatFasta("ONE", "Homo sapiens alu", ALU, n * 2, out);
        makeRandomFasta("TWO", "IUB ambiguity codes", IUB, n * 3, out);
        makeRandomFasta("THREE", "Homo sapiens frequency", HOMO_SAPIENS, n * 5, out);
        out.close();
    }

    /**
     * An alphabet with cumulative selection probabilities stored as floats.
     *
     * <p>All instances share the generator state {@link #last}.
     */
    public static final class FloatProbFreq {
        /** Current state of the shared pseudo-random generator; starts at 42. */
        static int last = 42;
        final byte[] chars;
        /** Cumulative probabilities: {@code probs[i]} is the sum of the first {@code i + 1} inputs. */
        final float[] probs;

        /**
         * @param chars symbols of the alphabet; the array is stored, not copied
         * @param probs probability of each symbol, in the same order as {@code chars}
         * @throws IllegalArgumentException if {@code probs} is empty or
         *         {@code chars} has fewer entries than {@code probs}
         */
        public FloatProbFreq(byte[] chars, double[] probs) {
            if (probs.length == 0 || chars.length < probs.length) {
                // Either case would otherwise surface later as an
                // ArrayIndexOutOfBoundsException during selection.
                throw new IllegalArgumentException(
                        "need at least one probability and one char per probability"
                        + " (chars=" + chars.length + ", probs=" + probs.length + ")");
            }
            this.chars = chars;
            this.probs = new float[probs.length];
            for (int i = 0; i < probs.length; i++) {
                this.probs[i] = (float) probs[i];
            }
            makeCumulative();
        }

        /** Replaces each probability with the running sum up to and including it. */
        private final void makeCumulative() {
            double cp = 0.0;
            for (int i = 0; i < probs.length; i++) {
                cp += probs[i];
                probs[i] = (float) cp;
            }
        }

        /**
         * Appends {@code nRandom} randomly selected symbols to {@code buffer}.
         *
         * <p>Each symbol is the first one whose cumulative probability exceeds
         * the drawn random value; if none does, the last symbol is used.
         *
         * @param buffer      destination array
         * @param bufferIndex index of the first byte to write
         * @param nRandom     number of symbols to append
         * @return the index just past the last byte written
         */
        public final int selectRandomIntoBuffer(
                byte[] buffer, int bufferIndex, final int nRandom) {
            // Local copies of the fields used inside the selection loop.
            final byte[] chars = this.chars;
            final float[] probs = this.probs;
            final int len = probs.length;

            outer:
            for (int rIndex = 0; rIndex < nRandom; rIndex++) {
                final float r = random(1.0f);
                for (int i = 0; i < len; i++) {
                    if (r < probs[i]) {
                        buffer[bufferIndex++] = chars[i];
                        continue outer;
                    }
                }

                // r was not below any cumulative value: fall back to the last symbol.
                buffer[bufferIndex++] = chars[len - 1];
            }

            return bufferIndex;
        }

        /**
         * Advances the shared linear congruential generator and returns the
         * new state scaled by {@code max / IM}.
         *
         * @param max scale factor applied to the result
         * @return {@code max * last / IM}, computed in float arithmetic
         */
        public static final float random(final float max) {
            final float oneOverIM = (1.0f / IM);
            last = (last * IA + IC) % IM;
            return max * last * oneOverIM;
        }
    }
}
2 changes: 1 addition & 1 deletion fasta/java-compile.sh
Expand Up @@ -4,7 +4,7 @@ source ../env.sh


"${JAVA}" -version "${JAVA}" -version
"${JAVAC}" -version "${JAVAC}" -version
"${CP}" fasta.java-3.java fasta.java "${CP}" fasta.java-4.java fasta.java
mkdir -p "${JAVA_OBJ_DIR}" mkdir -p "${JAVA_OBJ_DIR}"
"${JAVAC}" -d "${JAVA_OBJ_DIR}" fasta.java "${JAVAC}" -d "${JAVA_OBJ_DIR}" fasta.java
"${RM}" fasta.java "${RM}" fasta.java

0 comments on commit 6d778a6

Please sign in to comment.