Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Added new project structure

  • Loading branch information...
commit 5b2ac555f693731e90b1c5978fdc694408c7e96d 1 parent b2c9e41
Federico Brubacher authored

Showing 23 changed files with 762 additions and 0 deletions. Show diff stats Hide diff stats

  1. BIN  .DS_Store
  2. +10 0 storm-ml/.classpath
  3. +36 0 storm-ml/.project
  4. +2 0  storm-ml/.settings/org.eclipse.core.resources.prefs
  5. +5 0 storm-ml/.settings/org.eclipse.jdt.core.prefs
  6. +4 0 storm-ml/.settings/org.eclipse.m2e.core.prefs
  7. +4 0 storm-ml/MainStorm.java
  8. +4 0 storm-ml/PerceptronTopology.java
  9. +16 0 storm-ml/src/main/clojure/com/twitter/util/datautil.clj
  10. +13 0 storm-ml/src/main/java/com/twitter/Main.java
  11. +52 0 storm-ml/src/main/java/com/twitter/MainOnlineTopology.java
  12. +61 0 storm-ml/src/main/java/com/twitter/algorithms/Aggregator.java
  13. +87 0 storm-ml/src/main/java/com/twitter/algorithms/Learner.java
  14. +30 0 storm-ml/src/main/java/com/twitter/algorithms/LossFunction.java
  15. +64 0 storm-ml/src/main/java/com/twitter/data/Example.java
  16. +10 0 storm-ml/src/main/java/com/twitter/data/HashAll.java
  17. +22 0 storm-ml/src/main/java/com/twitter/data/HashFunction.java
  18. +82 0 storm-ml/src/main/java/com/twitter/storm/primitives/LocalLearner.java
  19. +36 0 storm-ml/src/main/java/com/twitter/storm/primitives/TrainingSpout.java
  20. +35 0 storm-ml/src/main/java/com/twitter/util/Datautil.java
  21. +76 0 storm-ml/src/main/java/com/twitter/util/MathUtil.java
  22. +13 0 storm-ml/src/main/java/com/twitter/util/datautil.clj
  23. +100 0 storm-ml/src/main/resources/testSet.txt
BIN  .DS_Store
Binary file not shown
10 storm-ml/.classpath
... ... @@ -0,0 +1,10 @@
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<classpath>
  3 + <classpathentry kind="src" output="target/classes" path="src/main/java"/>
  4 + <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/clojure"/>
  5 + <classpathentry kind="src" output="target/test-classes" path="src/test/java"/>
  6 + <classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"/>
  7 + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
  8 + <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"/>
  9 + <classpathentry kind="output" path="target/classes"/>
  10 +</classpath>
36 storm-ml/.project
... ... @@ -0,0 +1,36 @@
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<projectDescription>
  3 + <name>storm-ml2</name>
  4 + <comment></comment>
  5 + <projects>
  6 + </projects>
  7 + <buildSpec>
  8 + <buildCommand>
  9 + <name>ccw.builder</name>
  10 + <arguments>
  11 + </arguments>
  12 + </buildCommand>
  13 + <buildCommand>
  14 + <name>org.eclipse.jdt.core.javabuilder</name>
  15 + <arguments>
  16 + </arguments>
  17 + </buildCommand>
  18 + <buildCommand>
  19 + <name>org.eclipse.m2e.core.maven2Builder</name>
  20 + <arguments>
  21 + </arguments>
  22 + </buildCommand>
  23 + </buildSpec>
  24 + <natures>
  25 + <nature>org.eclipse.jdt.core.javanature</nature>
  26 + <nature>org.eclipse.m2e.core.maven2Nature</nature>
  27 + <nature>ccw.nature</nature>
  28 + </natures>
  29 + <linkedResources>
  30 + <link>
  31 + <name>clojure</name>
  32 + <type>2</type>
  33 + <location>/Users/fbrubacher/Documents/workspace/storm-ml2/src/main/clojure</location>
  34 + </link>
  35 + </linkedResources>
  36 +</projectDescription>
2  storm-ml/.settings/org.eclipse.core.resources.prefs
... ... @@ -0,0 +1,2 @@
  1 +eclipse.preferences.version=1
  2 +encoding/<project>=UTF-8
5 storm-ml/.settings/org.eclipse.jdt.core.prefs
... ... @@ -0,0 +1,5 @@
  1 +eclipse.preferences.version=1
  2 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
  3 +org.eclipse.jdt.core.compiler.compliance=1.5
  4 +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
  5 +org.eclipse.jdt.core.compiler.source=1.5
4 storm-ml/.settings/org.eclipse.m2e.core.prefs
... ... @@ -0,0 +1,4 @@
  1 +activeProfiles=
  2 +eclipse.preferences.version=1
  3 +resolveWorkspaceProjects=true
  4 +version=1
4 storm-ml/MainStorm.java
... ... @@ -0,0 +1,4 @@
  1 +
/** Placeholder class; it currently declares no members. */
public class MainStorm {
}
4 storm-ml/PerceptronTopology.java
... ... @@ -0,0 +1,4 @@
  1 +
/** Placeholder class; it currently declares no members. */
public class PerceptronTopology {
}
16 storm-ml/src/main/clojure/com/twitter/util/datautil.clj
... ... @@ -0,0 +1,16 @@
  1 +(ns com.twitter.Datautil
  2 + (:require [clojure.string :as sstring])
  3 + (:gen-class))
  4 +
(defn parse-multiple-to-double
  "Returns a lazy sequence holding Double/parseDouble applied to each of args."
  [& args]
  (map (fn [text] (Double/parseDouble text)) args))
  9 +
;; Rows of testSet.txt as sequences of doubles. The file is slurped when this
;; var is defined; each line is split on tabs and every field is parsed with
;; parse-multiple-to-double.
(def load-dataset
  (->> (slurp "testSet.txt")
       (sstring/split-lines)
       (map #(sstring/split % #"\t"))
       (map #(apply parse-multiple-to-double %))))
  14 +
;; load-dataset converted to a Java array whose elements are primitive double
;; arrays, one per row.
(def array-dataset
  (into-array
    (map #(into-array Double/TYPE %) load-dataset)))
13 storm-ml/src/main/java/com/twitter/Main.java
... ... @@ -0,0 +1,13 @@
  1 +package com.twitter;
  2 +
  3 +import java.io.IOException;
  4 +
  5 +import com.twitter.util.MathUtil;
  6 +
  7 +public class Main {
  8 +
  9 + public static void main(String[] args) throws IOException {
  10 + int dimension = MathUtil.nextLikelyPrime(10000);
  11 + // Learner learner = new OnlinePerceptron(dimension);
  12 + }
  13 +}
52 storm-ml/src/main/java/com/twitter/MainOnlineTopology.java
... ... @@ -0,0 +1,52 @@
  1 +package com.twitter;
  2 +
  3 +import java.io.File;
  4 +import java.io.IOException;
  5 +import java.util.ArrayList;
  6 +import java.util.List;
  7 +import java.util.Scanner;
  8 +
  9 +import backtype.storm.Config;
  10 +import backtype.storm.LocalCluster;
  11 +import backtype.storm.topology.TopologyBuilder;
  12 +import backtype.storm.tuple.Values;
  13 +import backtype.storm.utils.Utils;
  14 +
  15 +import com.twitter.storm.primitives.LocalLearner;
  16 +import com.twitter.storm.primitives.TrainingSpout;
  17 +import com.twitter.util.MathUtil;
  18 +
  19 +public class MainOnlineTopology {
  20 +
  21 + public static List<List<Object>> readExamples(String fileName) throws IOException {
  22 + Scanner in = new Scanner(new File(fileName));
  23 + List<List<Object>> tupleList = new ArrayList<List<Object>>();
  24 + while (in.hasNext()) {
  25 + String line = in.nextLine();
  26 + tupleList.add(new Values(line));
  27 + }
  28 + in.close();
  29 + return tupleList;
  30 + }
  31 +
  32 + public static void main(String[] args) throws Exception {
  33 + int dimension = MathUtil.nextLikelyPrime(10);
  34 + System.out.println("Using dimension: " + dimension);
  35 +
  36 + // Map exampleMap = new HashMap<Integer, List<List<Object>>>();
  37 + // exampleMap.put(0, readExamples(args[0]));
  38 +
  39 + TopologyBuilder builder = new TopologyBuilder();
  40 + builder.setSpout("example_spitter", new TrainingSpout());
  41 + builder.setBolt("local_learner", new LocalLearner(2), 1).shuffleGrouping("example_spitter");
  42 + Config conf = new Config();
  43 + conf.setDebug(true);
  44 + LocalCluster cluster = new LocalCluster();
  45 + cluster.submitTopology("test", conf, builder.createTopology());
  46 + Utils.sleep(10000);
  47 + cluster.killTopology("test");
  48 + cluster.shutdown();
  49 +
  50 + // builder.setBolt("local_learner", new LocalLearner(dimension), 1).customGrouping(spout, grouping);
  51 + }
  52 +}
61 storm-ml/src/main/java/com/twitter/algorithms/Aggregator.java
... ... @@ -0,0 +1,61 @@
  1 +package com.twitter.algorithms;
  2 +
  3 +import java.util.Arrays;
  4 +import java.util.Map;
  5 +
  6 +import org.apache.log4j.Logger;
  7 +
  8 +import backtype.storm.coordination.BatchOutputCollector;
  9 +import backtype.storm.task.OutputCollector;
  10 +import backtype.storm.task.TopologyContext;
  11 +import backtype.storm.topology.OutputFieldsDeclarer;
  12 +import backtype.storm.topology.base.BaseRichBolt;
  13 +import backtype.storm.transactional.ICommitter;
  14 +import backtype.storm.tuple.Tuple;
  15 +
  16 +import com.twitter.util.MathUtil;
  17 +
  18 +public class Aggregator extends BaseRichBolt implements ICommitter {
  19 +
  20 + public static Logger LOG = Logger.getLogger(Aggregator.class);
  21 + double[] aggregateWeights = null;
  22 + double totalUpdateWeight = 0.0;
  23 +
  24 + public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, Object id) {
  25 + // TODO Auto-generated method stub
  26 +
  27 + }
  28 +
  29 + public void execute(Tuple tuple) {
  30 +
  31 + double[] weight = (double[]) tuple.getValue(1);
  32 + double parallelUpdateWeight = (Double) tuple.getValue(2);
  33 + if (parallelUpdateWeight != 1.0) {
  34 + weight = MathUtil.times(weight, parallelUpdateWeight);
  35 + }
  36 + if (aggregateWeights == null) {
  37 + aggregateWeights = weight;
  38 + } else {
  39 + MathUtil.plus(aggregateWeights, weight);
  40 + }
  41 + totalUpdateWeight += parallelUpdateWeight;
  42 + }
  43 +
  44 + public void finishBatch() {
  45 + if (aggregateWeights != null) {
  46 + MathUtil.times(aggregateWeights, 1.0 / totalUpdateWeight);
  47 + LOG.info("New weight vector: " + Arrays.toString(aggregateWeights));
  48 + }
  49 + }
  50 +
  51 + public void declareOutputFields(OutputFieldsDeclarer declarer) {
  52 + // TODO Auto-generated method stub
  53 +
  54 + }
  55 +
  56 + public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
  57 + // TODO Auto-generated method stub
  58 +
  59 + }
  60 +
  61 +}
87 storm-ml/src/main/java/com/twitter/algorithms/Learner.java
... ... @@ -0,0 +1,87 @@
  1 +package com.twitter.algorithms;
  2 +
  3 +import java.io.Serializable;
  4 +import java.util.Arrays;
  5 +
  6 +import org.apache.log4j.Logger;
  7 +
  8 +import com.twitter.data.Example;
  9 +import com.twitter.storm.primitives.LocalLearner;
  10 +import com.twitter.util.MathUtil;
  11 +
  12 +public class Learner implements Serializable {
  13 + public static Logger LOG = Logger.getLogger(LocalLearner.class);
  14 +
  15 + protected double[] weights;
  16 + protected LossFunction lossFunction;
  17 + int numExamples = 0;
  18 + int numMisclassified = 0;
  19 + double totalLoss = 0.0;
  20 + double gradientSum = 0.0;
  21 + protected double learningRate = 1.0;
  22 +
  23 + public Learner(int dimension) {
  24 + weights = new double[dimension];
  25 + lossFunction = new LossFunction(2);
  26 + }
  27 +
  28 + public void update(Example example, int epoch) {
  29 + int predicted = predict(example);
  30 + updateStats(example, predicted);
  31 + LOG.debug("EXAMPLE " + example.label + " PREDICTED: " + predicted);
  32 + if (example.isLabeled) {
  33 + if ((double) predicted != example.label) {
  34 + double[] gradient = lossFunction.gradient(example, predicted);
  35 + gradientSum += MathUtil.l2norm(gradient);
  36 + double eta = getLearningRate(example, epoch);
  37 + MathUtil.plus(weights, MathUtil.times(gradient, -1.0 * eta));
  38 + }
  39 + }
  40 + displayStats();
  41 + }
  42 +
  43 + protected double getLearningRate(Example example, int timestamp) {
  44 + return learningRate / Math.sqrt(timestamp);
  45 + }
  46 +
  47 + public double[] getWeights() {
  48 + return weights;
  49 + }
  50 +
  51 + public double getParallelUpdateWeight() {
  52 + return gradientSum;
  53 + }
  54 +
  55 + public void initWeights(double[] newWeights) {
  56 + assert (newWeights.length == weights.length);
  57 + weights = Arrays.copyOf(newWeights, newWeights.length);
  58 + }
  59 +
  60 + public int predict(Example example) {
  61 + double dot = MathUtil.dot(weights, example.x);
  62 + return (dot >= 0.0) ? 1 : -1;
  63 + }
  64 +
  65 + protected void updateStats(Example example, int prediction) {
  66 + numExamples++;
  67 + if (example.label != prediction)
  68 + numMisclassified++;
  69 + totalLoss += lossFunction.get(example, prediction);
  70 + }
  71 +
  72 + public void displayStats() {
  73 + if (numExamples == 0) {
  74 + System.out.println("No examples seen so far.");
  75 + }
  76 + double accuracy = 1.0 - numMisclassified * 1.0 / numExamples;
  77 + double meanLoss = totalLoss * 1.0 / numExamples;
  78 + LOG.info(String.format("Accuracy: %g\tMean Loss: %g", accuracy, meanLoss));
  79 +
  80 + }
  81 +
  82 + public void resetStats() {
  83 + numExamples = 0;
  84 + numMisclassified = 0;
  85 + totalLoss = 0.0;
  86 + }
  87 +}
30 storm-ml/src/main/java/com/twitter/algorithms/LossFunction.java
... ... @@ -0,0 +1,30 @@
  1 +package com.twitter.algorithms;
  2 +
  3 +import java.io.Serializable;
  4 +
  5 +import com.twitter.data.Example;
  6 +
  7 +public class LossFunction implements Serializable {
  8 + private double[] grad; // gradient
  9 +
  10 + public LossFunction(int dimension) {
  11 + grad = new double[dimension];
  12 + }
  13 +
  14 + public double get(Example e, int prediction) {
  15 + return 0.5 * (e.label - prediction) * (e.label - prediction);
  16 + }
  17 +
  18 + public double[] gradient(Example e, int prediction) {
  19 + double f = -1.0 * (e.label - prediction);
  20 + for (int i = 0; i < e.x.length; i++) {
  21 + grad[i] = f * e.x[i];
  22 + }
  23 + return grad;
  24 + }
  25 +
  26 + static LossFunction byName(String name, int dimension) {
  27 + return new LossFunction(dimension);
  28 + }
  29 +
  30 +}
64 storm-ml/src/main/java/com/twitter/data/Example.java
... ... @@ -0,0 +1,64 @@
  1 +package com.twitter.data;
  2 +
  3 +import java.util.Arrays;
  4 +
  5 +/**
  6 + * @author Delip Rao
  7 + */
  8 +public class Example {
  9 + public double[] x;
  10 + public double label;
  11 + public boolean isLabeled;
  12 + public double importance;
  13 + public String tag;
  14 +
  15 + public Example(int dimension) {
  16 + x = new double[dimension];
  17 + isLabeled = false;
  18 + }
  19 +
  20 + /**
  21 + *
  22 + * @param example
  23 + * string representation of an example [+1,-1] | tag | importance | extra_info | feature:value pairs
  24 + */
  25 + public void parseFrom(String example, HashFunction hashFunction) {
  26 + int dimension = x.length;
  27 + example = example.trim();
  28 + String[] toks = example.split("\\|");
  29 + for (int i = 0; i < toks.length; i++) {
  30 + toks[i] = toks[i].trim();
  31 + }
  32 + try {
  33 + if (toks[0].equals("-1") || toks[0].equals("+1") || toks[0].equals("1") || toks[0].equals("0")) {
  34 + // label = Integer.parseInt(toks[0]);
  35 + isLabeled = true;
  36 + }
  37 + tag = toks[1];
  38 + importance = 1.0;
  39 + if (!toks[2].isEmpty()) {
  40 + importance = Double.parseDouble(toks[2]);
  41 + }
  42 + String extraInfo = toks[3];
  43 + // TODO (Delip): parse extraInfo
  44 + for (String fv : toks[4].split("\\s+")) {
  45 + String[] tmp = fv.split(":");
  46 + String feature = tmp[0];
  47 + double value = 1.0;
  48 + if (tmp.length == 2) {
  49 + value = Double.parseDouble(tmp[1]);
  50 + }
  51 + int index = hashFunction.hash(feature, 0) % dimension;
  52 + x[index] += value;
  53 + }
  54 + } catch (Throwable e) {
  55 + System.err.println("Error Parsing:\n" + example);
  56 + e.printStackTrace();
  57 + return;
  58 + }
  59 + }
  60 +
  61 + public String toString() {
  62 + return label + ":" + Arrays.toString(x);
  63 + }
  64 +}
10 storm-ml/src/main/java/com/twitter/data/HashAll.java
... ... @@ -0,0 +1,10 @@
  1 +package com.twitter.data;
  2 +
  3 +
  4 +public class HashAll extends HashFunction {
  5 +
  6 + @Override
  7 + public int hash(String key, int seed) {
  8 + return murmurHash32(key, seed);
  9 + }
  10 +}
22 storm-ml/src/main/java/com/twitter/data/HashFunction.java
... ... @@ -0,0 +1,22 @@
  1 +package com.twitter.data;
  2 +
  3 +import com.google.common.hash.Hashing;
  4 +
  5 +public abstract class HashFunction {
  6 + public abstract int hash(String key, int seed);
  7 +
  8 + /**
  9 + * Generates 32 bit hash from byte array and seed using the murmur hash algorithm
  10 + *
  11 + * @param key
  12 + * string to hash
  13 + * @param seed
  14 + * initial seed value
  15 + * @return 32 bit hash of the given string
  16 + */
  17 + protected int murmurHash32(final String key, int seed) {
  18 + int h = Hashing.murmur3_32(seed).hashString(key).asInt();
  19 + h *= (h < 0) ? -1 : 1;
  20 + return h;
  21 + }
  22 +}
82 storm-ml/src/main/java/com/twitter/storm/primitives/LocalLearner.java
... ... @@ -0,0 +1,82 @@
  1 +package com.twitter.storm.primitives;
  2 +
  3 +import java.util.ArrayList;
  4 +import java.util.Arrays;
  5 +import java.util.List;
  6 +import java.util.Map;
  7 +
  8 +import org.apache.log4j.Logger;
  9 +
  10 +import backtype.storm.task.OutputCollector;
  11 +import backtype.storm.task.TopologyContext;
  12 +import backtype.storm.topology.OutputFieldsDeclarer;
  13 +import backtype.storm.topology.base.BaseRichBolt;
  14 +import backtype.storm.transactional.ICommitter;
  15 +import backtype.storm.tuple.Fields;
  16 +import backtype.storm.tuple.Tuple;
  17 +import backtype.storm.tuple.Values;
  18 +
  19 +import com.twitter.algorithms.Learner;
  20 +import com.twitter.data.Example;
  21 +import com.twitter.data.HashAll;
  22 +
  23 +public class LocalLearner extends BaseRichBolt implements ICommitter {
  24 + public static Logger LOG = Logger.getLogger(LocalLearner.class);
  25 +
  26 + private int dimension;
  27 + OutputCollector _collector;
  28 + List<Example> buffer = new ArrayList<Example>();
  29 + Object id;
  30 + OutputCollector collector;
  31 + HashAll hashFunction;
  32 + Learner learner;
  33 + double[] weightVector;
  34 +
  35 + public LocalLearner(int dimension) {
  36 + this(dimension, new Learner(dimension));// , new HashAll());
  37 + }
  38 +
  39 + public LocalLearner(int dimension, Learner onlinePerceptron) {// , HashAll hashAll) {
  40 + this.dimension = dimension;
  41 + this.learner = onlinePerceptron;
  42 + // this.hashFunction = hashAll;
  43 + weightVector = new double[dimension];
  44 + }
  45 +
  46 + public void execute(Tuple tuple) {
  47 + LOG.debug("Old weights" + Arrays.toString(learner.getWeights()));
  48 + Example example = new Example(2);
  49 + example.x[0] = (Double) tuple.getValue(0);
  50 + example.x[1] = (Double) tuple.getValue(1);
  51 + example.label = (Double) tuple.getValue(2);
  52 + example.isLabeled = true;
  53 + learner.update(example, 1);
  54 + _collector.ack(tuple);
  55 + LOG.debug("New weights" + Arrays.toString(learner.getWeights()));
  56 + // example.parseFrom((String) tuple.getValue(1), hashFunction);
  57 + // buffer.add(example);
  58 + }
  59 +
  60 + public void finishBatch() {
  61 + if (buffer.size() == 0)
  62 + return;
  63 + learner.initWeights(weightVector);
  64 + for (Example e : buffer) {
  65 + learner.update(e, 1);
  66 + }
  67 +
  68 + collector.emit(new Values(id, learner.getWeights(), learner.getParallelUpdateWeight()));
  69 + }
  70 +
  71 + public void declareOutputFields(OutputFieldsDeclarer declarer) {
  72 + declarer.declare(new Fields("id", "weight_vector", "parallel_update_weights"));
  73 + }
  74 +
  75 + public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
  76 + this.collector = collector;
  77 + learner.initWeights(weightVector);
  78 + _collector = collector;
  79 + weightVector = (double[]) context.getTaskData();
  80 + context.setTaskData(weightVector);
  81 + }
  82 +}
36 storm-ml/src/main/java/com/twitter/storm/primitives/TrainingSpout.java
... ... @@ -0,0 +1,36 @@
  1 +package com.twitter.storm.primitives;
  2 +
  3 +import java.util.List;
  4 +import java.util.Map;
  5 +
  6 +import backtype.storm.spout.SpoutOutputCollector;
  7 +import backtype.storm.task.TopologyContext;
  8 +import backtype.storm.topology.OutputFieldsDeclarer;
  9 +import backtype.storm.topology.base.BaseRichSpout;
  10 +import backtype.storm.tuple.Fields;
  11 +import backtype.storm.tuple.Values;
  12 +import backtype.storm.utils.Utils;
  13 +
  14 +import com.twitter.util.Datautil;
  15 +
  16 +public class TrainingSpout extends BaseRichSpout {
  17 + SpoutOutputCollector _collector;
  18 +
  19 + public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
  20 + _collector = collector;
  21 + }
  22 +
  23 + public void nextTuple() {
  24 + Utils.sleep(100);
  25 + List<Double[]> dataSet = new Datautil().readTrainingFile();
  26 + for (Double[] trainingItem : dataSet) {
  27 + _collector.emit(new Values(trainingItem));
  28 + }
  29 +
  30 + }
  31 +
  32 + public void declareOutputFields(OutputFieldsDeclarer declarer) {
  33 + declarer.declare(new Fields("trainingItem1", "t2", "t3"));
  34 + }
  35 +
  36 +}
35 storm-ml/src/main/java/com/twitter/util/Datautil.java
... ... @@ -0,0 +1,35 @@
  1 +package com.twitter.util;
  2 +
  3 +import java.io.BufferedReader;
  4 +import java.io.FileNotFoundException;
  5 +import java.io.FileReader;
  6 +import java.io.IOException;
  7 +import java.util.ArrayList;
  8 +import java.util.List;
  9 +
/** Loader for tab-separated training files with three numeric columns per row. */
public class Datautil {

    /** File read by the no-argument {@link #readTrainingFile()}. */
    private static final String DEFAULT_TRAINING_FILE = "src/main/resources/testSet.txt";

    /** Number of leading columns parsed from every row. */
    private static final int COLUMNS = 3;

    /**
     * Reads the default training file.
     *
     * @return the parsed rows; empty if the file is missing or unreadable
     */
    public List<Double[]> readTrainingFile() {
        return readTrainingFile(DEFAULT_TRAINING_FILE);
    }

    /**
     * Reads a tab-separated file and parses the first three columns of every
     * non-blank line as doubles. I/O problems are reported on stderr and the
     * rows read so far are returned (best effort).
     *
     * @param path file to read
     * @return the parsed rows, in file order
     * @throws IllegalArgumentException if a non-blank line has fewer than three columns
     * @throws NumberFormatException    if a column is not a valid double
     */
    public List<Double[]> readTrainingFile(String path) {
        List<Double[]> lines = new ArrayList<Double[]>();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(path));
            String strLine;
            int lineNumber = 0;
            while ((strLine = br.readLine()) != null) {
                lineNumber++;
                if (strLine.trim().length() == 0) {
                    // Tolerate blank lines, e.g. a trailing newline at end of file.
                    continue;
                }
                String[] values = strLine.split("\\t");
                if (values.length < COLUMNS) {
                    throw new IllegalArgumentException("Expected " + COLUMNS + " tab-separated columns at "
                            + path + ":" + lineNumber + " but found " + values.length);
                }
                Double[] line = new Double[COLUMNS];
                for (int i = 0; i < COLUMNS; i++) {
                    line[i] = Double.parseDouble(values[i]);
                }
                lines.add(line);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the file handle on every path, including parse failures.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close itself fails.
                }
            }
        }
        return lines;
    }
}
76 storm-ml/src/main/java/com/twitter/util/MathUtil.java
... ... @@ -0,0 +1,76 @@
  1 +package com.twitter.util;
  2 +
  3 +import java.math.BigInteger;
  4 +import java.util.Arrays;
  5 +
/**
 * Misc. math util functions on {@code double[]} vectors
 * (refactor with Twitter specific ones).
 * Methods without a {@code C} suffix modify and return their first argument;
 * the {@code C} variants work on a copy.
 *
 * @author Delip Rao
 */
public class MathUtil {

    /**
     * @return the Euclidean (L2) norm of {@code v}, i.e. the square root of
     *         the sum of squared components; 0 for an empty vector
     */
    public static double l2norm(double [] v) {
        double sum = 0;
        for (double d : v) {
            sum += d * d;
        }
        // The norm is the root of the sum of squares, not the sum itself.
        return Math.sqrt(sum);
    }

    /** Sets every component of {@code v} to 0 and returns {@code v}. */
    public static double [] zero(double [] v) {
        Arrays.fill(v, 0);
        return v;
    }

    /** Multiplies {@code v} by {@code factor} in place and returns {@code v}. */
    public static double [] times(double [] v, double factor) {
        for (int i = 0; i < v.length; i++) {
            v[i] *= factor;
        }
        return v;
    }

    /** Returns a new vector equal to {@code v * factor}; {@code v} is untouched. */
    public static double [] timesC(double [] v, double factor) {
        return times(Arrays.copyOf(v, v.length), factor);
    }

    /** Adds {@code u} to {@code v} in place (over the length of {@code v}) and returns {@code v}. */
    public static double [] plus(double [] v, double [] u) {
        for (int i = 0; i < v.length; i++) {
            v[i] += u[i];
        }
        return v;
    }

    /** Subtracts {@code u} from {@code v} in place (over the length of {@code v}) and returns {@code v}. */
    public static double [] minus(double [] v, double [] u) {
        for (int i = 0; i < v.length; i++) {
            v[i] -= u[i];
        }
        return v;
    }

    /** Returns a new vector equal to {@code v - u}; {@code v} is untouched. */
    public static double [] minusC(double [] v, double [] u) {
        return minus(Arrays.copyOf(v, v.length), u);
    }

    /** @return the dot product of {@code u} and {@code v}, taken over the length of {@code v} */
    public static double dot(double [] u, double [] v) {
        double result = 0;
        for (int i = 0; i < v.length; i++) {
            result += u[i] * v[i];
        }
        return result;
    }

    /**
     * Returns the smallest probable prime that is {@code >= n}.
     * Any {@code n <= 2} yields 2; previously {@code n <= 0} threw an
     * {@link ArithmeticException} and {@code Integer.MIN_VALUE} wrapped around.
     *
     * @param n lower bound for the prime
     * @return a probable prime not smaller than {@code n}
     */
    public static int nextLikelyPrime(int n) {
        if (n <= 2) {
            return 2;
        }
        // nextProbablePrime is strictly greater than its receiver, so start
        // from n - 1 to allow n itself to be returned.
        return BigInteger.valueOf((long) n - 1).nextProbablePrime().intValue();
    }
}
13 storm-ml/src/main/java/com/twitter/util/datautil.clj
... ... @@ -0,0 +1,13 @@
(ns com.twitter.util.DataUtil
  ;; The functions below use the sutils alias, so it has to be required here.
  (:require [clojure.string :as sutils])
  (:gen-class))

(defn parse-multiple-to-float
  "Returns a lazy sequence holding Double/parseDouble applied to each of args.
  Despite the name, the values are doubles."
  [& args]
  (map #(Double/parseDouble %) args))

(defn load-dataset
  "Reads testSet.txt, splits each line on tabs and parses every field with
  parse-multiple-to-float. Returns a lazy sequence with one sequence of
  doubles per line."
  []
  (let [data-text (map #(sutils/split % #"\t")
                       (sutils/split-lines (slurp "testSet.txt")))]
    (map #(apply parse-multiple-to-float %) data-text)))
100 storm-ml/src/main/resources/testSet.txt
... ... @@ -0,0 +1,100 @@
  1 +-0.017612 14.053064 -1
  2 +-1.395634 4.662541 1
  3 +-0.752157 6.538620 -1
  4 +-1.322371 7.152853 -1
  5 +0.423363 11.054677 -1
  6 +0.406704 7.067335 1
  7 +0.667394 12.741452 -1
  8 +-2.460150 6.866805 1
  9 +0.569411 9.548755 -1
  10 +-0.026632 10.427743 -1
  11 +0.850433 6.920334 1
  12 +1.347183 13.175500 -1
  13 +1.176813 3.167020 1
  14 +-1.781871 9.097953 -1
  15 +-0.566606 5.749003 1
  16 +0.931635 1.589505 1
  17 +-0.024205 6.151823 1
  18 +-0.036453 2.690988 1
  19 +-0.196949 0.444165 1
  20 +1.014459 5.754399 1
  21 +1.985298 3.230619 1
  22 +-1.693453 -0.557540 1
  23 +-0.576525 11.778922 -1
  24 +-0.346811 -1.678730 1
  25 +-2.124484 2.672471 1
  26 +1.217916 9.597015 -1
  27 +-0.733928 9.098687 -1
  28 +-3.642001 -1.618087 1
  29 +0.315985 3.523953 1
  30 +1.416614 9.619232 -1
  31 +-0.386323 3.989286 1
  32 +0.556921 8.294984 1
  33 +1.224863 11.587360 -1
  34 +-1.347803 -2.406051 1
  35 +1.196604 4.951851 1
  36 +0.275221 9.543647 -1
  37 +0.470575 9.332488 -1
  38 +-1.889567 9.542662 -1
  39 +-1.527893 12.150579 -1
  40 +-1.185247 11.309318 -1
  41 +-0.445678 3.297303 1
  42 +1.042222 6.105155 1
  43 +-0.618787 10.320986 -1
  44 +1.152083 0.548467 1
  45 +0.828534 2.676045 1
  46 +-1.237728 10.549033 -1
  47 +-0.683565 -2.166125 1
  48 +0.229456 5.921938 1
  49 +-0.959885 11.555336 -1
  50 +0.492911 10.993324 -1
  51 +0.184992 8.721488 -1
  52 +-0.355715 10.325976 -1
  53 +-0.397822 8.058397 -1
  54 +0.824839 13.730343 -1
  55 +1.507278 5.027866 1
  56 +0.099671 6.835839 1
  57 +-0.344008 10.717485 -1
  58 +1.785928 7.718645 1
  59 +-0.918801 11.560217 -1
  60 +-0.364009 4.747300 1
  61 +-0.841722 4.119083 1
  62 +0.490426 1.960539 1
  63 +-0.007194 9.075792 -1
  64 +0.356107 12.447863 -1
  65 +0.342578 12.281162 -1
  66 +-0.810823 -1.466018 1
  67 +2.530777 6.476801 1
  68 +1.296683 11.607559 -1
  69 +0.475487 12.040035 -1
  70 +-0.783277 11.009725 -1
  71 +0.074798 11.023650 -1
  72 +-1.337472 0.468339 1
  73 +-0.102781 13.763651 -1
  74 +-0.147324 2.874846 1
  75 +0.518389 9.887035 -1
  76 +1.015399 7.571882 -1
  77 +-1.658086 -0.027255 1
  78 +1.319944 2.171228 1
  79 +2.056216 5.019981 1
  80 +-0.851633 4.375691 1
  81 +-1.510047 6.061992 -1
  82 +-1.076637 -3.181888 1
  83 +1.821096 10.283990 -1
  84 +3.010150 8.401766 1
  85 +-1.099458 1.688274 1
  86 +-0.834872 -1.733869 1
  87 +-0.846637 3.849075 1
  88 +1.400102 12.628781 -1
  89 +1.752842 5.468166 1
  90 +0.078557 0.059736 1
  91 +0.089392 -0.715300 1
  92 +1.825662 12.693808 -1
  93 +0.197445 9.744638 -1
  94 +0.126117 0.922311 1
  95 +-0.679797 1.220530 1
  96 +0.677983 2.556666 1
  97 +0.761349 10.693862 -1
  98 +-2.168791 0.143632 1
  99 +1.388610 9.341997 -1
  100 +0.317029 14.739025 -1

0 comments on commit 5b2ac55

Please sign in to comment.
Something went wrong with that request. Please try again.