Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Added new project structure

  • Loading branch information...
commit 5b2ac555f693731e90b1c5978fdc694408c7e96d 1 parent b2c9e41
Federico Brubacher authored
Showing with 762 additions and 0 deletions.
  1. BIN  .DS_Store
  2. +10 −0 storm-ml/.classpath
  3. +36 −0 storm-ml/.project
  4. +2 −0  storm-ml/.settings/org.eclipse.core.resources.prefs
  5. +5 −0 storm-ml/.settings/org.eclipse.jdt.core.prefs
  6. +4 −0 storm-ml/.settings/org.eclipse.m2e.core.prefs
  7. +4 −0 storm-ml/MainStorm.java
  8. +4 −0 storm-ml/PerceptronTopology.java
  9. +16 −0 storm-ml/src/main/clojure/com/twitter/util/datautil.clj
  10. +13 −0 storm-ml/src/main/java/com/twitter/Main.java
  11. +52 −0 storm-ml/src/main/java/com/twitter/MainOnlineTopology.java
  12. +61 −0 storm-ml/src/main/java/com/twitter/algorithms/Aggregator.java
  13. +87 −0 storm-ml/src/main/java/com/twitter/algorithms/Learner.java
  14. +30 −0 storm-ml/src/main/java/com/twitter/algorithms/LossFunction.java
  15. +64 −0 storm-ml/src/main/java/com/twitter/data/Example.java
  16. +10 −0 storm-ml/src/main/java/com/twitter/data/HashAll.java
  17. +22 −0 storm-ml/src/main/java/com/twitter/data/HashFunction.java
  18. +82 −0 storm-ml/src/main/java/com/twitter/storm/primitives/LocalLearner.java
  19. +36 −0 storm-ml/src/main/java/com/twitter/storm/primitives/TrainingSpout.java
  20. +35 −0 storm-ml/src/main/java/com/twitter/util/Datautil.java
  21. +76 −0 storm-ml/src/main/java/com/twitter/util/MathUtil.java
  22. +13 −0 storm-ml/src/main/java/com/twitter/util/datautil.clj
  23. +100 −0 storm-ml/src/main/resources/testSet.txt
BIN  .DS_Store
View
Binary file not shown
10 storm-ml/.classpath
View
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java"/>
+ <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/clojure"/>
+ <classpathentry kind="src" output="target/test-classes" path="src/test/java"/>
+ <classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+ <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
36 storm-ml/.project
View
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>storm-ml2</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>ccw.builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.m2e.core.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.m2e.core.maven2Nature</nature>
+ <nature>ccw.nature</nature>
+ </natures>
+ <linkedResources>
+ <link>
+ <name>clojure</name>
+ <type>2</type>
+ <location>/Users/fbrubacher/Documents/workspace/storm-ml2/src/main/clojure</location>
+ </link>
+ </linkedResources>
+</projectDescription>
2  storm-ml/.settings/org.eclipse.core.resources.prefs
View
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+encoding/<project>=UTF-8
5 storm-ml/.settings/org.eclipse.jdt.core.prefs
View
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.5
4 storm-ml/.settings/org.eclipse.m2e.core.prefs
View
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
4 storm-ml/MainStorm.java
View
@@ -0,0 +1,4 @@
+
+/**
+ * Placeholder type that declares no fields and no methods.
+ */
+public class MainStorm {
+}
4 storm-ml/PerceptronTopology.java
View
@@ -0,0 +1,4 @@
+
+/**
+ * Placeholder type that declares no fields and no methods.
+ */
+public class PerceptronTopology {
+}
16 storm-ml/src/main/clojure/com/twitter/util/datautil.clj
View
@@ -0,0 +1,16 @@
+(ns com.twitter.Datautil
+ (:require [clojure.string :as sstring])
+ (:gen-class))
+
+(defn parse-multiple-to-double
+  "Takes any number of strings and returns a lazy sequence holding the result
+  of Double/parseDouble for each one, in argument order."
+  [& args]
+  (map (fn [text] (Double/parseDouble text)) args))
+
+(def load-dataset
+  ;; Evaluated once, when this namespace is loaded: reads "testSet.txt"
+  ;; (resolved against the process working directory), splits it into lines,
+  ;; splits every line on tabs and parses each field as a double.
+  (let [rows      (sstring/split-lines (slurp "testSet.txt"))
+        data-text (map (fn [row] (sstring/split row #"\t")) rows)]
+    (map (fn [fields] (apply parse-multiple-to-double fields)) data-text)))
+
+(def array-dataset
+  ;; Turns every row of load-dataset into a primitive double array and
+  ;; collects those rows into one outer array.
+  (let [to-double-row (fn [row] (into-array Double/TYPE row))]
+    (into-array (map to-double-row load-dataset))))
13 storm-ml/src/main/java/com/twitter/Main.java
View
@@ -0,0 +1,13 @@
+package com.twitter;
+
+import java.io.IOException;
+
+import com.twitter.util.MathUtil;
+
+/**
+ * Command-line entry point that currently only derives a feature dimension.
+ */
+public class Main {
+
+    /**
+     * Asks {@link MathUtil#nextLikelyPrime(int)} for a prime starting from 10000.
+     * The value is not used afterwards because the learner construction is disabled.
+     *
+     * @param args command-line arguments; not read
+     * @throws IOException declared by the signature; nothing in the body raises it
+     */
+    public static void main(String[] args) throws IOException {
+        final int featureDimension = MathUtil.nextLikelyPrime(10000);
+        // Disabled: Learner learner = new OnlinePerceptron(featureDimension);
+    }
+}
52 storm-ml/src/main/java/com/twitter/MainOnlineTopology.java
View
@@ -0,0 +1,52 @@
+package com.twitter;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Scanner;
+
+import backtype.storm.Config;
+import backtype.storm.LocalCluster;
+import backtype.storm.topology.TopologyBuilder;
+import backtype.storm.tuple.Values;
+import backtype.storm.utils.Utils;
+
+import com.twitter.storm.primitives.LocalLearner;
+import com.twitter.storm.primitives.TrainingSpout;
+import com.twitter.util.MathUtil;
+
+/**
+ * Builds the spout -> local learner topology and runs it on an in-process
+ * Storm cluster for ten seconds.
+ */
+public class MainOnlineTopology {
+
+    /**
+     * Reads a text file and wraps every line in a single-field tuple.
+     *
+     * @param fileName path of the file to read
+     * @return one {@link Values} per line, in file order
+     * @throws IOException if the file cannot be opened
+     */
+    public static List<List<Object>> readExamples(String fileName) throws IOException {
+        List<List<Object>> tupleList = new ArrayList<List<Object>>();
+        Scanner in = new Scanner(new File(fileName));
+        try {
+            // hasNext() is kept from the original: reading stops once only
+            // whitespace remains, so trailing blank lines are not returned.
+            while (in.hasNext()) {
+                tupleList.add(new Values(in.nextLine()));
+            }
+        } finally {
+            // Release the file handle even when reading fails part-way through.
+            in.close();
+        }
+        return tupleList;
+    }
+
+    /**
+     * Wires {@link TrainingSpout} into a single {@link LocalLearner} task, runs the
+     * topology locally for 10 seconds, then kills it and shuts the cluster down.
+     *
+     * @param args command-line arguments; not read
+     * @throws Exception anything raised while building or running the topology
+     */
+    public static void main(String[] args) throws Exception {
+        // NOTE: this value is only printed; the bolt below is built with a fixed size of 2.
+        int dimension = MathUtil.nextLikelyPrime(10);
+        System.out.println("Using dimension: " + dimension);
+
+        // Map exampleMap = new HashMap<Integer, List<List<Object>>>();
+        // exampleMap.put(0, readExamples(args[0]));
+
+        TopologyBuilder builder = new TopologyBuilder();
+        builder.setSpout("example_spitter", new TrainingSpout());
+        builder.setBolt("local_learner", new LocalLearner(2), 1).shuffleGrouping("example_spitter");
+        Config conf = new Config();
+        conf.setDebug(true);
+        LocalCluster cluster = new LocalCluster();
+        try {
+            cluster.submitTopology("test", conf, builder.createTopology());
+            Utils.sleep(10000);
+            cluster.killTopology("test");
+        } finally {
+            // Always stop the in-process cluster, even if submit/kill failed.
+            cluster.shutdown();
+        }
+
+        // builder.setBolt("local_learner", new LocalLearner(dimension), 1).customGrouping(spout, grouping);
+    }
+}
61 storm-ml/src/main/java/com/twitter/algorithms/Aggregator.java
View
@@ -0,0 +1,61 @@
+package com.twitter.algorithms;
+
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+
+import backtype.storm.coordination.BatchOutputCollector;
+import backtype.storm.task.OutputCollector;
+import backtype.storm.task.TopologyContext;
+import backtype.storm.topology.OutputFieldsDeclarer;
+import backtype.storm.topology.base.BaseRichBolt;
+import backtype.storm.transactional.ICommitter;
+import backtype.storm.tuple.Tuple;
+
+import com.twitter.util.MathUtil;
+
+/**
+ * Bolt that accumulates weight vectors, each scaled by its update weight, and
+ * logs their weighted average when the batch finishes.
+ */
+public class Aggregator extends BaseRichBolt implements ICommitter {
+
+    public static Logger LOG = Logger.getLogger(Aggregator.class);
+    /** Running sum of scaled weight vectors; null until the first tuple arrives. */
+    double[] aggregateWeights = null;
+    /** Sum of the update weights seen so far. */
+    double totalUpdateWeight = 0.0;
+
+    public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, Object id) {
+        // No batch-level setup is performed.
+    }
+
+    /**
+     * Adds one weighted vector to the running sum.
+     * Field 1 of the tuple is read as the weight vector and field 2 as its update weight.
+     *
+     * @param tuple incoming tuple
+     */
+    public void execute(Tuple tuple) {
+        double[] weight = (double[]) tuple.getValue(1);
+        double parallelUpdateWeight = (Double) tuple.getValue(2);
+        // timesC scales a copy, so neither the tuple's array nor the sender's
+        // state is modified, and the accumulator never aliases tuple data.
+        double[] scaled = MathUtil.timesC(weight, parallelUpdateWeight);
+        if (aggregateWeights == null) {
+            aggregateWeights = scaled;
+        } else {
+            MathUtil.plus(aggregateWeights, scaled);
+        }
+        totalUpdateWeight += parallelUpdateWeight;
+    }
+
+    /**
+     * Normalises the accumulated sum by the total update weight and logs it.
+     * Does nothing when no tuple has been seen; skips normalisation when the
+     * total weight is zero, where dividing would produce NaN or infinity.
+     */
+    public void finishBatch() {
+        if (aggregateWeights == null) {
+            return;
+        }
+        if (totalUpdateWeight == 0.0) {
+            LOG.warn("Total update weight is zero; weight vector left unnormalised");
+            return;
+        }
+        MathUtil.times(aggregateWeights, 1.0 / totalUpdateWeight);
+        LOG.info("New weight vector: " + Arrays.toString(aggregateWeights));
+    }
+
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+        // This bolt declares no output fields.
+    }
+
+    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
+        // No setup is performed.
+    }
+
+}
87 storm-ml/src/main/java/com/twitter/algorithms/Learner.java
View
@@ -0,0 +1,87 @@
+package com.twitter.algorithms;
+
+import java.io.Serializable;
+import java.util.Arrays;
+
+import org.apache.log4j.Logger;
+
+import com.twitter.data.Example;
+import com.twitter.storm.primitives.LocalLearner;
+import com.twitter.util.MathUtil;
+
+/**
+ * Online linear learner: predicts the sign of {@code weights . x} and, on a
+ * misclassified labeled example, steps the weights against the loss gradient.
+ */
+public class Learner implements Serializable {
+    // Bound to this class so log output is attributed to the learner itself.
+    public static Logger LOG = Logger.getLogger(Learner.class);
+
+    protected double[] weights;
+    protected LossFunction lossFunction;
+    int numExamples = 0;
+    int numMisclassified = 0;
+    double totalLoss = 0.0;
+    double gradientSum = 0.0;
+    protected double learningRate = 1.0;
+
+    /**
+     * @param dimension length of the weight vector; the loss function is sized to match
+     */
+    public Learner(int dimension) {
+        weights = new double[dimension];
+        lossFunction = new LossFunction(dimension);
+    }
+
+    /**
+     * Predicts the example, records statistics, and updates the weights when
+     * the example is labeled and the prediction differs from the label.
+     *
+     * @param example example to learn from
+     * @param epoch   step counter used to decay the learning rate
+     */
+    public void update(Example example, int epoch) {
+        int predicted = predict(example);
+        updateStats(example, predicted);
+        LOG.debug("EXAMPLE " + example.label + " PREDICTED: " + predicted);
+        if (example.isLabeled) {
+            if ((double) predicted != example.label) {
+                double[] gradient = lossFunction.gradient(example, predicted);
+                gradientSum += MathUtil.l2norm(gradient);
+                double eta = getLearningRate(example, epoch);
+                // The gradient array is scaled in place, then added to the weights.
+                MathUtil.plus(weights, MathUtil.times(gradient, -1.0 * eta));
+            }
+        }
+        displayStats();
+    }
+
+    /**
+     * Returns {@code learningRate / sqrt(timestamp)}. Timestamps below 1 are
+     * treated as 1 so the rate is never infinite or NaN.
+     */
+    protected double getLearningRate(Example example, int timestamp) {
+        return learningRate / Math.sqrt(Math.max(1, timestamp));
+    }
+
+    /** @return the live weight array (not a copy) */
+    public double[] getWeights() {
+        return weights;
+    }
+
+    /** @return the accumulated gradient norms of all updates so far */
+    public double getParallelUpdateWeight() {
+        return gradientSum;
+    }
+
+    /**
+     * Replaces the weights with a copy of {@code newWeights}.
+     *
+     * @param newWeights vector to copy; expected to match the current length
+     */
+    public void initWeights(double[] newWeights) {
+        assert (newWeights.length == weights.length);
+        weights = Arrays.copyOf(newWeights, newWeights.length);
+    }
+
+    /** @return 1 when the dot product of weights and features is non-negative, otherwise -1 */
+    public int predict(Example example) {
+        double dot = MathUtil.dot(weights, example.x);
+        return (dot >= 0.0) ? 1 : -1;
+    }
+
+    /** Counts the example, counts a miss when label and prediction differ, and adds its loss. */
+    protected void updateStats(Example example, int prediction) {
+        numExamples++;
+        if (example.label != prediction)
+            numMisclassified++;
+        totalLoss += lossFunction.get(example, prediction);
+    }
+
+    /**
+     * Logs accuracy and mean loss. When nothing has been seen yet, prints a
+     * notice and returns instead of dividing by zero.
+     */
+    public void displayStats() {
+        if (numExamples == 0) {
+            System.out.println("No examples seen so far.");
+            return;
+        }
+        double accuracy = 1.0 - numMisclassified * 1.0 / numExamples;
+        double meanLoss = totalLoss * 1.0 / numExamples;
+        LOG.info(String.format("Accuracy: %g\tMean Loss: %g", accuracy, meanLoss));
+
+    }
+
+    /** Clears the example, misclassification and loss counters; gradientSum is left untouched. */
+    public void resetStats() {
+        numExamples = 0;
+        numMisclassified = 0;
+        totalLoss = 0.0;
+    }
+}
30 storm-ml/src/main/java/com/twitter/algorithms/LossFunction.java
View
@@ -0,0 +1,30 @@
+package com.twitter.algorithms;
+
+import java.io.Serializable;
+
+import com.twitter.data.Example;
+
+/**
+ * Squared-error loss {@code 0.5 * (label - prediction)^2} and its gradient
+ * with respect to the weights.
+ */
+public class LossFunction implements Serializable {
+    private double[] grad; // gradient buffer, reused between calls
+
+    /**
+     * @param dimension initial length of the gradient buffer
+     */
+    public LossFunction(int dimension) {
+        grad = new double[dimension];
+    }
+
+    /** @return half the squared difference between the example's label and the prediction */
+    public double get(Example e, int prediction) {
+        return 0.5 * (e.label - prediction) * (e.label - prediction);
+    }
+
+    /**
+     * Computes {@code -(label - prediction) * x} into the internal buffer.
+     * The returned array is that buffer: it is overwritten by the next call
+     * and callers may modify it in place.
+     *
+     * @param e          example providing the features and label
+     * @param prediction predicted value for the example
+     * @return gradient vector with the same length as {@code e.x}
+     */
+    public double[] gradient(Example e, int prediction) {
+        // Re-size when the example's length differs from the buffer, instead of
+        // overflowing it or leaving stale trailing entries from an earlier call.
+        if (grad.length != e.x.length) {
+            grad = new double[e.x.length];
+        }
+        double f = -1.0 * (e.label - prediction);
+        for (int i = 0; i < e.x.length; i++) {
+            grad[i] = f * e.x[i];
+        }
+        return grad;
+    }
+
+    /**
+     * Factory hook; {@code name} is currently ignored and a plain
+     * {@link LossFunction} is always returned.
+     */
+    static LossFunction byName(String name, int dimension) {
+        return new LossFunction(dimension);
+    }
+
+}
64 storm-ml/src/main/java/com/twitter/data/Example.java
View
@@ -0,0 +1,64 @@
+package com.twitter.data;
+
+import java.util.Arrays;
+
+/**
+ * A dense feature vector with an optional label, tag and importance weight.
+ *
+ * @author Delip Rao
+ */
+public class Example {
+    public double[] x;
+    public double label;
+    public boolean isLabeled;
+    public double importance;
+    public String tag;
+
+    /**
+     * @param dimension length of the feature vector; the example starts unlabeled
+     */
+    public Example(int dimension) {
+        x = new double[dimension];
+        isLabeled = false;
+    }
+
+    /**
+     * Fills this example from its text form. On a malformed line the error is
+     * reported on stderr and the fields assigned before the failure keep their values.
+     *
+     * @param example
+     *            string representation of an example [+1,-1] | tag | importance | extra_info | feature:value pairs
+     * @param hashFunction
+     *            maps a feature name to a bucket; the bucket is reduced modulo the vector length
+     */
+    public void parseFrom(String example, HashFunction hashFunction) {
+        int dimension = x.length;
+        example = example.trim();
+        String[] toks = example.split("\\|");
+        for (int i = 0; i < toks.length; i++) {
+            toks[i] = toks[i].trim();
+        }
+        try {
+            if (toks[0].equals("-1") || toks[0].equals("+1") || toks[0].equals("1") || toks[0].equals("0")) {
+                // Double.parseDouble accepts a leading '+', so "+1" parses to 1.0.
+                label = Double.parseDouble(toks[0]);
+                isLabeled = true;
+            }
+            tag = toks[1];
+            importance = 1.0;
+            if (!toks[2].isEmpty()) {
+                importance = Double.parseDouble(toks[2]);
+            }
+            // TODO (Delip): parse extraInfo (toks[3])
+            for (String fv : toks[4].split("\\s+")) {
+                if (fv.isEmpty()) {
+                    continue; // an empty feature section yields one empty token
+                }
+                String[] tmp = fv.split(":");
+                String feature = tmp[0];
+                double value = 1.0;
+                if (tmp.length == 2) {
+                    value = Double.parseDouble(tmp[1]);
+                }
+                // abs() after the modulo keeps the index in range even if the hash is negative.
+                int index = Math.abs(hashFunction.hash(feature, 0) % dimension);
+                x[index] += value;
+            }
+        } catch (RuntimeException e) {
+            // Covers missing fields and bad numbers; JVM Errors are left to propagate.
+            System.err.println("Error Parsing:\n" + example);
+            e.printStackTrace();
+            return;
+        }
+    }
+
+    /** @return the label followed by the feature vector, e.g. {@code 1.0:[0.0, 2.0]} */
+    public String toString() {
+        return label + ":" + Arrays.toString(x);
+    }
+}
10 storm-ml/src/main/java/com/twitter/data/HashAll.java
View
@@ -0,0 +1,10 @@
+package com.twitter.data;
+
+
+/**
+ * Hash function that forwards every key to the inherited murmurHash32.
+ */
+public class HashAll extends HashFunction {
+
+    /**
+     * @param key  string to hash
+     * @param seed seed passed through to murmurHash32
+     * @return the value produced by murmurHash32 for the key and seed
+     */
+    @Override
+    public int hash(String key, int seed) {
+        final int hashed = murmurHash32(key, seed);
+        return hashed;
+    }
+}
22 storm-ml/src/main/java/com/twitter/data/HashFunction.java
View
@@ -0,0 +1,22 @@
+package com.twitter.data;
+
+import com.google.common.hash.Hashing;
+
+/**
+ * Base class for string hash functions used to map feature names to buckets.
+ */
+public abstract class HashFunction {
+    /**
+     * @param key  string to hash
+     * @param seed seed value
+     * @return hash of the key
+     */
+    public abstract int hash(String key, int seed);
+
+    /**
+     * Generates a non-negative 32 bit hash from a string and seed using the murmur hash algorithm
+     *
+     * @param key
+     *            string to hash
+     * @param seed
+     *            initial seed value
+     * @return absolute value of the 32 bit hash of the given string; 0 when the
+     *         raw hash is Integer.MIN_VALUE, which has no positive counterpart
+     */
+    protected int murmurHash32(final String key, int seed) {
+        int h = Hashing.murmur3_32(seed).hashString(key).asInt();
+        // Negating Integer.MIN_VALUE overflows back to itself, so handle it explicitly.
+        if (h == Integer.MIN_VALUE) {
+            return 0;
+        }
+        return (h < 0) ? -h : h;
+    }
+}
82 storm-ml/src/main/java/com/twitter/storm/primitives/LocalLearner.java
View
@@ -0,0 +1,82 @@
+package com.twitter.storm.primitives;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+
+import backtype.storm.task.OutputCollector;
+import backtype.storm.task.TopologyContext;
+import backtype.storm.topology.OutputFieldsDeclarer;
+import backtype.storm.topology.base.BaseRichBolt;
+import backtype.storm.transactional.ICommitter;
+import backtype.storm.tuple.Fields;
+import backtype.storm.tuple.Tuple;
+import backtype.storm.tuple.Values;
+
+import com.twitter.algorithms.Learner;
+import com.twitter.data.Example;
+import com.twitter.data.HashAll;
+
+public class LocalLearner extends BaseRichBolt implements ICommitter {
+ public static Logger LOG = Logger.getLogger(LocalLearner.class);
+
+ private int dimension;
+ OutputCollector _collector;
+ List<Example> buffer = new ArrayList<Example>();
+ Object id;
+ OutputCollector collector;
+ HashAll hashFunction;
+ Learner learner;
+ double[] weightVector;
+
+ public LocalLearner(int dimension) {
+ this(dimension, new Learner(dimension));// , new HashAll());
+ }
+
+ public LocalLearner(int dimension, Learner onlinePerceptron) {// , HashAll hashAll) {
+ this.dimension = dimension;
+ this.learner = onlinePerceptron;
+ // this.hashFunction = hashAll;
+ weightVector = new double[dimension];
+ }
+
+ public void execute(Tuple tuple) {
+ LOG.debug("Old weights" + Arrays.toString(learner.getWeights()));
+ Example example = new Example(2);
+ example.x[0] = (Double) tuple.getValue(0);
+ example.x[1] = (Double) tuple.getValue(1);
+ example.label = (Double) tuple.getValue(2);
+ example.isLabeled = true;
+ learner.update(example, 1);
+ _collector.ack(tuple);
+ LOG.debug("New weights" + Arrays.toString(learner.getWeights()));
+ // example.parseFrom((String) tuple.getValue(1), hashFunction);
+ // buffer.add(example);
+ }
+
+ public void finishBatch() {
+ if (buffer.size() == 0)
+ return;
+ learner.initWeights(weightVector);
+ for (Example e : buffer) {
+ learner.update(e, 1);
+ }
+
+ collector.emit(new Values(id, learner.getWeights(), learner.getParallelUpdateWeight()));
+ }
+
+ public void declareOutputFields(OutputFieldsDeclarer declarer) {
+ declarer.declare(new Fields("id", "weight_vector", "parallel_update_weights"));
+ }
+
+ public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
+ this.collector = collector;
+ learner.initWeights(weightVector);
+ _collector = collector;
+ weightVector = (double[]) context.getTaskData();
+ context.setTaskData(weightVector);
+ }
+}
36 storm-ml/src/main/java/com/twitter/storm/primitives/TrainingSpout.java
View
@@ -0,0 +1,36 @@
+package com.twitter.storm.primitives;
+
+import java.util.List;
+import java.util.Map;
+
+import backtype.storm.spout.SpoutOutputCollector;
+import backtype.storm.task.TopologyContext;
+import backtype.storm.topology.OutputFieldsDeclarer;
+import backtype.storm.topology.base.BaseRichSpout;
+import backtype.storm.tuple.Fields;
+import backtype.storm.tuple.Values;
+import backtype.storm.utils.Utils;
+
+import com.twitter.util.Datautil;
+
+/**
+ * Spout that emits every row of the training file on each call to
+ * {@link #nextTuple()}, pausing 100 ms before each pass.
+ */
+public class TrainingSpout extends BaseRichSpout {
+    SpoutOutputCollector _collector;
+    // Parsed rows, cached after the first successful read; transient so the
+    // cache is not part of the spout's serialized form.
+    private transient List<Double[]> dataSet;
+
+    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
+        _collector = collector;
+    }
+
+    /**
+     * Sleeps 100 ms, then emits one tuple per training row. The file is parsed
+     * only while no rows are cached, so an empty or failed read is retried on
+     * the next call instead of re-reading the file on every pass.
+     */
+    public void nextTuple() {
+        Utils.sleep(100);
+        if (dataSet == null || dataSet.isEmpty()) {
+            dataSet = new Datautil().readTrainingFile();
+        }
+        for (Double[] trainingItem : dataSet) {
+            // Each row holds three numbers, matching the three declared fields.
+            _collector.emit(new Values(trainingItem));
+        }
+
+    }
+
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+        declarer.declare(new Fields("trainingItem1", "t2", "t3"));
+    }
+
+}
35 storm-ml/src/main/java/com/twitter/util/Datautil.java
View
@@ -0,0 +1,35 @@
+package com.twitter.util;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Loader for the tab-separated training set.
+ */
+public class Datautil {
+
+    /**
+     * Reads {@code src/main/resources/testSet.txt} (relative to the working
+     * directory) and parses the first three tab-separated columns of every
+     * non-blank line as doubles.
+     *
+     * @return the parsed rows in file order; the rows read so far (possibly
+     *         none) when the file is missing or an I/O error occurs
+     */
+    public List<Double[]> readTrainingFile() {
+        List<Double[]> lines = new ArrayList<Double[]>();
+        BufferedReader br = null;
+        try {
+            br = new BufferedReader(new FileReader("src/main/resources/testSet.txt"));
+            String strLine;
+            while ((strLine = br.readLine()) != null) {
+                if (strLine.trim().length() == 0) {
+                    continue; // a blank line carries no record
+                }
+                String[] values = strLine.split("\\t");
+                Double[] line = new Double[3];
+                for (int i = 0; i <= 2; i++) {
+                    line[i] = Double.parseDouble(values[i]);
+                }
+                lines.add(line);
+            }
+        } catch (IOException e) {
+            // Also covers FileNotFoundException, which is an IOException.
+            e.printStackTrace();
+        } finally {
+            // Close the reader on every path so the file handle is not leaked.
+            if (br != null) {
+                try {
+                    br.close();
+                } catch (IOException ignored) {
+                    // Nothing useful can be done if closing fails.
+                }
+            }
+        }
+        return lines;
+    }
+}
76 storm-ml/src/main/java/com/twitter/util/MathUtil.java
View
@@ -0,0 +1,76 @@
+package com.twitter.util;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+
+/**
+ * Misc. math util functions on double vectors
+ * (refactor with Twitter specific ones).
+ * Methods without a trailing "C" modify and return their first argument;
+ * the "C" variants work on a copy.
+ * @author Delip Rao
+ */
+public class MathUtil {
+    /**
+     * @param v vector
+     * @return Euclidean (L2) norm of v, i.e. the square root of the sum of squares
+     */
+    public static double l2norm(double [] v) {
+        double sum = 0;
+        for (double d : v) {
+            sum += d*d;
+        }
+        return Math.sqrt(sum);
+    }
+
+    /** Sets every element of v to 0 in place and returns v. */
+    public static double [] zero(double [] v) {
+        for (int i = 0; i < v.length; i++) {
+            v[i] = 0;
+        }
+        return v;
+    }
+
+    /** Multiplies every element of v by factor in place and returns v. */
+    public static double [] times(double [] v, double factor) {
+        for (int i = 0; i < v.length; i++) {
+            v[i] *= factor;
+        }
+        return v;
+    }
+
+    /** Returns a new array holding v scaled by factor; v is not modified. */
+    public static double [] timesC(double [] v, double factor) {
+        double [] vc = Arrays.copyOf(v, v.length);
+        for (int i = 0; i < v.length; i++) {
+            vc[i] *= factor;
+        }
+        return vc;
+    }
+
+    /** Adds u to v element-wise in place and returns v; u must be at least as long as v. */
+    public static double [] plus(double [] v, double [] u) {
+        for (int i = 0; i < v.length; i++) {
+            v[i] += u[i];
+        }
+        return v;
+    }
+
+    /** Subtracts u from v element-wise in place and returns v; u must be at least as long as v. */
+    public static double [] minus(double [] v, double [] u) {
+        for (int i = 0; i < v.length; i++) {
+            v[i] -= u[i];
+        }
+        return v;
+    }
+
+    /** Returns a new array holding v - u; neither argument is modified. */
+    public static double [] minusC(double [] v, double [] u) {
+        double [] vc = Arrays.copyOf(v, v.length);
+        for (int i = 0; i < v.length; i++) {
+            vc[i] -= u[i];
+        }
+        return vc;
+    }
+
+    /** Returns the dot product over the first v.length elements; u must be at least as long as v. */
+    public static double dot(double [] u, double [] v) {
+        double result = 0;
+        for (int i = 0; i < v.length; i++) {
+            result += u[i]*v[i];
+        }
+        return result;
+    }
+
+    /**
+     * Returns the smallest probable prime that is greater than or equal to n.
+     * Values of n below 2 yield 2.
+     *
+     * @param n lower bound
+     * @return first probable prime at or above n
+     */
+    public static int nextLikelyPrime(int n) {
+        // nextProbablePrime() is strictly greater than its receiver and rejects
+        // negative receivers, so search from max(n - 1, 0). The subtraction is
+        // done in long arithmetic so Integer.MIN_VALUE cannot overflow.
+        long start = Math.max((long) n - 1, 0L);
+        return BigInteger.valueOf(start).nextProbablePrime().intValue();
+    }
+}
13 storm-ml/src/main/java/com/twitter/util/datautil.clj
View
@@ -0,0 +1,13 @@
+(ns com.twitter.util.DataUtil
+  ;; clojure.string supplies split and split-lines under the alias used below.
+  (:require [clojure.string :as sutils])
+  (:gen-class))
+
+(defn parse-multiple-to-float
+  "Parses every string in args with Double/parseDouble and returns the
+  results as a lazy sequence, in argument order. Despite the name, the
+  values are doubles."
+  [& args]
+  (map #(Double/parseDouble %) args))
+
+(defn load-dataset
+  "Reads \"testSet.txt\" from the working directory and returns a lazy
+  sequence with one entry per line, each entry being the sequence of that
+  line's tab-separated fields parsed as doubles."
+  []
+  (let [data-text (map #(sutils/split % #"\t")
+                       (sutils/split-lines (slurp "testSet.txt")))]
+    (map #(apply parse-multiple-to-float %) data-text)))
100 storm-ml/src/main/resources/testSet.txt
View
@@ -0,0 +1,100 @@
+-0.017612 14.053064 -1
+-1.395634 4.662541 1
+-0.752157 6.538620 -1
+-1.322371 7.152853 -1
+0.423363 11.054677 -1
+0.406704 7.067335 1
+0.667394 12.741452 -1
+-2.460150 6.866805 1
+0.569411 9.548755 -1
+-0.026632 10.427743 -1
+0.850433 6.920334 1
+1.347183 13.175500 -1
+1.176813 3.167020 1
+-1.781871 9.097953 -1
+-0.566606 5.749003 1
+0.931635 1.589505 1
+-0.024205 6.151823 1
+-0.036453 2.690988 1
+-0.196949 0.444165 1
+1.014459 5.754399 1
+1.985298 3.230619 1
+-1.693453 -0.557540 1
+-0.576525 11.778922 -1
+-0.346811 -1.678730 1
+-2.124484 2.672471 1
+1.217916 9.597015 -1
+-0.733928 9.098687 -1
+-3.642001 -1.618087 1
+0.315985 3.523953 1
+1.416614 9.619232 -1
+-0.386323 3.989286 1
+0.556921 8.294984 1
+1.224863 11.587360 -1
+-1.347803 -2.406051 1
+1.196604 4.951851 1
+0.275221 9.543647 -1
+0.470575 9.332488 -1
+-1.889567 9.542662 -1
+-1.527893 12.150579 -1
+-1.185247 11.309318 -1
+-0.445678 3.297303 1
+1.042222 6.105155 1
+-0.618787 10.320986 -1
+1.152083 0.548467 1
+0.828534 2.676045 1
+-1.237728 10.549033 -1
+-0.683565 -2.166125 1
+0.229456 5.921938 1
+-0.959885 11.555336 -1
+0.492911 10.993324 -1
+0.184992 8.721488 -1
+-0.355715 10.325976 -1
+-0.397822 8.058397 -1
+0.824839 13.730343 -1
+1.507278 5.027866 1
+0.099671 6.835839 1
+-0.344008 10.717485 -1
+1.785928 7.718645 1
+-0.918801 11.560217 -1
+-0.364009 4.747300 1
+-0.841722 4.119083 1
+0.490426 1.960539 1
+-0.007194 9.075792 -1
+0.356107 12.447863 -1
+0.342578 12.281162 -1
+-0.810823 -1.466018 1
+2.530777 6.476801 1
+1.296683 11.607559 -1
+0.475487 12.040035 -1
+-0.783277 11.009725 -1
+0.074798 11.023650 -1
+-1.337472 0.468339 1
+-0.102781 13.763651 -1
+-0.147324 2.874846 1
+0.518389 9.887035 -1
+1.015399 7.571882 -1
+-1.658086 -0.027255 1
+1.319944 2.171228 1
+2.056216 5.019981 1
+-0.851633 4.375691 1
+-1.510047 6.061992 -1
+-1.076637 -3.181888 1
+1.821096 10.283990 -1
+3.010150 8.401766 1
+-1.099458 1.688274 1
+-0.834872 -1.733869 1
+-0.846637 3.849075 1
+1.400102 12.628781 -1
+1.752842 5.468166 1
+0.078557 0.059736 1
+0.089392 -0.715300 1
+1.825662 12.693808 -1
+0.197445 9.744638 -1
+0.126117 0.922311 1
+-0.679797 1.220530 1
+0.677983 2.556666 1
+0.761349 10.693862 -1
+-2.168791 0.143632 1
+1.388610 9.341997 -1
+0.317029 14.739025 -1
Please sign in to comment.
Something went wrong with that request. Please try again.