diff --git a/suppressions.xml b/suppressions.xml
index 3b929c8b..b23a50d4 100644
--- a/suppressions.xml
+++ b/suppressions.xml
@@ -28,7 +28,6 @@
-
diff --git a/tigon-client/src/main/java/co/cask/tigon/StandaloneMain.java b/tigon-client/src/main/java/co/cask/tigon/StandaloneMain.java
index 61a2b848..1c115bdf 100644
--- a/tigon-client/src/main/java/co/cask/tigon/StandaloneMain.java
+++ b/tigon-client/src/main/java/co/cask/tigon/StandaloneMain.java
@@ -95,7 +95,7 @@ public static void main(String[] args) {
List arguments = Lists.newArrayList();
arguments.add("/Users/gandu/workspace/tigon/tigon-examples/SentimentAnalysis/target/" +
"SentimentAnalysis-0.1.0-SNAPSHOT.jar");
- arguments.add("co.cask.tigon.SentimentAnalysis");
+ arguments.add("co.cask.tigon.sentiment.SentimentAnalysis");
args = arguments.toArray(new String[arguments.size()]);
System.out.println("Tigon Standalone Client");
if (args.length > 0) {
diff --git a/tigon-examples/SentimentAnalysis/pom.xml b/tigon-examples/SentimentAnalysis/pom.xml
index f81d705e..592a04d4 100644
--- a/tigon-examples/SentimentAnalysis/pom.xml
+++ b/tigon-examples/SentimentAnalysis/pom.xml
@@ -28,7 +28,7 @@
SentimentAnalysis
- co.cask.tigon.sentimentanalysis.SentimentAnalysis
+ co.cask.tigon.sentiment.SentimentAnalysis
@@ -37,6 +37,11 @@
tigon-api
${project.version}
+
+ com.lingpipe
+ lingpipe
+ 4.1.0
+
org.slf4j
slf4j-api
@@ -66,5 +71,4 @@
-
\ No newline at end of file
diff --git a/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/Analysis.java b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/Analysis.java
new file mode 100644
index 00000000..e4bf842f
--- /dev/null
+++ b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/Analysis.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2014 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package co.cask.tigon.sentiment;
+
+import co.cask.tigon.api.annotation.Batch;
+import co.cask.tigon.api.annotation.ProcessInput;
+import co.cask.tigon.api.flow.flowlet.AbstractFlowlet;
+import co.cask.tigon.api.flow.flowlet.FlowletContext;
+import com.google.common.base.Throwables;
+import com.google.common.io.ByteStreams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInputStream;
+
+/**
+ * Flowlet that classifies the sentiment of each incoming tweet with a pre-trained classifier.
+ *
+ * On initialization the serialized model bundled on the classpath is copied to a local file and
+ * deserialized into a {@link TextClassifier}; tweets received afterwards are classified with it.
+ */
+public class Analysis extends AbstractFlowlet {
+
+  private static final Logger LOG = LoggerFactory.getLogger(Analysis.class);
+  private static final String MODEL_RESOURCE = "java_trained_classifier.txt";
+  private static final String LOCALIZED_FILENAME = "localized.txt";
+
+  // Remains null until initialize() has loaded the model.
+  TextClassifier classifierModel = null;
+
+  /**
+   * Copies the bundled model to a local file and loads the classifier from that copy.
+   *
+   * @param context flowlet context, passed on to the superclass
+   * @throws Exception if the model resource is missing, or cannot be copied or deserialized
+   */
+  @Override
+  public void initialize(FlowletContext context) throws Exception {
+    super.initialize(context);
+
+    try {
+      localizeModel();
+    } catch (IOException e) {
+      // Copy failures keep surfacing as unchecked exceptions, as before.
+      throw Throwables.propagate(e);
+    }
+    classifierModel = loadModel();
+    LOG.info("Initialized Analysis flowlet.");
+  }
+
+  /**
+   * Classifies a single tweet and prints the result to standard output.
+   *
+   * @param tweet text to classify
+   * @throws ClassifierResultException if the classifier returns a label that is not recognized
+   */
+  @Batch(100)
+  @ProcessInput
+  public void classifyTweet(String tweet) throws FileNotFoundException, ClassifierResultException {
+    System.out.println(classify(tweet).toString());
+  }
+
+  /**
+   * Runs the loaded classifier on the given text; {@link #initialize} must have completed first.
+   *
+   * @param text text to classify
+   * @return the classification result for {@code text}
+   * @throws ClassifierResultException if the classifier returns a label that is not recognized
+   */
+  public ClassificationResult classify(String text) throws FileNotFoundException, ClassifierResultException {
+    return classifierModel.classify(text);
+  }
+
+  /**
+   * Copies the model resource from the classpath into {@code LOCALIZED_FILENAME}.
+   * Both streams are closed even when the copy or the first close fails.
+   */
+  private void localizeModel() throws IOException {
+    InputStream in = this.getClass().getClassLoader().getResourceAsStream(MODEL_RESOURCE);
+    if (in == null) {
+      // getResourceAsStream reports a missing resource with null, not with an exception;
+      // fail here with a clear message before an empty output file is created.
+      throw new FileNotFoundException("Classifier model resource not found on classpath: " + MODEL_RESOURCE);
+    }
+    try {
+      // Localized within the container, so it gets cleaned up.
+      FileOutputStream out = new FileOutputStream(LOCALIZED_FILENAME);
+      try {
+        ByteStreams.copy(in, out);
+      } finally {
+        out.close();
+      }
+    } finally {
+      in.close();
+    }
+  }
+
+  /**
+   * Deserializes the classifier from the localized model file and closes the file afterwards.
+   */
+  private TextClassifier loadModel() throws IOException, ClassNotFoundException {
+    InputStream modelInputStream = new FileInputStream(new File(LOCALIZED_FILENAME));
+    try {
+      return TextClassifier.createFromObjectStream(new ObjectInputStream(modelInputStream));
+    } finally {
+      modelInputStream.close();
+    }
+  }
+}
diff --git a/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/ClassificationResult.java b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/ClassificationResult.java
new file mode 100644
index 00000000..25174511
--- /dev/null
+++ b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/ClassificationResult.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright © 2014 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package co.cask.tigon.sentiment;
+
+import com.google.common.base.Objects;
+
+/**
+ * Immutable outcome of a classification: the raw label returned by the classifier, the
+ * {@link Sentiment} that label maps to, and a confidence value.
+ */
+public class ClassificationResult {
+  private final String value;
+  private final double confidence;
+  private final Sentiment sentiment;
+
+  /**
+   * Builds a result from the classifier's raw label.
+   *
+   * @param value raw label; must be one of {@code "pos"}, {@code "neg"} or {@code "neu"}
+   * @param confidence confidence value to record for this classification
+   * @throws ClassifierResultException if {@code value} is {@code null} or not a known label
+   */
+  public ClassificationResult(String value, double confidence) throws ClassifierResultException {
+    this.value = value;
+    this.confidence = confidence;
+
+    // Literal-first comparisons: a null label is reported as unrecognized instead of causing an NPE.
+    if ("pos".equals(value)) {
+      sentiment = Sentiment.positive;
+    } else if ("neg".equals(value)) {
+      sentiment = Sentiment.negative;
+    } else if ("neu".equals(value)) {
+      sentiment = Sentiment.neutral;
+    } else {
+      throw new ClassifierResultException("Classifier return result not recognized: " + value);
+    }
+  }
+
+  /**
+   * Renders the sentiment, raw label and confidence of this result.
+   */
+  @Override
+  public String toString() {
+    return Objects.toStringHelper(this)
+      .add("sentiment", sentiment.toString())
+      .add("value", value)
+      .add("confidence", confidence).toString();
+  }
+
+  /**
+   * Sentiments a classifier label can map to.
+   */
+  public enum Sentiment {
+    positive, neutral, negative
+  }
+
+}
diff --git a/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/ClassifierResultException.java b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/ClassifierResultException.java
new file mode 100644
index 00000000..8452b3c7
--- /dev/null
+++ b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/ClassifierResultException.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright © 2014 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package co.cask.tigon.sentiment;
+
+/**
+ * Signals that the classifier produced a result that could not be interpreted.
+ */
+public class ClassifierResultException extends Exception {
+
+  /**
+   * Creates the exception with a description of the failure.
+   *
+   * @param message detail message, available later through {@link #getMessage()}
+   */
+  public ClassifierResultException(final String message) {
+    super(message);
+  }
+}
diff --git a/tigon-examples/SentimentAnalysis/src/main/java/Normalization.java b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/Normalization.java
similarity index 93%
rename from tigon-examples/SentimentAnalysis/src/main/java/Normalization.java
rename to tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/Normalization.java
index c400ca81..1788d5ef 100644
--- a/tigon-examples/SentimentAnalysis/src/main/java/Normalization.java
+++ b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/Normalization.java
@@ -13,6 +13,8 @@
* License for the specific language governing permissions and limitations under
* the License.
*/
+package co.cask.tigon.sentiment;
+
import co.cask.tigon.api.annotation.Batch;
import co.cask.tigon.api.annotation.ProcessInput;
@@ -27,16 +29,13 @@
public class Normalization extends AbstractFlowlet {
private static final Logger LOG = LoggerFactory.getLogger(Normalization.class);
- /**
- * Emitter for emitting sentences from this Flowlet.
- */
private OutputEmitter out;
+  /**
+   * Logs every non-null input text and emits it unchanged; null inputs are dropped.
+   *
+   * @param text text received by this flowlet, may be null
+   */
@ProcessInput
@Batch(100)
public void process(String text) {
if (text != null) {
- LOG.info(text);
+ LOG.info("Received tweet: {}", text);
out.emit(text);
}
}
diff --git a/tigon-examples/SentimentAnalysis/src/main/java/SentimentAnalysis.java b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/SentimentAnalysis.java
similarity index 90%
rename from tigon-examples/SentimentAnalysis/src/main/java/SentimentAnalysis.java
rename to tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/SentimentAnalysis.java
index e8d37d25..34437d28 100644
--- a/tigon-examples/SentimentAnalysis/src/main/java/SentimentAnalysis.java
+++ b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/SentimentAnalysis.java
@@ -13,6 +13,7 @@
* License for the specific language governing permissions and limitations under
* the License.
*/
+package co.cask.tigon.sentiment;
import co.cask.tigon.api.flow.Flow;
import co.cask.tigon.api.flow.FlowSpecification;
@@ -30,8 +31,10 @@ public FlowSpecification configure() {
.withFlowlets()
.add(new TweetCollector())
.add(new Normalization())
+ .add(new Analysis())
.connect()
.from(new TweetCollector()).to(new Normalization())
+ .from(new Normalization()).to(new Analysis())
.build();
}
}
diff --git a/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/TextClassifier.java b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/TextClassifier.java
new file mode 100644
index 00000000..7b16f0bb
--- /dev/null
+++ b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/TextClassifier.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2014 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package co.cask.tigon.sentiment;
+
+import com.aliasi.classify.Classification;
+import com.aliasi.classify.LMClassifier;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+
+/**
+ * Thin wrapper around a LingPipe {@link LMClassifier} that reports the best category for a text.
+ *
+ * Note: no vectorization strategy is applied beyond the classifier's default implementation.
+ */
+public class TextClassifier {
+
+  private final LMClassifier classifier;
+
+  private TextClassifier(LMClassifier classifier) {
+    this.classifier = classifier;
+  }
+
+  /**
+   * Reads a serialized {@link LMClassifier} from the stream and wraps it.
+   * The stream is not closed by this method.
+   *
+   * @param inputStream stream positioned at a serialized classifier
+   * @return a classifier backed by the deserialized model
+   * @throws IOException if reading from the stream fails
+   * @throws ClassNotFoundException if the class of the serialized object cannot be found
+   */
+  public static TextClassifier createFromObjectStream(ObjectInputStream inputStream)
+    throws IOException, ClassNotFoundException {
+    Object deserialized = inputStream.readObject();
+    return new TextClassifier((LMClassifier) deserialized);
+  }
+
+  /**
+   * Classifies the text and wraps the best category; the confidence is fixed at 1.0.
+   *
+   * @param data text to classify
+   * @return result built from the best category
+   * @throws ClassifierResultException if the best category is not a recognized label
+   */
+  public ClassificationResult classify(String data) throws ClassifierResultException {
+    Classification best = classifier.classify(data);
+    String label = best.bestCategory();
+    return new ClassificationResult(label, 1.0);
+  }
+}
diff --git a/tigon-examples/SentimentAnalysis/src/main/java/TweetCollector.java b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/TweetCollector.java
similarity index 87%
rename from tigon-examples/SentimentAnalysis/src/main/java/TweetCollector.java
rename to tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/TweetCollector.java
index 679d68d3..c95ddc3f 100644
--- a/tigon-examples/SentimentAnalysis/src/main/java/TweetCollector.java
+++ b/tigon-examples/SentimentAnalysis/src/main/java/co/cask/tigon/sentiment/TweetCollector.java
@@ -1,8 +1,25 @@
+/*
+ * Copyright © 2014 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package co.cask.tigon.sentiment;
+
import co.cask.tigon.api.annotation.Tick;
import co.cask.tigon.api.flow.flowlet.AbstractFlowlet;
import co.cask.tigon.api.flow.flowlet.FlowletContext;
import co.cask.tigon.api.flow.flowlet.OutputEmitter;
-import co.cask.tigon.api.metrics.Metrics;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import twitter4j.Status;
diff --git a/tigon-examples/SentimentAnalysis/src/main/resources/java_trained_classifier.txt b/tigon-examples/SentimentAnalysis/src/main/resources/java_trained_classifier.txt
new file mode 100644
index 00000000..b2bb6512
Binary files /dev/null and b/tigon-examples/SentimentAnalysis/src/main/resources/java_trained_classifier.txt differ
diff --git a/tigon-examples/SentimentAnalysis/src/main/resources/twitter4j.properties b/tigon-examples/SentimentAnalysis/src/main/resources/twitter4j.properties
index 35606a27..5ce613af 100644
--- a/tigon-examples/SentimentAnalysis/src/main/resources/twitter4j.properties
+++ b/tigon-examples/SentimentAnalysis/src/main/resources/twitter4j.properties
@@ -1,5 +1,21 @@
-oauth.consumerKey=guzpQsLtdKs0jlap64nY1nX4N
-oauth.consumerSecret=z7Ux5TPQyeOXd8xlXmm87V3qxi1vABSV9NFVelDaCus8m39tIe
-oauth.accessToken=366210197-mpzoVZgENXzrEnVXvgdOqoDkCv55m2M5IYSp4ouv
-oauth.accessTokenSecret=Q9ST3W4d68KgBnIKmQYWgFaBdCBrVsQXtE54ol8UhudoL
-debug=false
\ No newline at end of file
+#
+# Copyright © 2014 Cask Data, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+#
+
+# Fill in the credentials of your own Twitter application before running the example.
+# Real keys and tokens must never be committed to version control.
+oauth.consumerKey=YOUR_CONSUMER_KEY
+oauth.consumerSecret=YOUR_CONSUMER_SECRET
+oauth.accessToken=YOUR_ACCESS_TOKEN
+oauth.accessTokenSecret=YOUR_ACCESS_TOKEN_SECRET
+debug=false