Skip to content
This repository has been archived by the owner on Dec 27, 2022. It is now read-only.

Commit

Permalink
Working version
Browse files Browse the repository at this point in the history
  • Loading branch information
Parth Gandhi committed Sep 23, 2014
1 parent a5087a7 commit e965075
Show file tree
Hide file tree
Showing 12 changed files with 272 additions and 14 deletions.
1 change: 0 additions & 1 deletion suppressions.xml
Expand Up @@ -28,7 +28,6 @@
<suppress checks="JavadocStyle" files=".*/src/(main|integration)/java/.*" />
<suppress checks="JavadocStyle" files=".*/src/.*/internal/.*" />


<!-- copied from apache hadoop, won't fix style to keep diff minimal -->
<suppress checks=".*" files=".*/LocalJobRunnerWithFix.java" />

Expand Down
Expand Up @@ -95,7 +95,7 @@ public static void main(String[] args) {
List<String> arguments = Lists.newArrayList();
arguments.add("/Users/gandu/workspace/tigon/tigon-examples/SentimentAnalysis/target/" +
"SentimentAnalysis-0.1.0-SNAPSHOT.jar");
arguments.add("co.cask.tigon.SentimentAnalysis");
arguments.add("co.cask.tigon.sentiment.SentimentAnalysis");
args = arguments.toArray(new String[arguments.size()]);
System.out.println("Tigon Standalone Client");
if (args.length > 0) {
Expand Down
8 changes: 6 additions & 2 deletions tigon-examples/SentimentAnalysis/pom.xml
Expand Up @@ -28,7 +28,7 @@
<artifactId>SentimentAnalysis</artifactId>

<properties>
<app.main.class>co.cask.tigon.sentimentanalysis.SentimentAnalysis</app.main.class>
<app.main.class>co.cask.tigon.sentiment.SentimentAnalysis</app.main.class>
</properties>

<dependencies>
Expand All @@ -37,6 +37,11 @@
<artifactId>tigon-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.lingpipe</groupId>
<artifactId>lingpipe</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down Expand Up @@ -66,5 +71,4 @@
</dependency>
</dependencies>


</project>
@@ -0,0 +1,87 @@
/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.tigon.sentiment;

import co.cask.tigon.api.annotation.Batch;
import co.cask.tigon.api.annotation.ProcessInput;
import co.cask.tigon.api.flow.flowlet.AbstractFlowlet;
import co.cask.tigon.api.flow.flowlet.FlowletContext;
import com.google.common.base.Throwables;
import com.google.common.io.ByteStreams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;

/**
 * Flowlet that classifies the sentiment of each incoming tweet with a pre-trained model.
 *
 * <p>During {@link #initialize(FlowletContext)} the serialized model bundled on the classpath is
 * copied to a local file and deserialized into a {@link TextClassifier}. Every tweet received
 * afterwards is classified and the result is logged.
 */
public class Analysis extends AbstractFlowlet {

  private static final Logger LOG = LoggerFactory.getLogger(Analysis.class);
  private static final String MODEL_RESOURCE = "java_trained_classifier.txt";
  private static final String LOCALIZED_FILENAME = "localized.txt";

  TextClassifier classifierModel = null;

  /**
   * Localizes the bundled model and loads it into {@link #classifierModel}.
   *
   * @param context the flowlet context, passed through to the superclass
   * @throws FileNotFoundException if the model resource is not on the classpath
   * @throws RuntimeException wrapping an {@link IOException} if copying the model fails
   * @throws Exception if the localized model cannot be read or deserialized
   */
  @Override
  public void initialize(FlowletContext context) throws Exception {
    super.initialize(context);
    localizeModel();
    classifierModel = loadModel();
    LOG.info("Initialized Analysis flowlet.");
  }

  /**
   * Copies the classpath model resource to {@link #LOCALIZED_FILENAME}.
   * Both streams are closed on every path, even when one of the close calls fails.
   */
  private void localizeModel() throws FileNotFoundException {
    InputStream in = getClass().getClassLoader().getResourceAsStream(MODEL_RESOURCE);
    if (in == null) {
      // getResourceAsStream signals a missing resource with null; fail with a clear message
      // instead of a NullPointerException from the copy below.
      throw new FileNotFoundException("Classifier model resource not found on classpath: " + MODEL_RESOURCE);
    }
    try {
      try {
        // Relative path; per the original author's note the file is localized within the
        // container, so it gets cleaned up with it.
        FileOutputStream out = new FileOutputStream(LOCALIZED_FILENAME);
        try {
          ByteStreams.copy(in, out);
        } finally {
          out.close();
        }
      } finally {
        in.close();
      }
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  /**
   * Deserializes the classifier from the localized model file, closing the file afterwards.
   */
  private TextClassifier loadModel() throws IOException, ClassNotFoundException {
    InputStream modelInputStream = new FileInputStream(new File(LOCALIZED_FILENAME));
    try {
      return TextClassifier.createFromObjectStream(new ObjectInputStream(modelInputStream));
    } finally {
      modelInputStream.close();
    }
  }

  /**
   * Classifies a single tweet and logs the classification result.
   *
   * @param tweet the tweet text to classify
   * @throws ClassifierResultException if the classifier returns an unrecognized label
   */
  @Batch(100)
  @ProcessInput
  public void classifyTweet(String tweet) throws FileNotFoundException, ClassifierResultException {
    LOG.info("Classified tweet: {}", classify(tweet));
  }

  /**
   * Runs the loaded model against the given text.
   *
   * @param text the text to classify
   * @return the classification result for {@code text}
   * @throws ClassifierResultException if the classifier returns an unrecognized label
   */
  public ClassificationResult classify(String text) throws FileNotFoundException, ClassifierResultException {
    return classifierModel.classify(text);
  }
}
@@ -0,0 +1,56 @@
/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.tigon.sentiment;

import com.google.common.base.Objects;

/**
 * Immutable outcome of a classification: the raw label produced by the classifier, the
 * {@link Sentiment} that label maps to, and the confidence supplied by the caller.
 */
public class ClassificationResult {
  private final String value;
  private final double confidence;
  private final Sentiment sentiment;

  /**
   * Creates a result from a raw classifier label.
   *
   * @param value the raw label; must be one of {@code "pos"}, {@code "neg"} or {@code "neu"}
   * @param confidence the confidence associated with the label
   * @throws ClassifierResultException if {@code value} is {@code null} or not a recognized label
   */
  public ClassificationResult(String value, double confidence) throws ClassifierResultException {
    this.value = value;
    this.confidence = confidence;
    this.sentiment = toSentiment(value);
  }

  /**
   * Maps a raw classifier label to its sentiment. Constant-first comparisons keep a
   * {@code null} label from raising a NullPointerException.
   */
  private static Sentiment toSentiment(String label) throws ClassifierResultException {
    if ("pos".equals(label)) {
      return Sentiment.positive;
    }
    if ("neg".equals(label)) {
      return Sentiment.negative;
    }
    if ("neu".equals(label)) {
      return Sentiment.neutral;
    }
    throw new ClassifierResultException("Classifier return result not recognized: " + label);
  }

  /**
   * @return the raw label returned by the classifier
   */
  public String getValue() {
    return value;
  }

  /**
   * @return the confidence associated with this result
   */
  public double getConfidence() {
    return confidence;
  }

  /**
   * @return the sentiment the raw label maps to
   */
  public Sentiment getSentiment() {
    return sentiment;
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
      .add("sentiment", sentiment.toString())
      .add("value", value)
      .add("confidence", confidence).toString();
  }

  /**
   * Sentiment categories a classification can resolve to.
   */
  public enum Sentiment {
    positive, neutral, negative;
  }

}
@@ -0,0 +1,26 @@
/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.tigon.sentiment;

/**
 * Thrown when the classifier produces a result that cannot be interpreted.
 */
public class ClassifierResultException extends Exception {
  private static final long serialVersionUID = 1L;

  /**
   * Creates an exception with the given detail message.
   *
   * @param message description of what was wrong with the classifier result
   */
  public ClassifierResultException(String message) {
    super(message);
  }

  /**
   * Creates an exception with the given detail message and underlying cause, so that the
   * original failure is preserved in the stack trace.
   *
   * @param message description of what was wrong with the classifier result
   * @param cause the underlying failure; may be {@code null}
   */
  public ClassifierResultException(String message, Throwable cause) {
    super(message, cause);
  }
}
Expand Up @@ -13,6 +13,8 @@
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.tigon.sentiment;


import co.cask.tigon.api.annotation.Batch;
import co.cask.tigon.api.annotation.ProcessInput;
Expand All @@ -27,16 +29,13 @@
public class Normalization extends AbstractFlowlet {
private static final Logger LOG = LoggerFactory.getLogger(Normalization.class);

/**
* Emitter for emitting sentences from this Flowlet.
*/
private OutputEmitter<String> out;

@ProcessInput
@Batch(100)
public void process(String text) {
if (text != null) {
LOG.info(text);
LOG.info("Received tweet: " + text);
out.emit(text);
}
}
Expand Down
Expand Up @@ -13,6 +13,7 @@
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.tigon.sentiment;

import co.cask.tigon.api.flow.Flow;
import co.cask.tigon.api.flow.FlowSpecification;
Expand All @@ -30,8 +31,10 @@ public FlowSpecification configure() {
.withFlowlets()
.add(new TweetCollector())
.add(new Normalization())
.add(new Analysis())
.connect()
.from(new TweetCollector()).to(new Normalization())
.from(new Normalization()).to(new Analysis())
.build();
}
}
@@ -0,0 +1,51 @@
/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.tigon.sentiment;

import com.aliasi.classify.Classification;
import com.aliasi.classify.LMClassifier;

import java.io.IOException;
import java.io.ObjectInputStream;

/**
 * Basic text classifier backed by a deserialized LingPipe {@link LMClassifier}.
 *
 * <p>No vectorization strategy is applied beyond the default implementation.
 */
public class TextClassifier {

  private final LMClassifier<?, ?> classifier;

  private TextClassifier(LMClassifier<?, ?> classifier) {
    this.classifier = classifier;
  }

  /**
   * Classifies the given text and wraps the best category in a {@link ClassificationResult}.
   *
   * @param data the text to classify
   * @return the result carrying the classifier's best category
   * @throws ClassifierResultException if the best category is not a recognized label
   */
  public ClassificationResult classify(String data) throws ClassifierResultException {
    Classification classification = classifier.classify(data);
    // NOTE(review): the confidence is a fixed placeholder of 1.0, not a score derived from
    // the model; confirm whether a real probability should be reported here.
    return new ClassificationResult(classification.bestCategory(), 1.0);
  }

  /**
   * Builds a classifier from a stream containing a serialized {@link LMClassifier}.
   * The stream is not closed by this method.
   *
   * @param inputStream stream positioned at the serialized model
   * @return a classifier wrapping the deserialized model
   * @throws IOException if reading fails or the stream does not contain an {@link LMClassifier}
   * @throws ClassNotFoundException if a class of the serialized model cannot be found
   */
  public static TextClassifier createFromObjectStream(ObjectInputStream inputStream)
    throws IOException, ClassNotFoundException {
    Object model = inputStream.readObject();
    if (!(model instanceof LMClassifier)) {
      // Fail here with a descriptive message rather than with a ClassCastException, or a
      // NullPointerException on first use when the stream held null.
      throw new IOException("Expected a serialized LMClassifier but found "
        + (model == null ? "null" : model.getClass().getName()));
    }
    return new TextClassifier((LMClassifier<?, ?>) model);
  }
}
@@ -1,8 +1,25 @@
/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.tigon.sentiment;

import co.cask.tigon.api.annotation.Tick;
import co.cask.tigon.api.flow.flowlet.AbstractFlowlet;
import co.cask.tigon.api.flow.flowlet.FlowletContext;
import co.cask.tigon.api.flow.flowlet.OutputEmitter;
import co.cask.tigon.api.metrics.Metrics;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import twitter4j.Status;
Expand Down
Binary file not shown.
@@ -1,5 +1,21 @@
oauth.consumerKey=guzpQsLtdKs0jlap64nY1nX4N
oauth.consumerSecret=z7Ux5TPQyeOXd8xlXmm87V3qxi1vABSV9NFVelDaCus8m39tIe
oauth.accessToken=366210197-mpzoVZgENXzrEnVXvgdOqoDkCv55m2M5IYSp4ouv
oauth.accessTokenSecret=Q9ST3W4d68KgBnIKmQYWgFaBdCBrVsQXtE54ol8UhudoL
debug=false
#
# Copyright © 2014 Cask Data, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#

oauth.consumerKey=5bPCAbHLyeNIzGbaCyFZIQMMl
oauth.consumerSecret=OTH2It9nYhyqolWIi0068v7Oa1WLXviW3swT7DpYJTCxD9mx6B
oauth.accessToken=2827531970-P8HdLEm8bYusoXXMM0bUHivCVqJRQO7wDHlTeRB
oauth.accessTokenSecret=18togr81QeDw9GM7F4OSmo8aSZgLKmoUjfeAJmnKD0445
debug=false

0 comments on commit e965075

Please sign in to comment.