Skip to content

Commit

Permalink
created basic categorical parser
Browse files: browse the repository at this point in the history
  • Loading branch information
amensiko committed Jul 2, 2016
1 parent 3b97c25 commit ee30f89
Show file tree
Hide file tree
Showing 230 changed files with 242,460 additions and 15 deletions.
4 changes: 2 additions & 2 deletions README.md
Expand Up @@ -16,7 +16,7 @@ $ mvn install assembly:assembly
```shell
$ cd target/sentiment
$ mkdir -p model/org/apache/tika/parser/sentiment/topic/
$ bin/sentiment SentimentTrainer -model model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin -lang en -data ./../../examples/result -encoding UTF-8
$ bin/sentiment SentimentTrainer -model model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin -lang en -data ./../../examples/categorical_dataset -encoding UTF-8
```
The model is written to en-sentiment.bin

Expand All @@ -25,7 +25,7 @@ The model is written to en-sentiment.bin
Make sure you are in target/sentiment

```shell
$ bin/sentiment Tika -model model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin -o ../../examples/gun-output1 ../../examples/gun-ads
$ bin/sentiment Tika -model model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin -o ../../examples/gun-output1 -j ../../examples/gun-ads
```


Expand Down
4 changes: 2 additions & 2 deletions examples/SentimentCount.java
Expand Up @@ -27,9 +27,9 @@ else if (content.substring(i, i+8).equalsIgnoreCase("negative")) {

public static void main(String args[]) throws FileNotFoundException {
SentimentCount sentCount = new SentimentCount();
File file = new File("./out");
File file = new File("./out1");
sentCount.counter(file);
System.out.println("Positive: " + sentCount.pos);
System.out.println("Negative: " + sentCount.neg);
}
}
}
239,232 changes: 239,232 additions & 0 deletions examples/categorical_dataset

Large diffs are not rendered by default.

Empty file removed examples/gun-ads/11914.sent
Empty file.
Empty file removed examples/gun-ads/18204.sent
Empty file.
12 changes: 7 additions & 5 deletions examples/gun-output1/.out
@@ -1,5 +1,7 @@
Content-Length: 14340
Content-Type: application/octet-stream
X-Parsed-By: org.apache.tika.parser.EmptyParser
model: target/sentiment/model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin
resourceName: .DS_Store
{
"Content-Length": "14340",
"Content-Type": "application/octet-stream",
"X-Parsed-By": "org.apache.tika.parser.EmptyParser",
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": ".DS_Store"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/10068.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100029",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "10068.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/105.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100034",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "105.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/10635.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100044",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "10635.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/10649.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100074",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "10649.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/10671.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100073",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "10671.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/1074.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100362",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "1074.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/10966.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100025",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "10966.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/11309.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100042",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "11309.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/11668.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100047",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "11668.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/11802.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100004",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "11802.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/11914.out
@@ -0,0 +1,11 @@
{
"Content-Length": "0",
"Content-Type": "application/sentiment",
"Sentiment": "negative",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "11914.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/120.out
@@ -0,0 +1,11 @@
{
"Content-Length": "0",
"Content-Type": "application/sentiment",
"Sentiment": "positive",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "120.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/12036.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100375",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "12036.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/12572.out
@@ -0,0 +1,11 @@
{
"Content-Length": "0",
"Content-Type": "application/sentiment",
"Sentiment": "positive",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "12572.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/12575.out
@@ -0,0 +1,11 @@
{
"Content-Length": "0",
"Content-Type": "application/sentiment",
"Sentiment": "positive",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "12575.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/12688.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100032",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "12688.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/1336.out
@@ -0,0 +1,11 @@
{
"Content-Length": "101072",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "1336.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/13587.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100069",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "13587.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/13828.out
@@ -0,0 +1,11 @@
{
"Content-Length": "0",
"Content-Type": "application/sentiment",
"Sentiment": "positive",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "13828.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/1393.out
@@ -0,0 +1,11 @@
{
"Content-Length": "101877",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "1393.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/14220.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100017",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "14220.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/15142.out
@@ -0,0 +1,11 @@
{
"Content-Length": "101509",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "15142.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/15164.out
@@ -0,0 +1,11 @@
{
"Content-Length": "101205",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "15164.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/15393.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100004",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "15393.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/15641.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100011",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "15641.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/15802.out
@@ -0,0 +1,11 @@
{
"Content-Length": "100982",
"Content-Type": "application/sentiment",
"Sentiment": "neutral",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "15802.sent"
}
11 changes: 11 additions & 0 deletions examples/gun-output1/15835.out
@@ -0,0 +1,11 @@
{
"Content-Length": "0",
"Content-Type": "application/sentiment",
"Sentiment": "positive",
"X-Parsed-By": [
"org.apache.tika.parser.DefaultParser",
"org.apache.tika.parser.sentiment.analysis.SentimentParser"
],
"model": "model/org/apache/tika/parser/sentiment/topic/en-sentiment.bin",
"resourceName": "15835.sent"
}

0 comments on commit ee30f89

Please sign in to comment.