Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RD-1983 add support for topics and keyphrases #25

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import com.basistech.rosette.dm.Entity;
import com.basistech.rosette.dm.Extent;
import com.basistech.rosette.dm.HanMorphoAnalysis;
import com.basistech.rosette.dm.Keyphrase;
import com.basistech.rosette.dm.KoreanMorphoAnalysis;
import com.basistech.rosette.dm.LanguageDetection;
import com.basistech.rosette.dm.ListAttribute;
Expand All @@ -39,6 +40,7 @@
import com.basistech.rosette.dm.ScriptRegion;
import com.basistech.rosette.dm.Sentence;
import com.basistech.rosette.dm.Token;
import com.basistech.rosette.dm.Topic;
import com.basistech.rosette.dm.TranslatedData;
import com.basistech.rosette.dm.TranslatedTokens;
import com.basistech.util.jackson.EnumModule;
Expand Down Expand Up @@ -86,6 +88,8 @@ public void setupModule(SetupContext context) {
context.setMixInAnnotations(Dependency.class, DependencyMixin.class);
context.setMixInAnnotations(EmbeddingCollection.class, EmbeddingCollectionMixin.class);
context.setMixInAnnotations(Embeddings.class, EmbeddingsMixin.class);
context.setMixInAnnotations(Topic.class, TopicMixin.class);
context.setMixInAnnotations(Keyphrase.class, KeyphraseMixin.class);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
import com.basistech.rosette.dm.Dependency;
import com.basistech.rosette.dm.Embeddings;
import com.basistech.rosette.dm.Entity;
import com.basistech.rosette.dm.Keyphrase;
import com.basistech.rosette.dm.LanguageDetection;
import com.basistech.rosette.dm.ListAttribute;
import com.basistech.rosette.dm.RelationshipMention;
import com.basistech.rosette.dm.ScriptRegion;
import com.basistech.rosette.dm.Sentence;
import com.basistech.rosette.dm.Token;
import com.basistech.rosette.dm.Topic;
import com.basistech.rosette.dm.TranslatedData;
import com.basistech.rosette.dm.TranslatedTokens;
import com.fasterxml.jackson.annotation.JsonCreator;
Expand Down Expand Up @@ -124,4 +126,10 @@ public abstract class AnnotatedTextMixin {

/**
 * Mixin declaration for the embeddings accessor. The {@code @JsonIgnore} marker tells Jackson
 * not to treat this getter as a JSON property of the mixin's target type.
 * NOTE(review): presumably the value is carried elsewhere in the serialized form — confirm.
 */
@JsonIgnore
public abstract Embeddings getEmbeddings();

/**
 * Mixin declaration for the topics accessor. The {@code @JsonIgnore} marker tells Jackson
 * not to treat this getter as a JSON property of the mixin's target type.
 * NOTE(review): presumably the value is carried elsewhere in the serialized form — confirm.
 */
@JsonIgnore
public abstract ListAttribute<Topic> getTopics();

/**
 * Mixin declaration for the keyphrases accessor. The {@code @JsonIgnore} marker tells Jackson
 * not to treat this getter as a JSON property of the mixin's target type.
 * NOTE(review): presumably the value is carried elsewhere in the serialized form — confirm.
 */
@JsonIgnore
public abstract ListAttribute<Keyphrase> getKeyphrases();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright 2017 Basis Technology Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.basistech.rosette.dm.jackson;

import com.basistech.rosette.dm.Extent;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

import java.util.List;
import java.util.Map;

/**
 * Jackson mixin describing how a {@code Keyphrase} is created from JSON.
 *
 * <p>The class carries annotations only; it is associated with {@code Keyphrase} through
 * {@code setMixInAnnotations} in the module setup and is never instantiated itself.
 */
public abstract class KeyphraseMixin {

    /**
     * Creator signature that maps JSON property names onto constructor arguments.
     *
     * @param keyphrase          bound to the JSON property {@code keyphrase}
     * @param confidence         bound to the JSON property {@code confidence}
     * @param extents            bound to the JSON property {@code extents}
     * @param extendedProperties bound to the JSON property {@code extendedProperties}
     */
    @JsonCreator
    KeyphraseMixin(
            @JsonProperty("keyphrase") String keyphrase,
            @JsonProperty("confidence") Double confidence,
            @JsonProperty("extents") List<Extent> extents,
            @JsonProperty("extendedProperties") Map<String, Object> extendedProperties) {
        // Intentionally empty: only the annotations on this signature are used.
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
import com.basistech.rosette.dm.Dependency;
import com.basistech.rosette.dm.Embeddings;
import com.basistech.rosette.dm.Entity;
import com.basistech.rosette.dm.Keyphrase;
import com.basistech.rosette.dm.LanguageDetection;
import com.basistech.rosette.dm.ListAttribute;
import com.basistech.rosette.dm.RelationshipComponent;
import com.basistech.rosette.dm.RelationshipMention;
import com.basistech.rosette.dm.ScriptRegion;
import com.basistech.rosette.dm.Sentence;
import com.basistech.rosette.dm.Token;
import com.basistech.rosette.dm.Topic;
import com.basistech.rosette.dm.TranslatedData;
import com.basistech.rosette.dm.TranslatedTokens;
import com.basistech.rosette.dm.UnknownAttribute;
Expand Down Expand Up @@ -59,7 +61,9 @@ public enum KnownAttribute {
TOPIC_RESULT("topicResults", CategorizerResult.class),
LIST("list", ListAttribute.class),
UNKNOWN("unknown", UnknownAttribute.class),
RELATION_ARGUMENT("RelationshipComponent", RelationshipComponent.class);
RELATION_ARGUMENT("RelationshipComponent", RelationshipComponent.class),
TOPIC("topics", Topic.class),
KEYPHRASE("keyphrase", Keyphrase.class);

private final String jsonTag;
private final Class<? extends BaseAttribute> attributeClass;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright 2017 Basis Technology Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.basistech.rosette.dm.jackson;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

import java.util.Map;

/**
 * Jackson mixin describing how a {@code Topic} is created from JSON.
 *
 * <p>The class carries annotations only; it is associated with {@code Topic} through
 * {@code setMixInAnnotations} in the module setup and is never instantiated itself.
 */
public abstract class TopicMixin {

    /**
     * Creator signature that maps JSON property names onto constructor arguments.
     *
     * @param topic              bound to the JSON property {@code topic}
     * @param confidence         bound to the JSON property {@code confidence}
     * @param topicId            bound to the JSON property {@code conceptId}; note that the
     *                           JSON name differs from the Java parameter name
     * @param extendedProperties bound to the JSON property {@code extendedProperties}
     */
    @JsonCreator
    TopicMixin(
            @JsonProperty("topic") String topic,
            @JsonProperty("confidence") Double confidence,
            @JsonProperty("conceptId") String topicId,
            @JsonProperty("extendedProperties") Map<String, Object> extendedProperties) {
        // Intentionally empty: only the annotations on this signature are used.
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.basistech.rosette.dm.Entity;
import com.basistech.rosette.dm.Extent;
import com.basistech.rosette.dm.HanMorphoAnalysis;
import com.basistech.rosette.dm.Keyphrase;
import com.basistech.rosette.dm.KoreanMorphoAnalysis;
import com.basistech.rosette.dm.LanguageDetection;
import com.basistech.rosette.dm.ListAttribute;
Expand All @@ -40,6 +41,7 @@
import com.basistech.rosette.dm.ScriptRegion;
import com.basistech.rosette.dm.Sentence;
import com.basistech.rosette.dm.Token;
import com.basistech.rosette.dm.Topic;
import com.basistech.rosette.dm.TranslatedData;
import com.basistech.rosette.dm.TranslatedTokens;
import com.basistech.rosette.dm.jackson.ArabicMorphoAnalysisMixin;
Expand All @@ -53,6 +55,7 @@
import com.basistech.rosette.dm.jackson.EntityMixin;
import com.basistech.rosette.dm.jackson.ExtentMixin;
import com.basistech.rosette.dm.jackson.HanMorphoAnalysisMixin;
import com.basistech.rosette.dm.jackson.KeyphraseMixin;
import com.basistech.rosette.dm.jackson.KoreanMorphoAnalysisMixin;
import com.basistech.rosette.dm.jackson.MentionMixin;
import com.basistech.rosette.dm.jackson.MorphoAnalysisMixin;
Expand All @@ -63,6 +66,7 @@
import com.basistech.rosette.dm.jackson.ResolvedEntityMixin;
import com.basistech.rosette.dm.jackson.ScriptRegionMixin;
import com.basistech.rosette.dm.jackson.SentenceMixin;
import com.basistech.rosette.dm.jackson.TopicMixin;
import com.basistech.rosette.dm.jackson.TranslatedDataMixin;
import com.basistech.rosette.dm.jackson.TranslatedTokensMixin;
import com.basistech.util.jackson.EnumModule;
Expand Down Expand Up @@ -114,6 +118,8 @@ public void setupModule(SetupContext context) {
context.setMixInAnnotations(Dependency.class, DependencyMixin.class);
context.setMixInAnnotations(EmbeddingCollection.class, EmbeddingCollectionMixin.class);
context.setMixInAnnotations(Embeddings.class, EmbeddingsMixin.class);
context.setMixInAnnotations(Topic.class, TopicMixin.class);
context.setMixInAnnotations(Keyphrase.class, KeyphraseMixin.class);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
import com.basistech.rosette.dm.CategorizerResult;
import com.basistech.rosette.dm.Dependency;
import com.basistech.rosette.dm.Entity;
import com.basistech.rosette.dm.Keyphrase;
import com.basistech.rosette.dm.LanguageDetection;
import com.basistech.rosette.dm.ListAttribute;
import com.basistech.rosette.dm.RelationshipMention;
import com.basistech.rosette.dm.ScriptRegion;
import com.basistech.rosette.dm.Sentence;
import com.basistech.rosette.dm.Token;
import com.basistech.rosette.dm.Topic;
import com.basistech.rosette.dm.TranslatedData;
import com.basistech.rosette.dm.TranslatedTokens;
import com.basistech.rosette.dm.jackson.DmTypeIdResolver;
Expand Down Expand Up @@ -122,4 +124,10 @@ public abstract class AnnotatedTextArrayMixin {

/**
 * Mixin declaration for the dependencies accessor. The {@code @JsonIgnore} marker tells Jackson
 * not to treat this getter as a JSON property of the mixin's target type.
 * NOTE(review): presumably the value is carried elsewhere in the serialized form — confirm.
 */
@JsonIgnore
public abstract ListAttribute<Dependency> getDependencies();

/**
 * Mixin declaration for the topics accessor. The {@code @JsonIgnore} marker tells Jackson
 * not to treat this getter as a JSON property of the mixin's target type.
 * NOTE(review): presumably the value is carried elsewhere in the serialized form — confirm.
 */
@JsonIgnore
public abstract ListAttribute<Topic> getTopics();

/**
 * Mixin declaration for the keyphrases accessor. The {@code @JsonIgnore} marker tells Jackson
 * not to treat this getter as a JSON property of the mixin's target type.
 * NOTE(review): presumably the value is carried elsewhere in the serialized form — confirm.
 */
@JsonIgnore
public abstract ListAttribute<Keyphrase> getKeyphrases();
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.basistech.rosette.dm.CategorizerResult;
import com.basistech.rosette.dm.Extent;
import com.basistech.rosette.dm.HanMorphoAnalysis;
import com.basistech.rosette.dm.Keyphrase;
import com.basistech.rosette.dm.KoreanMorphoAnalysis;
import com.basistech.rosette.dm.LanguageDetection;
import com.basistech.rosette.dm.ListAttribute;
Expand All @@ -30,6 +31,7 @@
import com.basistech.rosette.dm.ScriptRegion;
import com.basistech.rosette.dm.Sentence;
import com.basistech.rosette.dm.Token;
import com.basistech.rosette.dm.Topic;
import com.basistech.rosette.dm.TranslatedData;
import com.basistech.rosette.dm.TranslatedTokens;
import com.basistech.rosette.dm.jackson.AnnotatedDataModelModule;
Expand Down Expand Up @@ -77,6 +79,8 @@ public class JsonTest extends AdmAssert {
private CategorizerResult categoryResult;
private CategorizerResult sentimentResult;
private CategorizerResult topicResult;
private Topic topic;
private Keyphrase keyphrase;
private AnnotatedText referenceText;

@Before
Expand Down Expand Up @@ -282,6 +286,18 @@ public void oneWithEverything() {
crBuilder.add(topicResult);
builder.topicResults(crBuilder.build());

ListAttribute.Builder<Topic> topicBuilder = new ListAttribute.Builder<>(Topic.class);
topic = new Topic.Builder("topic", 0.7, "Q100").build();
topicBuilder.add(topic);
builder.topics(topicBuilder.build());

ListAttribute.Builder<Keyphrase> keyphraseBuilder = new ListAttribute.Builder<>(Keyphrase.class);
keyphrase = new Keyphrase.Builder("keyphrase", 1.0,
Lists.newArrayList(new Extent.Builder(5, 6).build(), new Extent.Builder(6, 7).build()))
.build();
keyphraseBuilder.add(keyphrase);
builder.keyphrases(keyphraseBuilder.build());

referenceText = builder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import com.basistech.rosette.dm.Entity;
import com.basistech.rosette.dm.Extent;
import com.basistech.rosette.dm.HanMorphoAnalysis;
import com.basistech.rosette.dm.Keyphrase;
import com.basistech.rosette.dm.KoreanMorphoAnalysis;
import com.basistech.rosette.dm.LanguageDetection;
import com.basistech.rosette.dm.ListAttribute;
Expand All @@ -36,6 +37,7 @@
import com.basistech.rosette.dm.ScriptRegion;
import com.basistech.rosette.dm.Sentence;
import com.basistech.rosette.dm.Token;
import com.basistech.rosette.dm.Topic;
import com.basistech.rosette.dm.TranslatedData;
import com.basistech.rosette.dm.TranslatedTokens;
import com.basistech.rosette.dm.jackson.AnnotatedDataModelModule;
Expand Down Expand Up @@ -93,6 +95,8 @@ public class JsonTest extends AdmAssert {
private AnnotatedText referenceText;
private Entity entity;
private Embeddings embeddings;
private Topic topic;
private Keyphrase keyphrase;

@Before
public void oneWithEverythingOldEntities() {
Expand Down Expand Up @@ -306,6 +310,18 @@ public void oneWithEverythingOldEntities() {
embeddings = embeddingsBuilder.build();
builder.embeddings(embeddingsBuilder.build());

ListAttribute.Builder<Topic> topicBuilder = new ListAttribute.Builder<>(Topic.class);
topic = new Topic.Builder("topic", 0.7, "Q100").build();
topicBuilder.add(topic);
builder.topics(topicBuilder.build());

ListAttribute.Builder<Keyphrase> keyphraseBuilder = new ListAttribute.Builder<>(Keyphrase.class);
keyphrase = new Keyphrase.Builder("keyphrase", 1.0,
Lists.newArrayList(new Extent.Builder(5, 6).build(), new Extent.Builder(6, 7).build()))
.build();
keyphraseBuilder.add(keyphrase);
builder.keyphrases(keyphraseBuilder.build());

referenceTextOldEntities = builder.build();

}
Expand Down
19 changes: 19 additions & 0 deletions model/src/main/java/com/basistech/rosette/dm/AnnotatedText.java
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,15 @@ public Embeddings getEmbeddings() {
return (Embeddings) attributes.get(AttributeKey.EMBEDDING.key());
}

/**
 * Looks up the attribute stored under {@code AttributeKey.TOPIC} and returns it as a topic list.
 *
 * @return the result of the attribute lookup, cast to {@code ListAttribute<Topic>}
 */
@SuppressWarnings("unchecked")
public ListAttribute<Topic> getTopics() {
    final Object stored = attributes.get(AttributeKey.TOPIC.key());
    // Unchecked cast: the element type is not verified here.
    return (ListAttribute<Topic>) stored;
}

/**
 * Looks up the attribute stored under {@code AttributeKey.KEYPHRASE} and returns it as a
 * keyphrase list.
 *
 * @return the result of the attribute lookup, cast to {@code ListAttribute<Keyphrase>}
 */
@SuppressWarnings("unchecked")
public ListAttribute<Keyphrase> getKeyphrases() {
    final Object stored = attributes.get(AttributeKey.KEYPHRASE.key());
    // Unchecked cast: the element type is not verified here.
    return (ListAttribute<Keyphrase>) stored;
}
/**
* toString is a convenience for accessing the textual data, if any, in this annotated text.
* @return the data for this AnnotatedText as a String.
Expand Down Expand Up @@ -798,6 +807,16 @@ public Builder embeddings(Embeddings embeddings) {
return this;
}

/**
 * Attaches the given topics to the text being built, storing them under
 * {@code AttributeKey.TOPIC}.
 *
 * @param topics the topics to store
 * @return this
 */
public Builder topics(ListAttribute<Topic> topics) {
attributes.put(AttributeKey.TOPIC.key(), topics);
return this;
}

/**
 * Attaches the given keyphrases to the text being built, storing them under
 * {@code AttributeKey.KEYPHRASE}.
 *
 * @param keyphrases the keyphrases to store
 * @return this
 */
public Builder keyphrases(ListAttribute<Keyphrase> keyphrases) {
attributes.put(AttributeKey.KEYPHRASE.key(), keyphrases);
return this;
}

/**
* Adds an attribute.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ enum AttributeKey {
TOKEN("token"),
TRANSLATED_DATA("translatedData"),
TRANSLATED_TOKENS("translatedTokens"),
TOPIC_RESULTS("topicResults");
TOPIC_RESULTS("topicResults"),

TOPIC("topics"),
KEYPHRASE("keyphrases");

private final String key;

Expand Down
Loading