From f0020c407098873fbd5a369de4863e6b9adc592b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Kottmann?= Date: Thu, 26 Jan 2017 23:05:44 +0100 Subject: [PATCH] OPENNLP-966: Remove deprecated UIMA trainers --- opennlp-uima/descriptors/ChunkerTrainer.xml | 143 ------ .../descriptors/PersonNameFinderTrainer.xml | 168 ------- opennlp-uima/descriptors/PosTaggerTrainer.xml | 116 ----- .../descriptors/SentenceDetectorTrainer.xml | 106 ----- opennlp-uima/descriptors/TokenizerTrainer.xml | 124 ----- .../opennlp/uima/chunker/ChunkerTrainer.java | 236 --------- .../doccat/DocumentCategorizerTrainer.java | 162 ------- .../uima/namefind/NameFinderTrainer.java | 447 ------------------ .../opennlp/uima/postag/POSTaggerTrainer.java | 240 ---------- .../sentdetect/SentenceDetectorTrainer.java | 210 -------- .../uima/tokenize/TokenizerTrainer.java | 294 ------------ .../opennlp/uima/util/CasConsumerUtil.java | 411 ---------------- .../opennlp/uima/util/SampleTraceStream.java | 65 --- 13 files changed, 2722 deletions(-) delete mode 100644 opennlp-uima/descriptors/ChunkerTrainer.xml delete mode 100644 opennlp-uima/descriptors/PersonNameFinderTrainer.xml delete mode 100644 opennlp-uima/descriptors/PosTaggerTrainer.xml delete mode 100644 opennlp-uima/descriptors/SentenceDetectorTrainer.xml delete mode 100644 opennlp-uima/descriptors/TokenizerTrainer.xml delete mode 100644 opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java delete mode 100644 opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java delete mode 100644 opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java delete mode 100644 opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java delete mode 100644 opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java delete mode 100644 opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java delete mode 100644 opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java delete mode 100644 opennlp-uima/src/main/java/opennlp/uima/util/SampleTraceStream.java diff --git a/opennlp-uima/descriptors/ChunkerTrainer.xml b/opennlp-uima/descriptors/ChunkerTrainer.xml deleted file mode 100644 index fce959971..000000000 --- a/opennlp-uima/descriptors/ChunkerTrainer.xml +++ /dev/null @@ -1,143 +0,0 @@ - - - - - - org.apache.uima.java - opennlp.uima.chunker.ChunkerTrainer - - POS Trainer - - ${pom.version} - Apache Software Foundation - - - - opennlp.uima.ModelName - String - false - true - - - - opennlp.uima.SentenceType - String - false - true - - - - opennlp.uima.TokenType - String - false - true - - - - opennlp.uima.POSFeature - String - false - true - - - - opennlp.uima.Language - String - false - true - - - - opennlp.uima.ChunkType - String - false - true - - - - opennlp.uima.ChunkTagFeature - String - false - true - - - - - - opennlp.uima.ModelName - - POS.bin - - - - - opennlp.uima.TokenType - - opennlp.uima.Token - - - - - opennlp.uima.SentenceType - - opennlp.uima.Sentence - - - - - opennlp.uima.POSFeature - - pos - - - - - opennlp.uima.Language - - en - - - - - opennlp.uima.ChunkType - - opennlp.uima.Chunk - - - - - opennlp.uima.ChunkTagFeature - - chunkType - - - - - - - - - - - false - false - - - \ No newline at end of file diff --git a/opennlp-uima/descriptors/PersonNameFinderTrainer.xml b/opennlp-uima/descriptors/PersonNameFinderTrainer.xml deleted file mode 100644 index a7f1f8c29..000000000 --- a/opennlp-uima/descriptors/PersonNameFinderTrainer.xml +++ /dev/null @@ -1,168 +0,0 @@ - - - - - - org.apache.uima.java - - opennlp.uima.namefind.NameFinderTrainer - - Person Name Finder Trainer - - ${pom.version} - Apache Software Foundation - - - opennlp.uima.ModelName - String - false - true - - - - opennlp.uima.SentenceType - String - false - true - - - - opennlp.uima.TokenType - String - false - true - - - - opennlp.uima.NameType - String - false - true - - - - opennlp.uima.TrainingParamsFile - String - false - false - - - - opennlp.uima.AdditionalTrainingDataFile - String - false - false - - - - opennlp.uima.AdditionalTrainingDataEncoding - String - false - false - - - - opennlp.uima.SampleTraceFile - String - false - false - - - - opennlp.uima.SampleTraceFileEncoding - String - false - false - - - - opennlp.uima.FeatureGeneratorFile - String - false - false - - - - opennlp.uima.FeatureGeneratorResources - String - false - false - - - - opennlp.uima.Language - String - false - true - - - - - - - opennlp.uima.ModelName - - Person.bin - - - - - opennlp.uima.TokenType - - opennlp.uima.Token - - - - - opennlp.uima.SentenceType - - uima.tcas.DocumentAnnotation - - - - - opennlp.uima.NameType - - opennlp.uima.Person - - - - - opennlp.uima.Language - - en - - - - - - - - - - - - - - false - false - - - \ No newline at end of file diff --git a/opennlp-uima/descriptors/PosTaggerTrainer.xml b/opennlp-uima/descriptors/PosTaggerTrainer.xml deleted file mode 100644 index 325c76e05..000000000 --- a/opennlp-uima/descriptors/PosTaggerTrainer.xml +++ /dev/null @@ -1,116 +0,0 @@ - - - - - - org.apache.uima.java - opennlp.uima.postag.POSTaggerTrainer - - POS Trainer - - ${pom.version} - Apache Software Foundation - - - - opennlp.uima.ModelName - String - false - true - - - - opennlp.uima.SentenceType - String - false - true - - - - opennlp.uima.TokenType - String - false - true - - - - opennlp.uima.POSFeature - String - false - true - - - - opennlp.uima.Language - String - false - true - - - - - - - opennlp.uima.ModelName - - POS.bin - - - - - opennlp.uima.TokenType - - opennlp.uima.Token - - - - - opennlp.uima.SentenceType - - opennlp.uima.Sentence - - - - - opennlp.uima.POSFeature - - pos - - - - - opennlp.uima.Language - - en - - - - - - - - - - - false - false - - - \ No newline at end of file diff --git a/opennlp-uima/descriptors/SentenceDetectorTrainer.xml b/opennlp-uima/descriptors/SentenceDetectorTrainer.xml deleted file mode 100644 index 1db008f15..000000000 --- a/opennlp-uima/descriptors/SentenceDetectorTrainer.xml +++ /dev/null @@ -1,106 +0,0 @@ - - - - - - org.apache.uima.java - - opennlp.uima.sentdetect.SentenceDetectorTrainer - - Sentence Detector Trainer - - ${pom.version} - Apache Software Foundation - - - - opennlp.uima.ModelName - String - false - true - - - - opennlp.uima.SentenceType - String - false - true - - - - opennlp.uima.Language - String - false - true - - - - opennlp.uima.EOSChars - String - false - false - - - opennlp.uima.SampleTraceFile - String - false - false - - - opennlp.uima.SampleTraceFileEncoding - String - false - false - - - - - - opennlp.uima.ModelName - - SentDetect.bin - - - - - opennlp.uima.SentenceType - - opennlp.uima.Sentence - - - - - opennlp.uima.Language - - en - - - - - - - - - - false - false - - - \ No newline at end of file diff --git a/opennlp-uima/descriptors/TokenizerTrainer.xml b/opennlp-uima/descriptors/TokenizerTrainer.xml deleted file mode 100644 index 654f3dfa9..000000000 --- a/opennlp-uima/descriptors/TokenizerTrainer.xml +++ /dev/null @@ -1,124 +0,0 @@ - - - - - - org.apache.uima.java - - opennlp.uima.tokenize.TokenizerTrainer - - - TokenizerTrainer - - ${pom.version} - Apache Software Foundation - - - opennlp.uima.ModelName - String - false - true - - - opennlp.uima.SentenceType - String - false - true - - - opennlp.uima.TokenType - String - false - true - - - opennlp.uima.tokenizer.IsSkipAlphaNumerics - - Boolean - false - true - - - opennlp.uima.Language - String - false - true - - - opennlp.uima.SampleTraceFile - String - false - false - - - opennlp.uima.SampleTraceFileEncoding - String - false - false - - - - - opennlp.uima.ModelName - - Tokens.bin - - - - opennlp.uima.TokenType - - opennlp.uima.Token - - - - opennlp.uima.SentenceType - - uima.tcas.DocumentAnnotation - - - - - opennlp.uima.tokenizer.IsSkipAlphaNumerics - - - false - - - - opennlp.uima.Language - - en - - - - - - - - - - - - - false - false - - - \ No newline at end of file diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java deleted file mode 100644 index 30b3f2f08..000000000 --- a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.uima.chunker; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.uima.UimaContext; -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.FSIndex; -import org.apache.uima.cas.Feature; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.collection.CasConsumer_ImplBase; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceProcessException; -import org.apache.uima.util.Level; -import org.apache.uima.util.Logger; -import org.apache.uima.util.ProcessTrace; - -import opennlp.tools.chunker.ChunkSample; -import opennlp.tools.chunker.ChunkerFactory; -import opennlp.tools.chunker.ChunkerME; -import opennlp.tools.chunker.ChunkerModel; -import opennlp.tools.ml.maxent.GIS; -import opennlp.tools.util.ObjectStreamUtils; -import opennlp.tools.util.model.ModelUtil; -import opennlp.uima.util.CasConsumerUtil; -import opennlp.uima.util.ContainingConstraint; -import opennlp.uima.util.OpennlpUtil; -import opennlp.uima.util.UimaUtil; - -/** - * OpenNLP Chunker trainer. - *

- * Mandatory parameters - * - * - * - * - * - * - * - * - * - *
Type Name Description
String opennlp.uima.ModelName The name of the model file
String opennlp.uima.SentenceType The full name of the sentence type
String opennlp.uima.TokenType The full name of the token type
String opennlp.uima.POSFeature
String opennlp.uima.ChunkType
String opennlp.uima.ChunkTagFeature
- * - * @deprecated will be removed after 1.7.1 release, there is no replacement - */ -@Deprecated -public class ChunkerTrainer extends CasConsumer_ImplBase { - - private List mChunkSamples = new ArrayList<>(); - - private UimaContext mContext; - - private String mModelName; - - private Type mSentenceType; - - private Type mTokenType; - - private Feature mPOSFeature; - - private Type mChunkType; - - private Feature mChunkTagFeature; - - private String language; - - /** - * Initializes the current instance. - */ - public void initialize() throws ResourceInitializationException { - - super.initialize(); - - mContext = getUimaContext(); - - Logger mLogger = mContext.getLogger(); - - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the OpenNLP Chunker Trainer."); - } - - mModelName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.MODEL_PARAMETER); - - language = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.LANGUAGE_PARAMETER); - } - - /** - * Initialize the current instance with the given type system. - */ - public void typeSystemInit(TypeSystem typeSystem) - throws ResourceInitializationException { - String sentenceTypeName = - CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.SENTENCE_TYPE_PARAMETER); - - mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName); - - String chunkTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - Chunker.CHUNK_TYPE_PARAMETER); - - mChunkType = CasConsumerUtil.getType(typeSystem, chunkTypeName); - - String chunkTagFeature = CasConsumerUtil.getRequiredStringParameter( - mContext, Chunker.CHUNK_TAG_FEATURE_PARAMETER); - - mChunkTagFeature = mChunkType.getFeatureByBaseName(chunkTagFeature); - - CasConsumerUtil.checkFeatureType(mChunkTagFeature, CAS.TYPE_NAME_STRING); - - String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.TOKEN_TYPE_PARAMETER); - - mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName); - - String posFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.POS_FEATURE_PARAMETER); - - mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName); - - CasConsumerUtil.checkFeatureType(mPOSFeature, CAS.TYPE_NAME_STRING); - } - - /** - * Process the given CAS object. - */ - public void processCas(CAS cas) { - - FSIndex sentenceIndex = cas.getAnnotationIndex(mSentenceType); - - for (AnnotationFS sentenceAnnotation : sentenceIndex) { - processSentence(cas, sentenceAnnotation); - } - } - - private void processSentence(CAS tcas, AnnotationFS sentence) { - FSIndex chunkIndex = tcas.getAnnotationIndex(mChunkType); - - ContainingConstraint containingConstraint = - new ContainingConstraint(sentence); - - Iterator chunkIterator = tcas.createFilteredIterator( - chunkIndex.iterator(), containingConstraint); - - while (chunkIterator.hasNext()) { - AnnotationFS chunkAnnotation = chunkIterator.next(); - processChunk(tcas, chunkAnnotation); - } - } - - private void processChunk(CAS tcas, AnnotationFS chunk) { - - String chunkTag = chunk.getFeatureValueAsString(mChunkTagFeature); - - FSIndex tokenIndex = tcas.getAnnotationIndex(mTokenType); - - ContainingConstraint containingConstraint = - new ContainingConstraint(chunk); - - Iterator tokenIterator = tcas.createFilteredIterator(tokenIndex.iterator(), - containingConstraint); - - List tokens = new ArrayList<>(); - List tags = new ArrayList<>(); - List chunkTags = new ArrayList<>(); - - while (tokenIterator.hasNext()) { - AnnotationFS tokenAnnotation = tokenIterator.next(); - - tokens.add(tokenAnnotation.getCoveredText().trim()); - tags.add(tokenAnnotation.getFeatureValueAsString(mPOSFeature)); - chunkTags.add(chunkTag); - } - - mChunkSamples.add(new ChunkSample(tokens, tags, chunkTags)); - } - - /** - * Called if the processing is finished, this method - * does the training. - */ - public void collectionProcessComplete(ProcessTrace trace) - throws ResourceProcessException, IOException { - GIS.PRINT_MESSAGES = false; - - ChunkerModel chunkerModel = ChunkerME.train(language, - ObjectStreamUtils.createObjectStream(mChunkSamples), - ModelUtil.createDefaultTrainingParameters(), ChunkerFactory.create(null)); - - // dereference to allow garbage collection - mChunkSamples = null; - - File modelFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + mModelName); - - OpennlpUtil.serialize(chunkerModel, modelFile); - } - - /** - * The trainer is not stateless. - */ - public boolean isStateless() { - return false; - } - - /** - * Releases allocated resources. - */ - public void destroy() { - mChunkSamples = null; - } -} diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java deleted file mode 100644 index ca788d7e2..000000000 --- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.uima.doccat; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.uima.UimaContext; -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.FSIndex; -import org.apache.uima.cas.Feature; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.collection.CasConsumer_ImplBase; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceProcessException; -import org.apache.uima.util.Level; -import org.apache.uima.util.Logger; -import org.apache.uima.util.ProcessTrace; - -import opennlp.tools.doccat.DoccatFactory; -import opennlp.tools.doccat.DoccatModel; -import opennlp.tools.doccat.DocumentCategorizerME; -import opennlp.tools.doccat.DocumentSample; -import opennlp.tools.ml.maxent.GIS; -import opennlp.tools.util.ObjectStreamUtils; -import opennlp.tools.util.TrainingParameters; -import opennlp.uima.util.CasConsumerUtil; -import opennlp.uima.util.OpennlpUtil; -import opennlp.uima.util.UimaUtil; - -/** - * OpenNLP NameFinder trainer. - *

- * Note: This class is still work in progress, and should not be used! - * - * @deprecated will be removed after 1.7.1 release, there is no replacement - */ -@Deprecated - -public class DocumentCategorizerTrainer extends CasConsumer_ImplBase { - - private UimaContext mContext; - - private String mModelName; - - private List documentSamples = new ArrayList<>(); - - private Type mCategoryType; - - private Feature mCategoryFeature; - - private String language; - - public void initialize() throws ResourceInitializationException { - - super.initialize(); - - mContext = getUimaContext(); - - Logger mLogger = mContext.getLogger(); - - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the OpenNLP Doccat Trainer."); - } - - mModelName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.MODEL_PARAMETER); - - language = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.LANGUAGE_PARAMETER); - } - - public void typeSystemInit(TypeSystem typeSystem) - throws ResourceInitializationException { - - String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.SENTENCE_TYPE_PARAMETER); - - Type mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName); - - String categoryTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - "opennlp.uima.doccat.CategoryType"); - - mCategoryType = CasConsumerUtil.getType(typeSystem, categoryTypeName); - - // get feature name - String categoryFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext, - "opennlp.uima.doccat.CategoryFeature"); - - mCategoryFeature = mCategoryType.getFeatureByBaseName(categoryFeatureName); - } - - public void processCas(CAS cas) throws ResourceProcessException { - - FSIndex categoryIndex = cas.getAnnotationIndex(mCategoryType); - - if (categoryIndex.size() > 0) { - AnnotationFS categoryAnnotation = - (AnnotationFS) categoryIndex.iterator().next(); - - // add to event collection - - DocumentSample sample = new DocumentSample( - categoryAnnotation.getStringValue(mCategoryFeature), - cas.getDocumentText()); - - documentSamples.add(sample); - } - } - - public void collectionProcessComplete(ProcessTrace trace) - throws ResourceProcessException, IOException { - - GIS.PRINT_MESSAGES = false; - - TrainingParameters params = new TrainingParameters(); - params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100)); - params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0)); - - DoccatModel categoryModel = DocumentCategorizerME.train(language, - ObjectStreamUtils.createObjectStream(documentSamples), params, new DoccatFactory()); - - File modelFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + mModelName); - - OpennlpUtil.serialize(categoryModel, modelFile); - } - - /** - * The trainer is not stateless. - */ - public boolean isStateless() { - return false; - } - - /** - * Destroys the current instance. - */ - public void destroy() { - // dereference to allow garbage collection - documentSamples = null; - } -} diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java deleted file mode 100644 index af00f5844..000000000 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.uima.namefind; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.FSIndex; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.collection.CasConsumer_ImplBase; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceProcessException; -import org.apache.uima.util.Level; -import org.apache.uima.util.Logger; -import org.apache.uima.util.ProcessTrace; - -import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool; -import opennlp.tools.ml.maxent.GIS; -import opennlp.tools.namefind.BioCodec; -import opennlp.tools.namefind.NameFinderME; -import opennlp.tools.namefind.NameSample; -import opennlp.tools.namefind.NameSampleDataStream; -import opennlp.tools.namefind.TokenNameFinderFactory; -import opennlp.tools.namefind.TokenNameFinderModel; -import opennlp.tools.util.InputStreamFactory; -import opennlp.tools.util.MarkableFileInputStreamFactory; -import opennlp.tools.util.ObjectStream; -import opennlp.tools.util.ObjectStreamUtils; -import opennlp.tools.util.PlainTextByLineStream; -import opennlp.tools.util.Span; -import opennlp.tools.util.TrainingParameters; -import opennlp.uima.util.CasConsumerUtil; -import opennlp.uima.util.ContainingConstraint; -import opennlp.uima.util.OpennlpUtil; -import opennlp.uima.util.SampleTraceStream; -import opennlp.uima.util.UimaUtil; - -/** - * OpenNLP NameFinder trainer. - *

- * Mandatory parameters - * - * - * - * - * - * - * - * - *
Type Name Description
String opennlp.uima.ModelName The name of the model file
String opennlp.uima.Language The language code
String opennlp.uima.SentenceType The full name of the sentence type
String opennlp.uima.TokenType The full name of the token type
String opennlp.uima.NameType The full name of the name type
- * - * Optional parameters - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
Type Name Description
String opennlp.uima.opennlp.uima.TrainingParamsFileTraining Parameters Properties file
String opennlp.uima.FeatureGeneratorFileFeature Generator definition file which contain the feature generator configuration
String opennlp.uima.FeatureGeneratorResourcesFeature Generator resources dictionary
String opennlp.uima.AdditionalTrainingDataFileTraining file which contains additional data in the OpenNLP format
String opennlp.uima.AdditionalTrainingDataEncodingEncoding of the additional training data
String opennlp.uima.SampleTraceFileAll training samples are traced to this file
String opennlp.uima.SampleTraceFileEncodingEncoding of the sample trace file
- *

- * - * @deprecated will be removed after 1.7.1 release, there is no replacement - */ -@Deprecated - -public final class NameFinderTrainer extends CasConsumer_ImplBase { - - private static final String FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER = - "opennlp.uima.FeatureGeneratorFile"; - private static final String FEATURE_GENERATOR_RESOURCES_PARAMETER = - "opennlp.uima.FeatureGeneratorResources"; - - private Logger logger; - - private String modelPath; - - private byte featureGeneratorDefinition[]; - - private File featureGeneratorResourceDir; - - private String additionalTrainingDataFile; - - private String additionalTrainingDataEncoding; - - private File sampleTraceFile = null; - - private String sampleTraceFileEncoding = null; - - private Type sentenceType; - - private Type tokenType; - - private Type nameType; - - private String language; - - // TODO: Keeping all events in memory limits the size of the training corpus - // Possible solutions: - // - Write all events to disk - // - Directly start indexing with a blocking sample stream, the indexer will then write everything - // to disk or could store the events much more space efficient in memory - - private List nameFinderSamples = new ArrayList<>(); - private TrainingParameters trainingParams; - - /** - * Initializes the current instance. - */ - public void initialize() throws ResourceInitializationException { - - super.initialize(); - - logger = getUimaContext().getLogger(); - - if (logger.isLoggable(Level.INFO)) { - logger.log(Level.INFO, "Initializing the OpenNLP Name Trainer."); - } - - modelPath = CasConsumerUtil.getRequiredStringParameter(getUimaContext(), - UimaUtil.MODEL_PARAMETER); - - language = CasConsumerUtil.getRequiredStringParameter(getUimaContext(), - UimaUtil.LANGUAGE_PARAMETER); - - trainingParams = OpennlpUtil.loadTrainingParams(CasConsumerUtil.getOptionalStringParameter( - getUimaContext(), UimaUtil.TRAINING_PARAMS_FILE_PARAMETER), true); - - String featureGeneratorDefinitionFile = CasConsumerUtil.getOptionalStringParameter( - getUimaContext(), FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER); - - if (featureGeneratorDefinitionFile != null) { - try { - featureGeneratorDefinition = OpennlpUtil.loadBytes(new File(featureGeneratorDefinitionFile)); - } catch (IOException e) { - throw new ResourceInitializationException(e); - } - - String featureGeneratorResourcesDirName = CasConsumerUtil.getOptionalStringParameter( - getUimaContext(), FEATURE_GENERATOR_RESOURCES_PARAMETER); - - if (featureGeneratorResourcesDirName != null) { - featureGeneratorResourceDir = new File(featureGeneratorResourcesDirName); - } - } - - additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter( - getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_FILE); - - // If the additional training data is specified, the encoding must be provided! - if (additionalTrainingDataFile != null) { - additionalTrainingDataEncoding = CasConsumerUtil.getRequiredStringParameter( - getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_ENCODING); - } - - String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter( - getUimaContext(), "opennlp.uima.SampleTraceFile"); - - if (sampleTraceFileName != null) { - sampleTraceFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + sampleTraceFileName); - sampleTraceFileEncoding = CasConsumerUtil.getRequiredStringParameter( - getUimaContext(), "opennlp.uima.SampleTraceFileEncoding"); - } - } - - /** - * Initialize the current instance with the given type system. - */ - public void typeSystemInit(TypeSystem typeSystem) - throws ResourceInitializationException { - - String sentenceTypeName = - CasConsumerUtil.getRequiredStringParameter(getUimaContext(), - UimaUtil.SENTENCE_TYPE_PARAMETER); - - sentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName); - - String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(getUimaContext(), - UimaUtil.TOKEN_TYPE_PARAMETER); - - tokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName); - - String nameTypeName = CasConsumerUtil.getRequiredStringParameter(getUimaContext(), - NameFinder.NAME_TYPE_PARAMETER); - - nameType = CasConsumerUtil.getType(typeSystem, nameTypeName); - } - - /** - * Creates a {@link List} from an {@link Iterator}. - * - * @param - * @param it - * @return - */ - private static List iteratorToList(Iterator it) { - List list = new LinkedList<>(); - - while (it.hasNext()) { - list.add(it.next()); - } - - return list; - } - - private static boolean isContaining(AnnotationFS annotation, - AnnotationFS containtedAnnotation) { - boolean isStartContaining = annotation.getBegin() <= containtedAnnotation.getBegin(); - return isStartContaining && annotation.getEnd() >= containtedAnnotation.getEnd(); - - } - - /** - * Creates the name spans out of a list of token annotations and a list of entity annotations. - *

- * The name spans for the name finder use a token index and not on a character index which - * is used by the entity annotations. - * - * @param tokenList - * @param entityAnnotations - * @return - */ - private static Span[] createNames(List tokenList, List entityAnnotations) { - - List nameList = new LinkedList<>(); - - AnnotationFS currentEntity = null; - - int startIndex = -1; - int index = 0; - for (AnnotationFS token : tokenList) { - for (AnnotationFS entity : entityAnnotations) { - - if (!isContaining(entity, token)) { - // ... end of an entity - if (currentEntity == entity) { - nameList.add(new Span(startIndex, index)); - - startIndex = -1; - currentEntity = null; - // break; - } else { - continue; - } - } - - // is this token start of new entity - if (currentEntity == null && isContaining(entity, token)) { - startIndex = index; - - currentEntity = entity; - } - } - - index++; - } - - if (currentEntity != null) { - Span name = new Span(startIndex, index); - nameList.add(name); - } - - return nameList.toArray(new Span[nameList.size()]); - } - - /* - * Process the given CAS object. - */ - /** - * Process the given CAS object. - */ - public void processCas(CAS cas) { - FSIndex sentenceIndex = cas.getAnnotationIndex(sentenceType); - - boolean isClearAdaptiveData = true; - - for (AnnotationFS sentenceAnnotation : sentenceIndex) { - ContainingConstraint sentenceContainingConstraint = new ContainingConstraint( - sentenceAnnotation); - - FSIndex tokenAnnotations = cas.getAnnotationIndex(tokenType); - - Iterator containingTokens = cas.createFilteredIterator(tokenAnnotations - .iterator(), sentenceContainingConstraint); - - FSIndex allNames = cas.getAnnotationIndex(nameType); - - Iterator containingNames = cas.createFilteredIterator(allNames.iterator(), - sentenceContainingConstraint); - - List tokenList = iteratorToList(containingTokens); - - Span names[] = createNames(tokenList, iteratorToList(containingNames)); - - // create token array - String tokenArray[] = new String[tokenList.size()]; - - for (int i = 0; i < tokenArray.length; i++) { - tokenArray[i] = tokenList.get(i).getCoveredText(); - } - - NameSample trainingSentence = new NameSample(tokenArray, names, null, isClearAdaptiveData); - - if (trainingSentence.getSentence().length != 0) { - nameFinderSamples.add(trainingSentence); - - if (isClearAdaptiveData) { - isClearAdaptiveData = false; - } - } else { - if (logger.isLoggable(Level.INFO)) { - logger.log(Level.INFO, "Sentence without tokens: " + - sentenceAnnotation.getCoveredText()); - } - } - } - } - - /** - * Called if the processing is finished, this method - * does the training. - */ - public void collectionProcessComplete(ProcessTrace trace) - throws ResourceProcessException, IOException { - - if (logger.isLoggable(Level.INFO)) { - logger.log(Level.INFO, "Collected " + nameFinderSamples.size() + - " name samples."); - } - - GIS.PRINT_MESSAGES = false; - - // create training stream ... - ObjectStream samples = ObjectStreamUtils.createObjectStream(nameFinderSamples); - - Writer samplesOut = null; - TokenNameFinderModel nameModel; - try { - if (additionalTrainingDataFile != null) { - - if (logger.isLoggable(Level.INFO)) { - logger.log(Level.INFO, "Using additional training data file: " + additionalTrainingDataFile); - } - - InputStreamFactory additionalTrainingDataIn = new MarkableFileInputStreamFactory( - new File(additionalTrainingDataFile)); - Charset additionalTrainingDataCharset = Charset - .forName(additionalTrainingDataEncoding); - - ObjectStream additionalSamples = new NameSampleDataStream( - new PlainTextByLineStream(additionalTrainingDataIn, - additionalTrainingDataCharset)); - - samples = ObjectStreamUtils.createObjectStream(samples, additionalSamples); - } - - if (sampleTraceFile != null) { - samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding); - samples = new SampleTraceStream<>(samples, samplesOut); - } - - Map resourceMap; - - if (featureGeneratorResourceDir != null) { - resourceMap = TokenNameFinderTrainerTool.loadResources(featureGeneratorResourceDir, null); - } - else { - resourceMap = Collections.emptyMap(); - } - - nameModel = NameFinderME.train(language, null, samples, trainingParams, - new TokenNameFinderFactory(featureGeneratorDefinition, resourceMap, new BioCodec())); - } - finally { - - if (samplesOut != null) { - samplesOut.close(); - } - } - - // dereference to allow garbage collection - nameFinderSamples = null; - - File modelFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + modelPath); - - OpennlpUtil.serialize(nameModel, modelFile); - - if (logger.isLoggable(Level.INFO)) { - logger.log(Level.INFO, "Model was written to: " + modelFile.getAbsolutePath()); - } - } - - /** - * The trainer is not stateless. - */ - public boolean isStateless() { - return false; - } - - /** - * Destroys the current instance. - */ - public void destroy() { - // dereference to allow garbage collection - nameFinderSamples = null; - } -} diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java deleted file mode 100644 index be7651e24..000000000 --- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.uima.postag; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.uima.UimaContext; -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.FSIndex; -import org.apache.uima.cas.Feature; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.collection.CasConsumer_ImplBase; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceProcessException; -import org.apache.uima.util.Level; -import org.apache.uima.util.Logger; -import org.apache.uima.util.ProcessTrace; - -import opennlp.tools.ml.maxent.GIS; -import opennlp.tools.postag.POSDictionary; -import opennlp.tools.postag.POSModel; -import opennlp.tools.postag.POSSample; -import opennlp.tools.postag.POSTaggerFactory; -import opennlp.tools.postag.POSTaggerME; -import opennlp.tools.util.ObjectStreamUtils; -import opennlp.tools.util.TrainingParameters; -import opennlp.uima.util.AnnotatorUtil; -import opennlp.uima.util.CasConsumerUtil; -import opennlp.uima.util.ContainingConstraint; -import opennlp.uima.util.OpennlpUtil; -import opennlp.uima.util.UimaUtil; - -/** - * OpenNLP POSTagger trainer. - *

- * Mandatory parameters - * - * - * - * - * - * - * - * - *
Type Name Description
String opennlp.uima.ModelName The name of the model file
String opennlp.uima.SentenceType The full name of the sentence type
String opennlp.uima.TokenType The full name of the token type
String pennlp.uima.POSFeature The name of the token pos feature, - * the feature must be of type String
String opennlp.uima.TagDictionaryName
- * - * @deprecated will be removed after 1.7.1 release, there is no replacement - */ -@Deprecated - -public class POSTaggerTrainer extends CasConsumer_ImplBase { - - public static final String TAG_DICTIONARY_NAME = "opennlp.uima.TagDictionaryName"; - - private UimaContext mContext; - - private Type mSentenceType; - - private Type mTokenType; - - private String mModelName; - - private Feature mPOSFeature; - - private Logger mLogger; - - private List mPOSSamples = new ArrayList<>(); - - private String language; - - private POSDictionary tagDictionary; - - /** - * Initializes the current instance. - */ - public void initialize() throws ResourceInitializationException { - - super.initialize(); - - mContext = getUimaContext(); - - mLogger = mContext.getLogger(); - - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the OpenNLP " + - "POSTagger trainer."); - } - - mModelName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.MODEL_PARAMETER); - - language = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.LANGUAGE_PARAMETER); - - String tagDictionaryName = CasConsumerUtil.getOptionalStringParameter(mContext, - TAG_DICTIONARY_NAME); - - if (tagDictionaryName != null) { - try (InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName)) { - tagDictionary = POSDictionary.create(dictIn); - } catch (final IOException e) { - // if this fails just print error message and continue - final String message = "IOException during tag dictionary reading, " - + "running without tag dictionary: " + e.getMessage(); - - if (this.mLogger.isLoggable(Level.WARNING)) { - this.mLogger.log(Level.WARNING, message); - } - } - } - } - - /** - * Initialize the current instance with the given type system. - */ - public void typeSystemInit(TypeSystem typeSystem) - throws ResourceInitializationException { - String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.SENTENCE_TYPE_PARAMETER); - - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, UimaUtil.SENTENCE_TYPE_PARAMETER + ": " + - sentenceTypeName); - } - - mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName); - - String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.TOKEN_TYPE_PARAMETER); - - mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName); - - String posFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.POS_FEATURE_PARAMETER); - - mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName); - } - - /** - * Process the given CAS object. - */ - public void processCas(CAS cas) { - - FSIndex sentenceAnnotations = cas.getAnnotationIndex(mSentenceType); - - for (AnnotationFS sentence : sentenceAnnotations) { - process(cas, sentence); - } - } - - private void process(CAS tcas, AnnotationFS sentence) { - - FSIndex allTokens = tcas.getAnnotationIndex(mTokenType); - - ContainingConstraint containingConstraint = - new ContainingConstraint(sentence); - - List tokens = new ArrayList<>(); - List tags = new ArrayList<>(); - - Iterator containingTokens = tcas.createFilteredIterator( - allTokens.iterator(), containingConstraint); - - while (containingTokens.hasNext()) { - - AnnotationFS tokenAnnotation = containingTokens.next(); - - String tag = tokenAnnotation.getFeatureValueAsString(mPOSFeature); - - tokens.add(tokenAnnotation.getCoveredText().trim()); - tags.add(tag); - } - - mPOSSamples.add(new POSSample(tokens, tags)); - } - - /** - * Called if the processing is finished, this method - * does the training. - */ - public void collectionProcessComplete(ProcessTrace trace) - throws ResourceProcessException, IOException { - - GIS.PRINT_MESSAGES = false; - - TrainingParameters params = new TrainingParameters(); - params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100)); - params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(5)); - - POSModel posTaggerModel = POSTaggerME.train(language, - ObjectStreamUtils.createObjectStream(mPOSSamples), - params, new POSTaggerFactory(null, tagDictionary)); - - // dereference to allow garbage collection - mPOSSamples = null; - - File modelFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + mModelName); - - OpennlpUtil.serialize(posTaggerModel, modelFile); - } - - /** - * The trainer is not stateless. - */ - public boolean isStateless() { - return false; - } - - /** - * Releases allocated resources. - */ - public void destroy() { - // dereference to allow garbage collection - mPOSSamples = null; - } -} diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java deleted file mode 100644 index 99600b81a..000000000 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.uima.sentdetect; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; - -import org.apache.uima.UimaContext; -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.FSIndex; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.collection.CasConsumer_ImplBase; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceProcessException; -import org.apache.uima.util.Level; -import org.apache.uima.util.Logger; -import org.apache.uima.util.ProcessTrace; - -import opennlp.tools.ml.maxent.GIS; -import opennlp.tools.sentdetect.SentenceDetectorFactory; -import opennlp.tools.sentdetect.SentenceDetectorME; -import opennlp.tools.sentdetect.SentenceModel; -import opennlp.tools.sentdetect.SentenceSample; -import opennlp.tools.util.ObjectStream; -import opennlp.tools.util.ObjectStreamUtils; -import opennlp.tools.util.Span; -import opennlp.tools.util.TrainingParameters; -import opennlp.tools.util.model.ModelUtil; -import opennlp.uima.util.CasConsumerUtil; -import opennlp.uima.util.OpennlpUtil; -import opennlp.uima.util.SampleTraceStream; -import opennlp.uima.util.UimaUtil; - -/** - * OpenNLP SentenceDetector trainer. - *

- * Mandatory parameters - * - * - * - * - * - * - * - *
Type Name Description
String opennlp.uima.ModelName The name of the model file
String opennlp.uima.SentenceType The full name of the sentence type
String opennlp.uima.EOSCharsA string containing end-of-sentence characters
- * - * @deprecated will be removed after 1.7.1 release, there is no replacement - */ -@Deprecated -public final class SentenceDetectorTrainer extends CasConsumer_ImplBase { - - private List sentenceSamples = new ArrayList<>(); - - private Type mSentenceType; - - private String mModelName; - - private String language = "en"; - - private UimaContext mContext; - - private String eosChars; - - private File sampleTraceFile; - - private String sampleTraceFileEncoding; - - /** - * Initializes the current instance. - */ - public void initialize() throws ResourceInitializationException { - - super.initialize(); - - mContext = getUimaContext(); - - Logger mLogger = mContext.getLogger(); - - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " + - "trainer."); - } - - mModelName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.MODEL_PARAMETER); - - language = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.LANGUAGE_PARAMETER); - - eosChars = CasConsumerUtil.getOptionalStringParameter(mContext, "opennlp.uima.EOSChars"); - - - String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter( - getUimaContext(), "opennlp.uima.SampleTraceFile"); - - if (sampleTraceFileName != null) { - sampleTraceFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + sampleTraceFileName); - sampleTraceFileEncoding = CasConsumerUtil.getRequiredStringParameter( - getUimaContext(), "opennlp.uima.SampleTraceFileEncoding"); - } - } - - /** - * Initializes the current instance with the given type system. - */ - public void typeSystemInit(TypeSystem typeSystem) - throws ResourceInitializationException { - - String sentenceTypeName = - CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.SENTENCE_TYPE_PARAMETER); - - mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName); - } - - /** - * Process the given CAS object. - */ - public void processCas(CAS cas) { - - FSIndex sentenceIndex = cas.getAnnotationIndex(mSentenceType); - - Span[] sentSpans = new Span[sentenceIndex.size()]; - - int i = 0; - for (AnnotationFS sentenceAnnotation : sentenceIndex) { - sentSpans[i++] = new Span(sentenceAnnotation.getBegin(), sentenceAnnotation.getEnd()); - } - - // TODO: The line cleaning should be done more carefully - sentenceSamples.add(new SentenceSample(cas.getDocumentText().replace('\n', ' '), sentSpans)); - } - - /** - * Called if the processing is finished, this method - * does the training. - */ - public void collectionProcessComplete(ProcessTrace trace) - throws ResourceProcessException, IOException { - GIS.PRINT_MESSAGES = false; - - char eos[] = null; - if (eosChars != null) { - eos = eosChars.toCharArray(); - } - - SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create( - null, language, true, null, eos); - - // TrainingParameters mlParams = ModelUtil.createTrainingParameters(100, 5); - TrainingParameters mlParams = ModelUtil.createDefaultTrainingParameters(); - ObjectStream samples = ObjectStreamUtils.createObjectStream(sentenceSamples); - - Writer samplesOut; - - if (sampleTraceFile != null) { - samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding); - samples = new SampleTraceStream<>(samples, samplesOut); - } - - SentenceModel sentenceModel = SentenceDetectorME.train(language, samples, - sdFactory, mlParams); - - // dereference to allow garbage collection - sentenceSamples = null; - - File modelFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + mModelName); - - OpennlpUtil.serialize(sentenceModel, modelFile); - } - - /** - * The trainer is not stateless. - */ - public boolean isStateless() { - return false; - } - - /** - * Releases allocated resources. - */ - public void destroy() { - // dereference to allow garbage collection - sentenceSamples = null; - } -} diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java deleted file mode 100644 index 35f24a2c4..000000000 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.uima.tokenize; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; - -import org.apache.uima.UimaContext; -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.FSIndex; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.collection.CasConsumer_ImplBase; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceProcessException; -import org.apache.uima.util.Level; -import org.apache.uima.util.Logger; -import org.apache.uima.util.ProcessTrace; - -import opennlp.tools.ml.maxent.GIS; -import opennlp.tools.tokenize.TokenSample; -import opennlp.tools.tokenize.TokenSampleStream; -import opennlp.tools.tokenize.TokenizerFactory; -import opennlp.tools.tokenize.TokenizerME; -import opennlp.tools.tokenize.TokenizerModel; -import opennlp.tools.util.InputStreamFactory; -import opennlp.tools.util.MarkableFileInputStreamFactory; -import opennlp.tools.util.ObjectStream; -import opennlp.tools.util.ObjectStreamUtils; -import opennlp.tools.util.PlainTextByLineStream; -import opennlp.tools.util.Span; -import opennlp.tools.util.model.ModelUtil; -import opennlp.uima.util.CasConsumerUtil; -import opennlp.uima.util.ContainingConstraint; -import opennlp.uima.util.OpennlpUtil; -import opennlp.uima.util.SampleTraceStream; -import opennlp.uima.util.UimaUtil; - -/** - * OpenNLP Tokenizer trainer. - *

- * Mandatory parameters - * - * - * - * - * - * - *
Type Name Description
String opennlp.uima.ModelName The name of the model file
String opennlp.uima.SentenceType The full name of the sentence type
String opennlp.uima.TokenType The full name of the token type
- *

- * Optional parameters - * - * - * - * - *
Type Name Description
Boolean opennlp.uima.tokenizer.IsSkipAlphaNumerics
- * - * @deprecated will be removed after 1.7.1 release, there is no replacement - */ -@Deprecated - -public final class TokenizerTrainer extends CasConsumer_ImplBase { - - private static final String IS_ALPHA_NUMERIC_OPTIMIZATION = - "opennlp.uima.tokenizer.IsAlphaNumericOptimization"; - - private List tokenSamples = new ArrayList<>(); - - private UimaContext mContext; - - private Type mSentenceType; - - private Type mTokenType; - - private String mModelName; - - private String additionalTrainingDataFile; - - private String additionalTrainingDataEncoding; - - private String language; - - private Boolean isSkipAlphaNumerics; - - private Logger mLogger; - - private String sampleTraceFileEncoding; - - private File sampleTraceFile; - - /** - * Initializes the current instance. - */ - public void initialize() throws ResourceInitializationException { - - super.initialize(); - - mContext = getUimaContext(); - - mLogger = mContext.getLogger(); - - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the OpenNLP Tokenizer trainer."); - } - - mModelName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.MODEL_PARAMETER); - - language = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.LANGUAGE_PARAMETER); - - isSkipAlphaNumerics = - CasConsumerUtil.getOptionalBooleanParameter( - mContext, IS_ALPHA_NUMERIC_OPTIMIZATION); - - if (isSkipAlphaNumerics == null) { - isSkipAlphaNumerics = false; - } - - additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter( - getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_FILE); - - // If the additional training data is specified, the encoding must be provided! - if (additionalTrainingDataFile != null) { - additionalTrainingDataEncoding = CasConsumerUtil.getRequiredStringParameter( - getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_ENCODING); - } - - String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter( - getUimaContext(), "opennlp.uima.SampleTraceFile"); - - if (sampleTraceFileName != null) { - sampleTraceFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + sampleTraceFileName); - sampleTraceFileEncoding = CasConsumerUtil.getRequiredStringParameter( - getUimaContext(), "opennlp.uima.SampleTraceFileEncoding"); - } - } - - /** - * Initialize the current instance with the given type system. - */ - public void typeSystemInit(TypeSystem typeSystem) - throws ResourceInitializationException { - - String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.SENTENCE_TYPE_PARAMETER); - - mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName); - - String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.TOKEN_TYPE_PARAMETER); - - mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName); - } - - /** - * Process the given CAS object. - */ - public void processCas(CAS cas) { - - FSIndex sentenceAnnotations = cas.getAnnotationIndex(mSentenceType); - - for (AnnotationFS sentence : sentenceAnnotations) { - process(cas, sentence); - } - } - - private void process(CAS tcas, AnnotationFS sentence) { - FSIndex allTokens = tcas.getAnnotationIndex(mTokenType); - - ContainingConstraint containingConstraint = - new ContainingConstraint(sentence); - - Iterator containingTokens = tcas.createFilteredIterator( - allTokens.iterator(), containingConstraint); - - List openNLPSpans = new LinkedList<>(); - - while (containingTokens.hasNext()) { - AnnotationFS tokenAnnotation = containingTokens.next(); - - openNLPSpans.add(new Span(tokenAnnotation.getBegin() - - sentence.getBegin(), tokenAnnotation.getEnd() - - sentence.getBegin())); - } - - Span[] spans = openNLPSpans.toArray(new Span[openNLPSpans.size()]); - - Arrays.sort(spans); - - tokenSamples.add(new TokenSample(sentence.getCoveredText(), spans)); - } - - /** - * Called if the processing is finished, this method - * does the training. - */ - public void collectionProcessComplete(ProcessTrace arg0) - throws ResourceProcessException, IOException { - - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Collected " + tokenSamples.size() + - " token samples."); - } - - GIS.PRINT_MESSAGES = false; - - ObjectStream samples = ObjectStreamUtils.createObjectStream(tokenSamples); - - // Write stream to disk ... - // if trace file - // serialize events ... - - Writer samplesOut; - TokenizerModel tokenModel; - - if (additionalTrainingDataFile != null) { - - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Using addional training data file: " + additionalTrainingDataFile); - } - - InputStreamFactory additionalTrainingDataIn = new MarkableFileInputStreamFactory( - new File(additionalTrainingDataFile)); - - Charset additionalTrainingDataCharset = Charset - .forName(additionalTrainingDataEncoding); - - ObjectStream additionalSamples = new TokenSampleStream( - new PlainTextByLineStream(additionalTrainingDataIn, - additionalTrainingDataCharset)); - - samples = ObjectStreamUtils.createObjectStream(samples, additionalSamples); - } - - if (sampleTraceFile != null) { - samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding); - samples = new SampleTraceStream<>(samples, samplesOut); - } - - tokenModel = TokenizerME.train(samples, - TokenizerFactory.create(null, language, null, isSkipAlphaNumerics, null), - ModelUtil.createDefaultTrainingParameters()); - - // dereference to allow garbage collection - tokenSamples = null; - - File modelFile = new File(getUimaContextAdmin().getResourceManager() - .getDataPath() + File.separatorChar + mModelName); - - OpennlpUtil.serialize(tokenModel, modelFile); - } - - /** - * The trainer is not stateless. - */ - public boolean isStateless() { - return false; - } - - /** - * Releases allocated resources. - */ - public void destroy() { - // dereference to allow garbage collection - tokenSamples = null; - } -} diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java deleted file mode 100644 index 69cdac059..000000000 --- a/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.uima.util; - -import java.io.IOException; -import java.io.InputStream; - -import org.apache.uima.UimaContext; -import org.apache.uima.cas.Feature; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; -import org.apache.uima.resource.ResourceAccessException; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.util.Level; -import org.apache.uima.util.Logger; - -import opennlp.tools.dictionary.Dictionary; - -/** - * This is a util class for cas consumer. - * - * @deprecated will be removed after 1.7.1 release, there is no replacement - */ -@Deprecated - -public final class CasConsumerUtil { - - private CasConsumerUtil() { - // this is a util class must not be instanciated - } - - public static InputStream getOptionalResourceAsStream(UimaContext context, - String name) throws ResourceInitializationException { - try { - return context.getResourceAsStream(name); - } catch (ResourceAccessException e) { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"There is an internal error in the UIMA SDK: " + - e.getMessage(), - e}); - } - } - - /** - * Retrieves a resource as stream from the given context. - * - * @param context - * @param name - * @return the stream - * @throws ResourceInitializationException - */ - public static InputStream getResourceAsStream(UimaContext context, - String name) throws ResourceInitializationException { - - InputStream inResource = getOptionalResourceAsStream(context, name); - - if (inResource == null) { - throw new ResourceInitializationException( - ResourceAccessException.STANDARD_MESSAGE_CATALOG, - new Object[] {"Unable to load resource!"}); - } - - return inResource; - } - - /** - * Retrieves a type from the given type system. - * - * @param typeSystem - * @param name - * @return the type - * @throws ResourceInitializationException - */ - public static Type getType(TypeSystem typeSystem, String name) - throws ResourceInitializationException { - Type type = getOptionalType(typeSystem, name); - - if (type == null) { - throw new ResourceInitializationException( - ResourceInitializationException.INCOMPATIBLE_RANGE_TYPES, - new Object[] {"Unable to retrieve " + name + " type!"}); - } - - return type; - } - - /** - * Retrieves a type from the given type system. - * - * @param typeSystem - * @param name - * @return the type - * @throws ResourceInitializationException - */ - public static Type getOptionalType(TypeSystem typeSystem, String name) - throws ResourceInitializationException { - return typeSystem.getType(name); - } - - /** - * Retrieves a required parameter form the given context. - * - * @param context - * @param parameter - * @return the parameter - * @throws ResourceInitializationException - */ - public static String getRequiredStringParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - String value = getOptionalStringParameter(context, parameter); - - checkForNull(value, parameter); - - return value; - } - - /** - * Retrieves a required parameter form the given context. - * - * @param context - * @param parameter - * @return the parameter - * @throws ResourceInitializationException - */ - public static Integer getRequiredIntegerParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - Integer value = getOptionalIntegerParameter(context, parameter); - - checkForNull(value, parameter); - - return value; - } - - /** - * Retrieves a required parameter form the given context. - * - * @param context - * @param parameter - * @return the parameter - * @throws ResourceInitializationException - */ - public static Float getRequiredFloatParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - Float value = getOptionalFloatParameter(context, parameter); - - checkForNull(value, parameter); - - return value; - } - - /** - * Retrieves a required boolean parameter from the given context. - * - * @param context - * @param parameter - * @return the boolean parameter - * @throws ResourceInitializationException - */ - public static Boolean getRequiredBooleanParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - Boolean value = getOptionalBooleanParameter(context, parameter); - - checkForNull(value, parameter); - - return value; - } - - private static void checkForNull(Object value, String parameterName) - throws ResourceInitializationException { - - if (value == null) { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The " + parameterName + " is a required parameter!"}); - } - } - - /** - * Retrieves an optional boolean parameter from the given context. - * - * @param context - * @param parameter - * @return the boolean parameter or null if not set - * @throws ResourceInitializationException - */ - public static String getOptionalStringParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - Object value = getOptionalParameter(context, parameter); - - if (value == null) { - return null; - } else if (value instanceof String) { - return (String) value; - } else { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter + " does not have the expected type String"}); - } - } - - public static String[] getOptionalStringArrayParameter(UimaContext context, - String parameter) - throws ResourceInitializationException { - - Object value = getOptionalParameter(context, parameter); - - if (value instanceof String[]) { - return (String[]) value; - } else if (value == null) { - return new String[0]; - } else { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter - + " does not have the expected type String array"}); - } - } - - /** - * Retrieves an optional boolean parameter from the given context. - * - * @param context - * @param parameter - * @return the boolean parameter or null if not set - * @throws ResourceInitializationException - */ - public static Integer getOptionalIntegerParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - Object value = getOptionalParameter(context, parameter); - - if (value == null) { - return null; - } else if (value instanceof Integer) { - return (Integer) value; - } else { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter + " does not have the expected type Integer"}); - } - } - - /** - * Retrieves an optional boolean parameter from the given context. - * - * @param context - * @param parameter - * @param defaultValue value to use if the optional parameter is not set - * @return the boolean parameter or null if not set - * @throws ResourceInitializationException - */ - public static Integer getOptionalIntegerParameter(UimaContext context, String parameter, - int defaultValue) throws ResourceInitializationException { - - Integer value = getOptionalIntegerParameter(context, parameter); - - if (value == null) { - value = defaultValue; - } - - return value; - } - - /** - * Retrieves an optional boolean parameter from the given context. - * - * @param context - * @param parameter - * @return the boolean parameter or null if not set - * @throws ResourceInitializationException - */ - public static Float getOptionalFloatParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - Object value = getOptionalParameter(context, parameter); - - if (value == null) { - return null; - } else if (value instanceof Float) { - return (Float) value; - } else { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter + " does not have the expected type Float"}); - } - } - - /** - * Retrieves an optional boolean parameter from the given context. - * - * @param context - * @param parameter - * @return the boolean parameter or null if not set - * @throws ResourceInitializationException - */ - public static Boolean getOptionalBooleanParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - Object value = getOptionalParameter(context, parameter); - - if (value == null) { - return null; - } else if (value instanceof Boolean) { - return (Boolean) value; - } else { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter + " does not have the expected type Boolean"}); - } - } - - private static Object getOptionalParameter(UimaContext context, - String parameter) { - - Object value = context.getConfigParameterValue(parameter); - - Logger logger = context.getLogger(); - - if (logger.isLoggable(Level.INFO)) { - logger.log(Level.INFO, parameter + " = " + - (value != null ? value.toString() : "not set")); - } - - return value; - } - - /** - * Checks if the given feature has the expected type otherwise - * an exception is thrown. - * - * @param feature - * @param expectedType - * @throws ResourceInitializationException - if type does not match - */ - public static void checkFeatureType(Feature feature, String expectedType) - throws ResourceInitializationException { - if (!feature.getRange().getName().equals(expectedType)) { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The Feature " + feature.getName() + - " must be of type " + expectedType + " !" - }); - } - } - - public static Dictionary createOptionalDictionary(UimaContext context, String parameter) - throws ResourceInitializationException { - String dictionaryName = CasConsumerUtil.getOptionalStringParameter( - context, parameter); - - Dictionary dictionary = null; - - if (dictionaryName != null) { - - Logger logger = context.getLogger(); - - try { - - InputStream dictIn = CasConsumerUtil.getOptionalResourceAsStream(context, - dictionaryName); - - if (dictIn == null) { - String message = "The dictionary file " + dictionaryName + - " does not exist!"; - - if (logger.isLoggable(Level.WARNING)) { - logger.log(Level.WARNING, message); - } - - return null; - } - - dictionary = new Dictionary(dictIn); - - } catch (IOException e) { - // if this fails just print error message and continue - String message = "IOException during dictionary reading, " - + "running without dictionary: " + e.getMessage(); - - if (logger.isLoggable(Level.WARNING)) { - logger.log(Level.WARNING, message); - } - } - - return dictionary; - } else { - return null; - } - } -} diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/SampleTraceStream.java b/opennlp-uima/src/main/java/opennlp/uima/util/SampleTraceStream.java deleted file mode 100644 index 6cd1a5480..000000000 --- a/opennlp-uima/src/main/java/opennlp/uima/util/SampleTraceStream.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.uima.util; - -import java.io.IOException; -import java.io.Writer; - -import opennlp.tools.util.FilterObjectStream; -import opennlp.tools.util.ObjectStream; - -/** - * Writes the samples which are processed by this stream to a file. - * In the case the underlying stream is reseted this stream will - * detect that, and does not write the samples again to the output writer. - * @param - * - * @deprecated will be removed after 1.7.1 release, there is no replacement - */ -@Deprecated -public class SampleTraceStream extends FilterObjectStream { - - private final Writer out; - - private boolean wasReseted = false; - - public SampleTraceStream(ObjectStream samples, Writer out) { - super(samples); - - this.out = out; - } - - @Override - public void reset() throws IOException, UnsupportedOperationException { - super.reset(); - - wasReseted = true; - } - - public T read() throws IOException { - - T sample = samples.read(); - - if (sample != null && !wasReseted) { - out.append(sample.toString()); - out.append('\n'); - } - - return sample; - } -}