Source: Natural Language Processing with Java Cookbook [Packt](https://www.packtpub.com/product/natural-language-processing-with-java-cookbook/9781789801156)

## Tokenizing with the OpenNLP Library

### Load POM dependencies from [OpenNLP](https://opennlp.apache.org/maven-dependency.html)

In [2]:
%%loadFromPOM
<repositories>
  <repository>
    <id>apache opennlp snapshot</id>
    <url>https://repository.apache.org/content/repositories/snapshots/</url>
  </repository>
</repositories>

<dependency>
    <groupId>org.apache.opennlp</groupId>
    <artifactId>opennlp-tools</artifactId>
    <version>1.9.0</version>
</dependency>

Use **SimpleTokenizer**

In [5]:
import opennlp.tools.tokenize.SimpleTokenizer;

/**
 * Tokenizes a sentence with OpenNLP's {@code SimpleTokenizer} and prints
 * each resulting token on its own line of standard output.
 *
 * @param sentence the text to split into tokens
 */
public void tokenizeSentence(String sentence) {
    String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentence);
    for (int i = 0; i < tokens.length; i++) {
        System.out.println(tokens[i]);
    }
}

// Run the SimpleTokenizer demo on a sample sentence.
String phrase = "This is the best day of my life, as some would say.";
tokenizeSentence(phrase);

This
is
the
best
day
of
my
life
,
as
some
would
say
.


## Tokenizing with OpenNLP's Maximum Entropy Tokenizer

In [7]:
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;

In [15]:
/**
 * Tokenizes {@code phrase} with OpenNLP's maximum-entropy tokenizer
 * ({@code TokenizerME}) and prints each token on its own line.
 *
 * <p>The tokenizer model is loaded from {@code ../models/en-token.bin}. Problems
 * loading the model are reported on the console instead of being thrown, so the
 * method never propagates a checked exception to the caller.
 *
 * @param phrase the text to split into tokens
 */
public void tokenizeMaxEntropy(String phrase) {
    File modelFile = new File("../models/", "en-token.bin");
    try (InputStream modelInputStream = new FileInputStream(modelFile)) {
        TokenizerModel tknModel = new TokenizerModel(modelInputStream);
        Tokenizer tokenizer = new TokenizerME(tknModel);

        String[] tokenList = tokenizer.tokenize(phrase);
        for (String token : tokenList) {
            System.out.println(token);
        }
    } catch (FileNotFoundException e) {
        // Include the path so the user can see which location was tried.
        System.out.println("File is not found: " + modelFile.getPath());
    } catch (IOException e) {
        // Previously swallowed silently; a model that cannot be read or parsed
        // would otherwise produce no output and no hint about what went wrong.
        System.err.println("Could not load tokenizer model " + modelFile.getPath()
                + ": " + e.getMessage());
    }
}

// Run the maximum-entropy tokenizer demo on a sample sentence.
String sampleText = "This is the best day indeed!";
tokenizeMaxEntropy(sampleText);

This
is
the
best
day
indeed
!


## Tokenizing manually with Scanner

In [8]:
import java.util.ArrayList;
import java.util.Scanner;

/**
 * Tokenizes {@code phrase} using a {@link Scanner} with its default delimiter
 * and prints each token on its own line.
 *
 * <p>Unlike the OpenNLP tokenizers, punctuation is not separated from the
 * adjacent word (e.g. {@code "life,"} stays a single token).
 *
 * @param phrase the text to split into tokens
 */
public void tokenizeManually(String phrase) {
    ArrayList<String> list = new ArrayList<>();
    // Scanner is Closeable; try-with-resources releases it deterministically.
    try (Scanner scanner = new Scanner(phrase)) {
        while (scanner.hasNext()) {
            list.add(scanner.next());
        }
    }

    for (String token : list) {
        System.out.println(token);
    }
}

// Run the Scanner-based demo on a sample sentence.
String phrase = "This is the best day of my life, as some would say.";
tokenizeManually(phrase);

This
is
the
best
day
of
my
life,
as
some
would
say.
