Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add customization of stemming function #5

Merged
merged 4 commits into from Aug 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 30 additions & 4 deletions README.md
Expand Up @@ -36,6 +36,32 @@ A [Naive Bayes](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) machine le

## Usage

Install by adding `:simple_bayes` and optionally `:stemmer` to `deps` in your
`mix.exs`:

```elixir
defp deps do
[ {:simple_bayes, "~> 0.10.0"},
{:stemmer, "~> 1.0"} # Optional, if you want to use stemming
]
end
```

Ensure `:simple_bayes` and optionally `:stemmer` are started before your
application:

```elixir
def application do
[ applications: [
:logger,
:simple_bayes,
:stemmer # Optional, if you want to use stemming
]
]
end
```


```elixir
bayes = SimpleBayes.init()
|> SimpleBayes.train(:apple, "red sweet")
Expand Down Expand Up @@ -65,7 +91,7 @@ bayes |> SimpleBayes.classify("Maybe green maybe red but definitely round and sw
# ]
```

With and without word stemming:
With and without word stemming (stemming requires a stemming function; we recommend [Stemmer](https://github.com/fredwu/stemmer)):

```elixir
SimpleBayes.init()
Expand All @@ -77,7 +103,7 @@ SimpleBayes.init()
# apple: 0.05719389206673358
# ]

SimpleBayes.init(stem: true)
SimpleBayes.init(stem: &Stemmer.stem/1) # Or any other stemming function
|> SimpleBayes.train(:apple, "buying apple")
|> SimpleBayes.train(:banana, "buy banana")
|> SimpleBayes.classify("buy apple")
Expand All @@ -96,7 +122,7 @@ config :simple_bayes, model: :multinomial
config :simple_bayes, storage: :memory
config :simple_bayes, default_weight: 1
config :simple_bayes, smoothing: 0
config :simple_bayes, stem: false
config :simple_bayes, stem: false # or a stemming function
config :simple_bayes, top: nil
config :simple_bayes, stop_words: ~w(
a about above after again against all am an and any are aren't as at be
Expand All @@ -123,7 +149,7 @@ SimpleBayes.init(
storage: :memory,
default_weight: 1,
smoothing: 0,
stem: false,
stem: false, # or a stemming function
top: nil,
stop_words: []
)
Expand Down
11 changes: 4 additions & 7 deletions lib/simple_bayes/trainer/token_stemmer.ex
@@ -1,18 +1,15 @@
defmodule SimpleBayes.Trainer.TokenStemmer do
@doc """
Stems the word, or passes the word through.
Stems the word using the `stemmer` function. Passes the word through unchanged if `stemmer` is not a function (for example `nil` or `false`).

## Examples

iex> SimpleBayes.Trainer.TokenStemmer.stem("buying", nil)
"buying"

iex> SimpleBayes.Trainer.TokenStemmer.stem("buying", false)
"buying"

iex> SimpleBayes.Trainer.TokenStemmer.stem("buying", true)
iex> SimpleBayes.Trainer.TokenStemmer.stem("buying", &Stemmer.stem/1)
"buy"
"""
def stem(word, true), do: Stemmer.stem(word)
def stem(word, _), do: word
def stem(word, stemmer) when is_function(stemmer), do: stemmer.(word)
def stem(word, _), do: word
end
4 changes: 2 additions & 2 deletions mix.exs
Expand Up @@ -17,16 +17,16 @@ defmodule SimpleBayes.Mixfile do
end

def application do
[applications: [:logger, :math, :decimal, :stemmer]]
[applications: [:logger, :math, :decimal]]
end

defp deps do
[
{:ex_doc, ">= 0.0.0", only: :dev},
{:faker, ">= 0.0.0", only: :test},
{:stemmer, "~> 1.0.0", only: :test},
{:math, ">= 0.0.0"},
{:decimal, ">= 0.0.0"},
{:stemmer, "~> 1.0.0"}
]
end

Expand Down
3 changes: 1 addition & 2 deletions mix.lock
Expand Up @@ -2,5 +2,4 @@
"earmark": {:hex, :earmark, "1.0.1", "2c2cd903bfdc3de3f189bd9a8d4569a075b88a8981ded9a0d95672f6e2b63141", [:mix], []},
"ex_doc": {:hex, :ex_doc, "0.13.0", "aa2f8fe4c6136a2f7cfc0a7e06805f82530e91df00e2bff4b4362002b43ada65", [:mix], [{:earmark, "~> 1.0", [hex: :earmark, optional: false]}]},
"math": {:hex, :math, "0.2.0", "e7d94733ef0f3a9051a7de58a8f30aa13bee4c7824020580a39dd19174f9cadc", [:mix], []},
"faker": {:hex, :faker, "0.6.0", "2d2ff0879d6b10fab5fb47eb2c1149b811e4af780a17022aa249deb3a7156d2b", [:mix], []},
"stemmer": {:hex, :stemmer, "1.0.0", "f39cc4a8ee63eb2b39daa3d43e4dafef42087e24c2f6fafea0be6e6ba2ecf862", [:mix], []}}
"faker": {:hex, :faker, "0.6.0", "2d2ff0879d6b10fab5fb47eb2c1149b811e4af780a17022aa249deb3a7156d2b", [:mix], []}}
2 changes: 1 addition & 1 deletion test/simple_bayes/model/bernoulli_test.exs
Expand Up @@ -90,7 +90,7 @@ defmodule SimpleBayes.BernoulliTest do
end

test "stemming" do
result = SimpleBayes.init(model: :bernoulli, stem: true)
result = SimpleBayes.init(model: :bernoulli, stem: &Stemmer.stem/1)
|> SimpleBayes.train(:apple, "buying apple")
|> SimpleBayes.train(:banana, "buy banana")
|> SimpleBayes.classify("buy apple")
Expand Down
2 changes: 1 addition & 1 deletion test/simple_bayes/model/binarized_multinomial_test.exs
Expand Up @@ -77,7 +77,7 @@ defmodule SimpleBayes.BinarizedMultinomialTest do
end

test "stemming" do
result = SimpleBayes.init(model: :binarized_multinomial, stem: true)
result = SimpleBayes.init(model: :binarized_multinomial, stem: &Stemmer.stem/1)
|> SimpleBayes.train(:apple, "buying apple")
|> SimpleBayes.train(:banana, "buy banana")
|> SimpleBayes.classify("buy apple")
Expand Down
2 changes: 1 addition & 1 deletion test/simple_bayes/model/multinomial_test.exs
Expand Up @@ -55,7 +55,7 @@ defmodule SimpleBayes.MultinomialTest do
end

test "stemming" do
result = SimpleBayes.init(stem: true)
result = SimpleBayes.init(stem: &Stemmer.stem/1)
|> SimpleBayes.train(:apple, "buying apple")
|> SimpleBayes.train(:banana, "buy banana")
|> SimpleBayes.classify("buy apple")
Expand Down