In [1]:
using Test

In [73]:
"""
    cleansentence(sentence)

Return `sentence` with separators and punctuation normalized for word splitting.

Three replacements are applied in order:

1. every comma becomes a space;
2. an apostrophe directly followed or directly preceded by a whitespace
   character is replaced, together with that whitespace character, by one space;
3. every character that is not a lowercase ASCII letter, a digit, whitespace,
   or an apostrophe is removed.

Uppercase letters are removed by the last step.
"""
function cleansentence(sentence)
    # Order matters: each replacement runs on the result of the previous one.
    steps = (
        "," => " ",
        r"\'\s|\s\'" => " ",
        r"[^a-z0-9\s\']" => "",
    )
    return foldl(replace, steps; init=sentence)
end

cleansentence (generic function with 1 method)

In [74]:
"""
    wordcount(sentence)

Count how many times each word occurs in `sentence`, ignoring case.

The sentence is lowercased, passed through `cleansentence`, and split on
whitespace. One leading and one trailing apostrophe are then stripped from
every token, so a quoted word is counted together with its unquoted form.
Tokens that end up empty, or that still contain more than one apostrophe,
are skipped.

Returns a `Dict` mapping each word to its number of occurrences.
"""
function wordcount(sentence)
    counts = Dict()
    for token in split(cleansentence(lowercase(sentence)))
        # Normalize before the lookup: looking up the raw token first would miss
        # an existing entry and reset its count to 1 (e.g. "large large'").
        word = replace(token, r"^\'|\'$" => "")
        if isempty(word) || count(==('\''), word) > 1
            continue
        end
        counts[word] = get(counts, word, 0) + 1
    end
    return counts
end

wordcount (generic function with 1 method)

In [75]:
# Scratch check: this sentence contains no commas, so replacing "," leaves it unchanged.
replace("Joe can't tell between 'large' and large.", "," => " ")

"Joe can't tell between 'large' and large."

In [76]:
@testset "no words" begin
    # Input consisting only of whitespace and punctuation must yield no entries.
    punctuation = " .\n,\t!^&*()~@#\$%{}[]:;'/<>"
    @test wordcount(punctuation) == Dict()
end

[0m[1mTest Summary: | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
no words      | [32m   1  [39m[36m    1[39m


Test.DefaultTestSet("no words", Any[], 1, false, false)

In [77]:
@testset "count one word" begin
    expected = Dict("word" => 1)
    @test wordcount("word") == expected
end

[0m[1mTest Summary:  | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
count one word | [32m   1  [39m[36m    1[39m


Test.DefaultTestSet("count one word", Any[], 1, false, false)

In [78]:
@testset "count one of each word" begin
    expected = Dict("one" => 1, "of" => 1, "each" => 1)
    @test wordcount("one of each") == expected
end

@testset "multiple occurrences of a word" begin
    # "fish" appears four times; every other word appears once.
    expected = Dict("one" => 1, "fish" => 4, "two" => 1, "red" => 1, "blue" => 1)
    @test wordcount("one fish two fish red fish blue fish") == expected
end

[0m[1mTest Summary:          | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
count one of each word | [32m   1  [39m[36m    1[39m
[0m[1mTest Summary:                  | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
multiple occurrences of a word | [32m   1  [39m[36m    1[39m


Test.DefaultTestSet("multiple occurrences of a word", Any[], 1, false, false)

In [79]:
@testset "handles cramped lists" begin
    # Commas with no surrounding whitespace still separate words.
    expected = Dict("one" => 1, "two" => 1, "three" => 1)
    @test wordcount("one,two,three") == expected
end

[0m[1mTest Summary:         | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
handles cramped lists | [32m   1  [39m[36m    1[39m


Test.DefaultTestSet("handles cramped lists", Any[], 1, false, false)

In [80]:
@testset "handles expanded lists" begin
    expected = Dict("one" => 1, "two" => 1, "three" => 1)
    @test wordcount("one,\ntwo,\nthree") == expected
end

@testset "ignore punctuation" begin
    sentence = "car: carpet as java: javascript!!&@\$%^&"
    expected = Dict("car" => 1, "carpet" => 1, "as" => 1, "java" => 1, "javascript" => 1)
    @test wordcount(sentence) == expected
end

@testset "include numbers" begin
    expected = Dict("testing" => 2, "1" => 1, "2" => 1)
    @test wordcount("testing, 1, 2 testing") == expected
end

@testset "normalize case" begin
    expected = Dict("go" => 3, "stop" => 2)
    @test wordcount("go Go GO Stop stop") == expected
end

@testset "with apostrophes" begin
    # Apostrophes inside a word are part of the word.
    cases = [
        "First: don't laugh. Then: don't cry." =>
            Dict("first" => 1, "don't" => 2, "laugh" => 1, "then" => 1, "cry" => 1),
        "Should've could've would've" =>
            Dict("should've" => 1, "could've" => 1, "would've" => 1),
    ]
    for (sentence, expected) in cases
        @test wordcount(sentence) == expected
    end
end

[0m[1mTest Summary:          | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
handles expanded lists | [32m   1  [39m[36m    1[39m
[0m[1mTest Summary:      | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
ignore punctuation | [32m   1  [39m[36m    1[39m
[0m[1mTest Summary:   | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
include numbers | [32m   1  [39m[36m    1[39m
[0m[1mTest Summary:  | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
normalize case | [32m   1  [39m[36m    1[39m
[0m[1mTest Summary:    | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
with apostrophes | [32m   2  [39m[36m    2[39m


Test.DefaultTestSet("with apostrophes", Any[], 2, false, false)

In [81]:
@testset "with quotations" begin
    # The quoted 'large' and the bare large must be counted as the same word.
    sentence = "Joe can't tell between 'large' and large."
    expected = Dict(
        "joe" => 1, "can't" => 1, "tell" => 1, "between" => 1, "large" => 2, "and" => 1,
    )
    @test wordcount(sentence) == expected
end

@testset "substrings from the beginning" begin
    sentence = "Joe can't tell between app, apple and a."
    expected = Dict(
        "joe" => 1, "can't" => 1, "tell" => 1, "between" => 1,
        "app" => 1, "apple" => 1, "and" => 1, "a" => 1,
    )
    @test wordcount(sentence) == expected
end

@testset "multiple spaces not detected as a word" begin
    expected = Dict("multiple" => 1, "whitespaces" => 1)
    @test wordcount(" multiple   whitespaces") == expected
end

@testset "alternating word separators not detected as a word" begin
    sentence = ",\n,one,\n ,two \n 'three'"
    expected = Dict("one" => 1, "two" => 1, "three" => 1)
    @test wordcount(sentence) == expected
end

[0m[1mTest Summary:   | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
with quotations | [32m   1  [39m[36m    1[39m
[0m[1mTest Summary:                 | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
substrings from the beginning | [32m   1  [39m[36m    1[39m
[0m[1mTest Summary:                          | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
multiple spaces not detected as a word | [32m   1  [39m[36m    1[39m
[0m[1mTest Summary:                                      | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
alternating word separators not detected as a word | [32m   1  [39m[36m    1[39m


Test.DefaultTestSet("alternating word separators not detected as a word", Any[], 1, false, false)