Skip to content
Markov model that generates text
Branch: master
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Type Name Latest commit message Commit time
Failed to load latest commit information.



> Pkg.clone("")


Import text and clean it

# import text
f = open("mark_twain_books/adventures_of_tom_sawyer.txt")
corpus = readall(f);
# clean text
chars_to_remove = r"[^a-z ]" # I prefer whitelisting chars I want to keep
corpus = clean_corpus(corpus, chars_to_remove);

Create the Markov model

# create finite Markov model (create more for "tricke-down" effect)
ngram = 2
groupby = "words"
M = get_corpus_frequencies(sub_corpus_clean, ngram, groupby = groupby)
M = tuple(M) # add more Markov models to this tuple

Run the model

# get unique symbols (by words or by characters)
unique_symbols = unique(split(corpus)) # for words
unique_symbols = unique(split(corpus, "")) # for chars

# choose random ngram set of symbols from text
ϕ = get_phi(corpus, ngram, groupby = groupby)
@show ϕ

num_steps = 200
markov_chain_text = generate_text(ϕ, num_steps, unique_symbols, ngram, M,
join(markov_chain_text, " ")
You can’t perform that action at this time.