-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Slides to introduce the problem and approach, and basic working bigram solution script.
- Loading branch information
1 parent
f954bc0
commit 61d4e9e
Showing
3 changed files
with
150 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
############################################################################### | ||
# Set default behavior to automatically normalize line endings. | ||
############################################################################### | ||
* text=auto | ||
|
||
############################################################################### | ||
# Set default behavior for command prompt diff. | ||
# | ||
# This is needed for earlier builds of msysgit that do not have it on by | ||
# default for csharp files. | ||
# Note: This is only used by command line | ||
############################################################################### | ||
#*.cs diff=csharp | ||
|
||
############################################################################### | ||
# Set the merge driver for project and solution files | ||
# | ||
# Merging from the command prompt will add diff markers to the files if there | ||
# are conflicts (Merging from VS is not affected by the settings below, in VS | ||
# the diff markers are never inserted). Diff markers may cause the following | ||
# file extensions to fail to load in VS. An alternative would be to treat | ||
# these files as binary, so that they always conflict and require user | ||
# intervention with every merge. To do so, just uncomment the entries below. | ||
############################################################################### | ||
#*.sln merge=binary | ||
#*.csproj merge=binary | ||
#*.vbproj merge=binary | ||
#*.vcxproj merge=binary | ||
#*.vcproj merge=binary | ||
#*.dbproj merge=binary | ||
#*.fsproj merge=binary | ||
#*.lsproj merge=binary | ||
#*.wixproj merge=binary | ||
#*.modelproj merge=binary | ||
#*.sqlproj merge=binary | ||
#*.wwaproj merge=binary | ||
|
||
############################################################################### | ||
# behavior for image files | ||
# | ||
# image files are treated as binary by default. | ||
############################################################################### | ||
#*.jpg binary | ||
#*.png binary | ||
#*.gif binary | ||
|
||
############################################################################### | ||
# diff behavior for common document formats | ||
# | ||
# Convert binary document formats to text before diffing them. This feature | ||
# is only available from the command line. Turn it on by uncommenting the | ||
# entries below. | ||
############################################################################### | ||
#*.doc diff=astextplain | ||
#*.DOC diff=astextplain | ||
#*.docx diff=astextplain | ||
#*.DOCX diff=astextplain | ||
#*.dot diff=astextplain | ||
#*.DOT diff=astextplain | ||
#*.pdf diff=astextplain | ||
#*.PDF diff=astextplain | ||
#*.rtf diff=astextplain | ||
#*.RTF diff=astextplain |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
// Sample text: What a Wonderful World
// http://en.wikipedia.org/wiki/What_a_Wonderful_World
//
// Corpus fed to the bigram generator below. It is a triple-quoted string so
// that the double quotes inside the lyrics need no escaping. The text must
// contain only the lyrics: any stray markup (such as table delimiters) would
// be split into "words" and pollute the bigrams.

let sample = """
I see trees of green, red roses, too,
I see them bloom, for me and you
And I think to myself
What a wonderful world.
I see skies of blue, and clouds of white,
The bright blessed day, the dark sacred night
And I think to myself
What a wonderful world.
The colors of the rainbow, so pretty in the sky,
Are also on the faces of people going by.
I see friends shaking hands, sayin', "How do you do?"
They're really sayin', "I love you."
I hear babies cryin'. I watch them grow.
They'll learn much more than I'll ever know
And I think to myself
What a wonderful world
Yes, I think to myself
What a wonderful world"""
|
||
// just bigrams, simple approach | ||
|
||
// Break a string into "words".
//
// The text is split on the whitespace characters listed in `separators`, and
// the empty entries produced by consecutive separators (e.g. blank lines or
// double spaces) are dropped. Punctuation stays attached to the words.
//
// '\r' is a separator so that text with Windows (CRLF) line endings does not
// leave a carriage return glued to the last word of every line, which would
// defeat later checks such as word.EndsWith("."). '\t' is included so that
// tab-indented text splits the same way as space-indented text.

let separators = [|' '; '\n'; '\r'; '\t' |]
let wordify (text:string) =
    text.Split(separators, System.StringSplitOptions.RemoveEmptyEntries)
|
||
// Group a sequence of words into bigrams: one two-element array
// [| first; second |] for every pair of adjacent words, in order.
// The result is a lazy sequence produced by a sliding window of size 2.

let bigramify (text:string[]) : seq<string[]> =
    Seq.windowed 2 text
|
||
// Find all the bigrams that begin with the given word and return every
// possible "next word" as an array.
// Order follows the input, and duplicates are kept: a word that follows
// `word` several times appears several times in the result.

let followingWords word (bigrams:string[] seq) =
    bigrams
    |> Seq.choose (fun pair ->
        if pair.[0] = word then Some pair.[1] else None)
    |> Array.ofSeq
|
||
// Pick a random next word among the words that follow `word` in the bigrams.
// Returns None when no bigram starts with `word`, otherwise Some candidate
// chosen uniformly by index from the followers array (so a follower that
// occurs more often is proportionally more likely to be picked).

let rng = System.Random ()
let nextWord (bigrams:string[] seq) word =
    let candidates = followingWords word bigrams
    if Array.isEmpty candidates then
        None
    else
        let index = rng.Next(candidates.Length)
        Some candidates.[index]
|
||
// Given a starting word and a text sample, repeatedly look up a random next
// word and append it to the sentence (separated by a single space), until
// either no next word is found or the appended word ends with ".", so that
// the result "looks like" a full sentence.
//
// start  : first word of the sentence; it is emitted as-is, even when it
//          never occurs in the sample (the result is then just `start`).
// sample : text the bigrams are extracted from.
// returns: the generated sentence.
//
// NOTE: generation only stops on a dead end or on a word ending with ".".
// A sample whose bigrams form a cycle with neither would never terminate.
let generateFrom (start:string) (sample:string) =
    // The bigram sequence is lazy; cache it so that the windows are computed
    // once instead of being rebuilt for every generated word.
    let bigrams =
        sample
        |> wordify
        |> bigramify
        |> Seq.cache
    let next = nextWord bigrams
    let rec generate sentence word =
        match next word with
        | None -> sentence
        | Some follower ->
            let extended = sentence + " " + follower
            if follower.EndsWith(".") then extended
            else generate extended follower
    generate start start
|
||
// ... see it in action: generate one sentence starting from "I" and one
// starting from "you". The next word is picked at random, so the output
// varies from run to run.
generateFrom "I" sample
generateFrom "you" sample