Skip to content

Commit

Permalink
Intro slides and minimal solution
Browse files Browse the repository at this point in the history
Slides to introduce the problem and approach, and basic working bigram
solution script.
  • Loading branch information
mathias-brandewinder committed Feb 7, 2015
1 parent f954bc0 commit 61d4e9e
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 0 deletions.
63 changes: 63 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto

###############################################################################
# Set default behavior for command prompt diff.
#
# This is needed for earlier builds of msysgit that do not have it on by
# default for csharp files.
# Note: This is only used by command line
###############################################################################
#*.cs diff=csharp

###############################################################################
# Set the merge driver for project and solution files
#
# Merging from the command prompt will add diff markers to the files if there
# are conflicts (Merging from VS is not affected by the settings below, in VS
# the diff markers are never inserted). Diff markers may cause the following
# file extensions to fail to load in VS. An alternative would be to treat
# these files as binary, so that they always conflict and require user
# intervention with every merge. To do so, just uncomment the entries below.
###############################################################################
#*.sln merge=binary
#*.csproj merge=binary
#*.vbproj merge=binary
#*.vcxproj merge=binary
#*.vcproj merge=binary
#*.dbproj merge=binary
#*.fsproj merge=binary
#*.lsproj merge=binary
#*.wixproj merge=binary
#*.modelproj merge=binary
#*.sqlproj merge=binary
#*.wwaproj merge=binary

###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
#*.jpg binary
#*.png binary
#*.gif binary

###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. Turn it on by uncommenting the
# entries below.
###############################################################################
#*.doc diff=astextplain
#*.DOC diff=astextplain
#*.docx diff=astextplain
#*.DOCX diff=astextplain
#*.dot diff=astextplain
#*.DOT diff=astextplain
#*.pdf diff=astextplain
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain
Binary file added organizer/Introduction.pptx
Binary file not shown.
87 changes: 87 additions & 0 deletions organizer/Minimal-Solution.fsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Sample text: lyrics of "What a Wonderful World".
// http://en.wikipedia.org/wiki/What_a_Wonderful_World
// Serves as the input text for the generateFrom calls at the end of
// this script. The literal begins with a line break right after the
// opening triple quote, so the string's first character is a newline.

let sample = """
I see trees of green, red roses, too,
I see them bloom, for me and you
And I think to myself
What a wonderful world.
I see skies of blue, and clouds of white,
The bright blessed day, the dark sacred night
And I think to myself
What a wonderful world.
The colors of the rainbow, so pretty in the sky,
Are also on the faces of people going by.
I see friends shaking hands, sayin', "How do you do?"
They're really sayin', "I love you."
I hear babies cryin'. I watch them grow.
They'll learn much more than I'll ever know
And I think to myself
What a wonderful world
Yes, I think to myself
What a wonderful world"""

// just bigrams, simple approach

// Break a string into "words".
//
// A word is any run of characters between separators. Punctuation stays
// attached to the word it touches (e.g. "green," or "world.").
//
// '\r' and '\t' are separators as well: if this script is checked out
// with Windows (CRLF) line endings, splitting on '\n' alone would leave
// words such as "world.\r", which neither end with "." nor compare
// equal to "world.".

let separators = [|' '; '\n'; '\r'; '\t' |]

// text: raw text to split.
// Returns the non-empty words, in their order of appearance.
let wordify (text:string) =
    text.Split(separators, System.StringSplitOptions.RemoveEmptyEntries)

// Pair every word with its immediate successor.
// Each bigram is a two-element array [| first; second |]; an input
// with fewer than two words produces an empty sequence.

let bigramify (text:string[]) =
    text
    |> Seq.pairwise
    |> Seq.map (fun (first, second) -> [| first; second |])

// Collect the possible "next words" for a given word.
// bigrams: arrays whose element 0 is a word and element 1 its successor.
// Returns element 1 of every bigram whose element 0 equals `word`,
// in sequence order and with repeats kept; an empty array if none match.

let followingWords word (bigrams:string[] seq) =
    [|
        for bigram in bigrams do
            if bigram.[0] = word then
                yield bigram.[1]
    |]

// Pick, at random, one of the words that follow `word` in the bigrams.
// Returns None when no bigram starts with `word`.

let rng = System.Random ()
let nextWord (bigrams:string[] seq) word =
    let followers = followingWords word bigrams
    if Array.isEmpty followers then
        None
    else
        // rng.Next(n) yields an index in [0, n)
        Some (followers.[rng.Next(followers.Length)])

// Given a text sample and a starting word, repeatedly search for a
// next word and append it to the sentence. Generation stops when the
// current word has no follower in the sample, or right after a word
// ending with ".", so that the result "looks like" a full sentence.
//
// start:  first word of the sentence; it is emitted as-is.
// sample: text the bigrams are extracted from.
// Returns the generated words joined by single spaces.
let generateFrom (start:string) (sample:string) =
    // The bigram sequence is enumerated once per generated word. Cache it
    // so the bigrams are derived from the word array a single time rather
    // than being rebuilt on every lookup.
    let bigrams =
        sample
        |> wordify
        |> bigramify
        |> Seq.cache
    let next = nextWord bigrams
    let rec generate sentence word =
        match next word with
        | None -> sentence
        | Some following ->
            let sentence = sentence + " " + following
            if following.EndsWith(".")
            then sentence
            else generate sentence following
    generate start start

// ... see it in action
// Each call returns one generated sentence that starts with the given
// word. Next words are drawn through System.Random, so the output
// generally differs from one run to the next.
sample |> generateFrom "I"
sample |> generateFrom "you"

0 comments on commit 61d4e9e

Please sign in to comment.