In [1]:
import nltk

# read file
with open('ebook.txt', 'r', encoding="utf8") as f:
    text = f.read()

# split to sentences, and split sentences to tokens
sentences = nltk.sent_tokenize(text)
tokens_in_sentences = [nltk.word_tokenize(sent) for sent in sentences]

# remove punctuation
 # does not remove utf punc, to do so use e.g. https://stackoverflow.com/questions/11066400/remove-punctuation-from-unicode-formatted-strings
tokens_in_sentences_nopunc = []
from string import punctuation
for token_sentence in tokens_in_sentences:
    tokens_in_sentences_nopunc.append([token for token in token_sentence if token not in punctuation])    

## POS tagging

In [2]:
# tag words in sentences first
tagged = [nltk.pos_tag(tag) for tag in tokens_in_sentences_nopunc]

## NER with entity classification (using nltk.ne_chunk)

In [3]:
 # use nltk.ne_chunk ("_sents" is for sentences) - returns tree structure per sentence
chunked_sentences = nltk.ne_chunk_sents(tagged, binary=False)

for s in chunked_sentences:
    print(s)

(S
  (PERSON Front/NNP)
  (PERSON Matter/NNP Preface/NNP Foundations/NNP Problem/NNP)
  Statement/NNP
  Our/PRP$
  Simulation/NNP
  Application/NNP
  Soapbox/NNP
  on/IN
  (ORGANIZATION
    Use/NNP
    Cases/NNP
    Solution/NNP
    Approach/NNP
    Methodology/NNP
    Technique/NNP)
  and/CC
  (ORGANIZATION Process/NNP Additional/NNP)
  Topics/NNP
  Non-Functional/JJ
  Requirements/NNP
  Deliverables/NNP
  Roulette/NNP
  This/DT
  part/NN
  describes/VBZ
  the/DT
  game/NN
  of/IN
  (GPE Roulette/NNP))
(S
  (GPE Roulette/NNP)
  is/VBZ
  the/DT
  game/NN
  with/IN
  the/DT
  big/JJ
  wheel/NN)
(S
  They/PRP
  spin/VBP
  the/DT
  wheel/NN
  toss/NN
  in/IN
  a/DT
  marble/JJ
  and/CC
  wait/NN
  for/IN
  the/DT
  wheel/NN
  to/TO
  stop/VB
  spinning/NN)
(S
  (GPE Roulette/NNP)
  is/VBZ
  –/NNP
  essentially/RB
  –/VBZ
  a/DT
  stateless/JJ
  game/NN
  with/IN
  numerous/JJ
  bets/NNS
  and/CC
  a/DT
  very/RB
  simple/JJ
  process/NN
  for/IN
  game/NN
  play/NN)
(S
  The/DT
  chapters

(S
  (PERSON Extend/NNP)
  (ORGANIZATION Question/NNP Throw/NNP)
  Design/NNP
  Natural/NNP
  Throw/NNP
  Design/NNP
  (PERSON Craps/NNP Throw/NNP)
  Design/NNP
  Eleven/NNP
  Throw/NNP
  Design/NNP
  Point/NNP
  Throw/NNP
  Design/NNP
  (PERSON Craps/NNP Game/NNP)
  Design/NNP
  Throw/NNP
  Deliverables/NNP
  Dice/NNP
  Class/NNP
  Dice/NNP
  Analysis/NNP
  Throw/NNP
  (PERSON Rework/NNP Dice/NNP)
  Design/NNP
  Dice/NNP
  Deliverables/NNP
  Dice/NNP
  Optimization/NNP
  Throw/NNP
  Builder/NNP
  Class/NNP
  Throw/NNP
  Builder/NNP
  Analysis/NNP
  Outcomes/NNP
  with/IN
  (PERSON Variable/NNP Odds/NNP)
  Refactoring/VBG
  The/DT
  (ORGANIZATION Outcome/NNP Hierarchy/NNP Soapbox/NNP)
  on/IN
  (ORGANIZATION Subclasses/NNP Soapbox/NNP)
  on/IN
  (ORGANIZATION Architecture/NNP Throw/NNP Builder/NNP Questions/NNP)
  and/CC
  (ORGANIZATION Answers/NNP Soapbox/NNP)
  on/IN
  Justification/NNP
  Design/NNP
  Light/NNP
  Design/NNP
  Heavy/NNP
  Common/NNP
  Design/NNP
  Throw-Builder/NNP
  

(S
  The/DT
  idea/NN
  is/VBZ
  that/IN
  someone/NN
  can/MD
  be/VB
  able/JJ
  to/TO
  work/VB
  with/IN
  a/DT
  language/NN
  –/NN
  coding/VBG
  –/NNS
  but/CC
  not/RB
  quite/RB
  ready/JJ
  to/TO
  do/VB
  design/NN
  work/NN)
(S
  The/DT
  coders/NNS
  can/MD
  be/VB
  given/VBN
  a/DT
  detailed/JJ
  “/NN
  specification/NN
  ”/NN
  from/IN
  which/WDT
  they/PRP
  can/MD
  write/VB
  code/NN)
(S
  This/DT
  distinction/NN
  between/IN
  designers/NNS
  and/CC
  coders/NNS
  doesn/VBP
  ’/JJ
  t/NNS
  really/RB
  exist/VBP)
(S
  It/PRP
  ’/VBZ
  s/JJ
  unhelpful/JJ
  to/TO
  try/VB
  and/CC
  make/VB
  this/DT
  distinction/NN)
(S
  All/DT
  programming/VBG
  involves/NNS
  design/NN
  at/IN
  some/DT
  level/NN)
(S
  If/IN
  we/PRP
  attempt/VBP
  to/TO
  write/VB
  a/DT
  design/NN
  specification/NN
  so/RB
  detailed/VBD
  that/IN
  someone/NN
  else/RB
  can/MD
  transform/VB
  it/PRP
  into/IN
  code/NN
  without/IN
  them/PRP
  having/VBG
  to/TO
  make/VB
  any/DT
 

(S
  The/DT
  completed/VBN
  project/NN
  results/NNS
  in/IN
  an/DT
  application/NN
  that/WDT
  can/MD
  provide/VB
  simple/JJ
  tabular/JJ
  results/NNS
  that/WDT
  shows/VBZ
  the/DT
  average/JJ
  losses/NNS
  expected/VBN
  from/IN
  each/DT
  betting/VBG
  strategy/NN)
(S
  The/DT
  interesting/JJ
  degree/NN
  of/IN
  freedom/NN
  in/IN
  each/DT
  of/IN
  the/DT
  simulations/NNS
  is/VBZ
  the/DT
  player/NN
  ’/NNP
  s/NN
  betting/VBG
  strategy/NN)
(S
  The/DT
  design/NN
  will/MD
  permit/VB
  easy/JJ
  adaptation/NN
  and/CC
  maintenance/NN
  of/IN
  the/DT
  player/NN
  ’/NNP
  s/NN
  strategies/NNS)
(S
  The/DT
  resulting/JJ
  application/NN
  program/NN
  can/MD
  be/VB
  extended/VBN
  by/IN
  inserting/VBG
  additional/JJ
  betting/NN
  systems/NNS
  which/WDT
  allows/VBZ
  exploration/NN
  of/IN
  what/WP
  if/IN
  any/DT
  player/NN
  actions/NNS
  can/MD
  minimize/VB
  the/DT
  losses/NNS)
(S (GPE Roulette/NN))
(S
  For/IN
  those/DT
  who/WP
  ’/VBP
  

(S
  Created/VBN
  using/VBG
  Sphinx/NNP
  1.3.1./CD
  index/NN
  modules/NNS
  next/IN
  previous/JJ
  Building/NNP
  Skills/NNP
  in/IN
  Object-Oriented/NNP
  Design/NNP
  3.1/CD
  documentation/NN
  »/JJ
  Foundations/NNP
  We/PRP
  ’/VBP
  ll/JJ
  set/VB
  our/PRP$
  goal/NN
  by/IN
  presenting/VBG
  several/JJ
  elements/NNS
  that/WDT
  make/VBP
  up/RP
  a/DT
  complete/JJ
  Problem/NNP
  Statement/NNP
  a/DT
  context/NN
  in/IN
  which/WDT
  the/DT
  problem/NN
  arises/VBZ
  the/DT
  problem/NN
  the/DT
  forces/NNS
  that/WDT
  influence/VBP
  the/DT
  choice/NN
  of/IN
  solution/NN
  the/DT
  solution/NN
  that/WDT
  balances/VBZ
  the/DT
  forces/NNS
  and/CC
  some/DT
  consequences/NNS
  of/IN
  the/DT
  chosen/NN
  solution/NN)
(S
  Based/VBN
  on/IN
  the/DT
  problem/NN
  statement/NN
  we/PRP
  ’/VBP
  ll/JJ
  present/JJ
  the/DT
  high-level/NN
  use/NN
  case/NN
  that/IN
  this/DT
  software/NN
  implements/NNS)
(S This/DT will/MD be/VB Our/PRP$ Simulation/NNP

  visualization/NN)
(S Consequences/NNS)
(S
  We/PRP
  ’/VBP
  re/VB
  going/VBG
  build/IN
  the/DT
  simulator/NN
  application/NN
  that/WDT
  supports/VBZ
  this/DT
  high-level/NN
  or/CC
  “/NN
  business/NN
  ”/NN
  use/NN
  case/NN)
(S
  We/PRP
  ’/VBP
  re/JJ
  not/RB
  going/VBG
  to/TO
  build/VB
  the/DT
  (ORGANIZATION IDE/NNP)
  to/TO
  build/VB
  the/DT
  new/JJ
  classes/NNS)
(S Any/DT IDE/NNP should/MD work/VB)
(S
  Additionally/RB
  we/PRP
  won/VBD
  ’/NNP
  t/NN
  address/NN
  how/WRB
  to/TO
  analyze/VB
  the/DT
  results/NNS)
(S
  One/CD
  of/IN
  the/DT
  most/RBS
  important/JJ
  consequences/NNS
  of/IN
  our/PRP$
  solution/NN
  is/VBZ
  that/IN
  we/PRP
  will/MD
  build/VB
  an/DT
  application/NN
  into/IN
  which/WDT
  new/JJ
  player/NN
  betting/VBG
  strategies/NNS
  can/MD
  be/VB
  inserted/VBN)
(S
  (GPE Clever/NNP)
  gamblers/NNS
  invent/JJ
  new/JJ
  strategies/NNS
  all/PDT
  the/DT
  time/NN)
(S
  We/PRP
  will/MD
  not/RB
  know/VB
  all/DT
  

(S
  We/PRP
  are/VBP
  aware/JJ
  that/IN
  our/PRP$
  statistical/JJ
  analysis/NN
  has/VBZ
  a/DT
  number/NN
  of/IN
  deficiencies/NNS)
(S
  We/PRP
  will/MD
  avoid/VB
  any/DT
  deeper/JJR
  investigation/NN
  into/IN
  statistics/NNS)
(S
  (ORGANIZATION Methodology/NNP Technique/NNP)
  and/CC
  Process/NNP
  We/PRP
  want/VBP
  to/TO
  focus/VB
  on/IN
  technical/JJ
  skills/NNS
  we/PRP
  won/VBD
  ’/NNP
  t/NNS
  follow/VBP
  any/DT
  particular/JJ
  software/NN
  development/NN
  methodology/NN
  too/RB
  closely/RB)
(S
  We/PRP
  prefer/VBP
  to/TO
  lift/VB
  up/RP
  a/DT
  few/JJ
  techniques/NNS
  which/WDT
  have/VBP
  a/DT
  great/JJ
  deal/NN
  of/IN
  benefit/NN)
(S (GPE Incremental/NNP) Development/NNP)
(S
  Each/DT
  chapter/NN
  is/VBZ
  a/DT
  “/JJ
  sprint/NN
  ”/NN
  that/WDT
  produces/VBZ
  some/DT
  collection/NN
  of/IN
  deliverables/NNS)
(S Each/DT part/NN is/VBZ a/DT complete/JJ release/NN)
(S (GPE Unit/NN) Testing/VBG)
(S
  We/PRP
  don/VBP
  ’/JJ
  t

(S
  We/PRP
  start/VBP
  with/IN
  a/DT
  problem/NN
  statement/NN
  define/VB
  the/DT
  use/NN
  case/NN
  and/CC
  then/RB
  write/VB
  software/NN
  which/WDT
  is/VBZ
  narrowly/RB
  focused/VBN
  on/IN
  the/DT
  actor/NN
  ’/NNP
  s/NN
  needs/NNS)
(S
  By/IN
  developing/VBG
  our/PRP$
  application/NN
  in/IN
  small/JJ
  increments/NNS
  we/PRP
  can/MD
  ask/VB
  ourself/PRP
  at/IN
  each/DT
  step/NN
  “/RB
  Does/VBZ
  this/DT
  meet/VB
  the/DT
  actor/NN
  ’/NNP
  s/NN
  needs/VBZ
  ”/IN
  It/PRP
  ’/VBZ
  s/JJ
  fairly/RB
  easy/JJ
  to/TO
  keep/VB
  a/DT
  software/NN
  development/NN
  project/NN
  focused/VBD
  when/WRB
  we/PRP
  have/VBP
  use/JJ
  cases/NNS
  to/TO
  describe/VB
  our/PRP$
  goals/NNS)
(S (GPE Performance/NN))
(S
  We/PRP
  don/VBP
  ’/JJ
  t/NN
  address/NN
  this/DT
  specifically/RB
  in/IN
  this/DT
  book/NN)
(S
  However/RB
  the/DT
  presence/NN
  of/IN
  extensive/JJ
  unit/NN
  tests/NNS
  allows/VBZ
  us/PRP
  to/TO
  alter/VB
  the/

  capture/NN)
(S
  A/DT
  project/NN
  begins/VBZ
  with/IN
  many/JJ
  kinds/NNS
  of/IN
  ignorance/NN
  and/CC
  takes/VBZ
  steps/NNS
  to/TO
  reduce/VB
  that/DT
  ignorance/NN)
(S
  Some/DT
  of/IN
  those/DT
  steps/NNS
  should/MD
  involve/VB
  revising/VBG
  or/CC
  consolidating/VBG
  previous/JJ
  learnings/NNS)
(S
  A/DT
  project/NN
  without/IN
  rework/NN
  is/VBZ
  suspiciously/RB
  under-engineered/JJ)
(S
  For/IN
  some/DT
  the/DT
  word/NN
  rework/NN
  has/VBZ
  a/DT
  negative/JJ
  connotation/NN)
(S
  If/IN
  you/PRP
  find/VBP
  the/DT
  word/NN
  distasteful/JJ
  please/NN
  replace/VB
  every/DT
  occurance/NN
  with/IN
  any/DT
  of/IN
  the/DT
  synonyms/JJ
  adaptation/NN
  evolution/NN
  enhancement/NN
  mutation/NN)
(S
  We/PRP
  prefer/VBP
  the/DT
  slightly/RB
  negative/JJ
  connotation/NN
  of/IN
  the/DT
  word/NN
  rework/NN
  because/IN
  it/PRP
  helps/VBZ
  managers/NNS
  realize/VB
  the/DT
  importance/NN
  of/IN
  incremental/JJ
  learning/

(S
  The/DT
  purpose/NN
  of/IN
  each/DT
  chapter/NN
  is/VBZ
  to/TO
  write/VB
  the/DT
  source/NN
  files/NNS
  for/IN
  one/CD
  or/CC
  more/JJR
  classes/NNS
  the/DT
  source/NN
  files/NNS
  for/IN
  one/CD
  or/CC
  more/JJR
  unit/NN
  tests/NNS
  and/CC
  assure/NN
  that/IN
  a/DT
  minimal/JJ
  set/NN
  of/IN
  (ORGANIZATION API/NNP)
  documentation/NN
  is/VBZ
  available/JJ)
(S (GPE Source/NN) (ORGANIZATION Files/NNS))
(S
  The/DT
  source/NN
  files/NNS
  are/VBP
  the/DT
  most/RBS
  important/JJ
  deliverable/NN)
(S
  In/IN
  effect/NN
  this/DT
  is/VBZ
  the/DT
  working/JJ
  application/NN
  program/NN)
(S
  Generally/RB
  we/PRP
  will/MD
  be/VB
  running/VBG
  this/DT
  application/NN
  from/IN
  within/IN
  the/DT
  (ORGANIZATION Integrated/NNP Development/NNP Environment/NNP)
  IDE/NNP)
(S
  We/PRP
  can/MD
  of/IN
  course/NN
  create/VBP
  a/DT
  stand-alone/JJ
  program/NN)
(S
  In/IN
  the/DT
  case/NN
  of/IN
  (GPE Python/NNP)
  the/DT
  “/NNP
  prog

(S
  The/DT
  “/NNP
  inside/IN
  ”/NNP
  bets/NNS
  are/VBP
  the/DT
  38/CD
  numbers/NNS
  and/CC
  small/JJ
  groups/NNS
  of/IN
  numbers/NNS
  these/DT
  bets/NNS
  all/DT
  have/VBP
  relatively/RB
  high/JJ
  odds/NNS)
(S
  The/DT
  “/NNP
  outside/IN
  ”/NNP
  bets/NNS
  are/VBP
  large/JJ
  groups/NNS
  of/IN
  numbers/NNS
  and/CC
  have/VBP
  relatively/RB
  low/JJ
  odds/NNS)
(S
  If/IN
  you/PRP
  are/VBP
  new/JJ
  to/TO
  casino/VB
  gambling/VBG
  see/VB
  (PERSON Odds/NNP)
  and/CC
  (PERSON Payouts/NNP)
  for/IN
  more/JJR
  information/NN
  on/IN
  odds/NNS
  and/CC
  why/WRB
  they/PRP
  are/VBP
  offered/VBN)
(S
  A/DT
  “/JJ
  straight/JJ
  bet/NN
  ”/NN
  is/VBZ
  a/DT
  bet/NN
  on/IN
  a/DT
  single/JJ
  number/NN)
(S
  There/EX
  are/VBP
  38/CD
  possible/JJ
  bets/NNS
  and/CC
  they/PRP
  pay/VBP
  odds/NNS
  of/IN
  35/CD
  to/TO
  1/CD)
(S
  Each/DT
  bin/NN
  on/IN
  the/DT
  wheel/NN
  pays/VBZ
  one/CD
  of/IN
  the/DT
  straight/JJ
  bets/NNS)
(S
  A

(S
  For/IN
  each/DT
  loss/NN
  however/RB
  add/VBZ
  the/DT
  amount/NN
  of/IN
  the/DT
  bet/NN
  to/TO
  the/DT
  end/NN
  of/IN
  the/DT
  sequence/NN
  as/IN
  a/DT
  loss/NN
  to/TO
  be/VB
  recouped/VBN)
(S
  Here/RB
  ’/NNP
  s/VBD
  an/DT
  example/NN
  of/IN
  the/DT
  cancellation/NN
  system/NN
  using/VBG
  1/CD
  2/CD
  3/CD
  4/CD
  5/CD
  6/CD
  7/CD
  8/CD
  9/CD)
(S Bet/NNP 1+9/CD)
(S A/DT win/NN)
(S
  Cancel/NNP
  1/CD
  and/CC
  9/CD
  leaving/VBG
  2/CD
  3/CD
  4/CD
  5/CD
  6/CD
  7/CD
  8/CD)
(S Bet/NNP 2+8/CD)
(S A/DT loss/NN)
(S Add/$ 10/CD leaving/VBG 2/CD 3/CD 4/CD 5/CD 6/CD 7/CD 8/CD 10/CD)
(S Bet/NNP 2+10/CD)
(S A/DT loss/NN)
(S
  Add/$
  12/CD
  leaving/VBG
  2/CD
  3/CD
  4/CD
  5/CD
  6/CD
  7/CD
  8/CD
  10/CD
  12/CD)
(S Bet/NNP 2+12/CD)
(S A/DT win/NN)
(S
  Cancel/NNP
  2/CD
  and/CC
  12/CD
  leaving/VBG
  3/CD
  4/CD
  5/CD
  6/CD
  7/CD
  8/CD
  10/CD)
(S Next/JJ bet/NN will/MD be/VB 3+10/CD)
(S
  A/DT
  player/NN
  could/MD
  use/VB
  the/DT

(S
  Example/VB
  the/DT
  “/NN
  1/CD
  ”/NN
  bin/NN
  has/VBZ
  the/DT
  following/VBG
  winning/NN
  (PERSON Outcomes/NNP)
  “/NNP
  1/CD
  ”/NNP
  “/NNP
  Red/NNP
  ”/NNP
  “/NNP
  Odd/NNP
  ”/NNP
  “/NNP
  (PERSON Low/NNP)
  ”/NNP
  “/NNP
  Column/NNP
  1/CD
  ”/NNP
  “/NNP
  Dozen/NNP
  1-12/JJ
  ”/NNP
  “/NNP
  Split/NNP
  1-2/JJ
  ”/NNP
  “/NNP
  Split/NNP
  1-4/JJ
  ”/NNP
  “/NNP
  Street/NNP
  1-2-3/JJ
  ”/NNP
  “/NNP
  Corner/NNP
  1-2-4-5/JJ
  ”/NNP
  “/NNP
  Five/NNP
  Bet/NNP
  ”/NNP
  “/NNP
  Line/NNP
  1-2-3-4-5-6/JJ
  ”/NNP
  “/NNP
  00-0-1-2-3/JJ
  ”/NNP
  “/NNP
  Dozen/NNP
  1/CD
  ”/NNP
  “/NNP
  (PERSON Low/NNP)
  ”/NNP
  and/CC
  “/NNP
  Column/NNP
  1/CD
  ”/NN)
(S Collaborators/NNS)
(S
  Collects/VBZ
  the/DT
  Outcomes/NNP
  into/IN
  bins/NNS
  used/VBN
  by/IN
  the/DT
  overall/JJ
  Game/NNP
  to/TO
  get/VB
  a/DT
  next/JJ
  set/NN
  of/IN
  winning/VBG
  Outcomes/NNS)
(S (GPE Table/JJ) Responsibilities/NNS)
(S
  A/DT
  collection/NN
  of/IN
  bets/NNS
  

  Player/NNP)
(S
  We/PRP
  ’/VBP
  ll/JJ
  update/JJ
  all/DT
  of/IN
  the/DT
  existing/VBG
  cards/NNS
  to/TO
  name/VB
  their/PRP$
  collaboration/NN
  with/IN
  (ORGANIZATION Bet/NNP))
(S
  What/WP
  card/NN
  has/VBZ
  responsibility/NN
  for/IN
  keeping/VBG
  all/DT
  of/IN
  the/DT
  (ORGANIZATION Bets/NNS))
(S
  (PERSON Does/NNP)
  Table/NNP
  list/NN
  that/IN
  as/IN
  a/DT
  responsibility/NN)
(S
  We/PRP
  should/MD
  update/VB
  these/DT
  cards/NNS
  to/TO
  clarify/VB
  this/DT
  collaboration/NN)
(S
  You/PRP
  should/MD
  continue/VB
  this/DT
  tour/NN
  working/VBG
  your/PRP$
  way/NN
  through/IN
  spinning/VBG
  the/DT
  Wheel/NNP
  to/TO
  get/VB
  a/DT
  list/NN
  of/IN
  winning/VBG
  Outcomes/NNS)
(S
  From/IN
  there/EX
  the/DT
  Game/NNP
  can/MD
  get/VB
  all/DT
  of/IN
  the/DT
  Bets/NNS
  from/IN
  the/DT
  (ORGANIZATION Table/NN)
  and/CC
  see/NN
  which/WDT
  are/VBP
  based/VBN
  on/IN
  winning/VBG
  (PERSON Outcomes/NNP)
  and/CC
  which/WDT

(S
  The/DT
  alternative/NN
  we/PRP
  have/VBP
  chosen/VBN
  is/VBZ
  to/TO
  encapsulate/VB
  the/DT
  payout/NN
  algorithm/NN
  along/IN
  with/IN
  the/DT
  relevant/JJ
  data/NN
  items/NNS
  in/IN
  a/DT
  single/JJ
  bundle/NN)
(S
  If/IN
  (PERSON Outcome/NNP)
  encapsulates/VBZ
  the/DT
  function/NN
  to/TO
  compute/VB
  the/DT
  amount/NN
  won/VBD
  isn/JJ
  ’/NNP
  t/NN
  it/PRP
  just/RB
  a/DT
  glorified/JJ
  subroutine/NN)
(S
  If/IN
  you/PRP
  ’/VBP
  re/JJ
  background/NN
  is/VBZ
  (ORGANIZATION BASIC/NNP)
  or/CC
  (ORGANIZATION FORTRAN/NNP)
  this/DT
  can/MD
  seem/VB
  to/TO
  be/VB
  true/JJ)
(S
  A/DT
  class/NN
  can/MD
  be/VB
  thought/VBN
  of/IN
  as/IN
  a/DT
  glorified/JJ
  subroutine/NN
  library/NN
  that/WDT
  captures/VBZ
  and/CC
  isolates/VBZ
  data/NNS
  elements/NNS
  along/IN
  with/IN
  their/PRP$
  associated/VBN
  functions/NNS)
(S
  A/DT
  class/NN
  is/VBZ
  more/RBR
  powerful/JJ
  than/IN
  a/DT
  simple/JJ
  subroutine/NN
  libra

(S
  This/DT
  means/VBZ
  the/DT
  __hash__/NN
  self/NN
  method/NN
  for/IN
  several/JJ
  objects/NNS
  that/WDT
  represent/VBP
  the/DT
  same/JJ
  Outcome/NNP
  must/MD
  also/RB
  have/VB
  the/DT
  same/JJ
  hash/NN
  code/NN)
(S
  When/WRB
  we/PRP
  put/VBD
  an/DT
  object/NN
  into/IN
  a/DT
  set/NN
  or/CC
  a/DT
  dictionary/JJ
  (PERSON Python/NNP)
  uses/VBZ
  the/DT
  hash/NN
  function/NN
  which/WDT
  is/VBZ
  implemented/VBN
  by/IN
  the/DT
  __hash__/NNP
  method/NN)
(S
  Sometimes/RB
  the/DT
  hash/NN
  codes/NNS
  are/VBP
  equal/JJ
  but/CC
  the/DT
  object/JJ
  attributes/VBZ
  aren/NNS
  ’/JJ
  t/VBP
  actually/RB
  equal/JJ)
(S
  This/DT
  is/VBZ
  called/VBN
  a/DT
  hash/NN
  collision/NN
  and/CC
  it/PRP
  ’/VBZ
  s/JJ
  rare/NN
  but/CC
  not/RB
  unexpected/JJ)
(S
  If/IN
  we/PRP
  don/VBP
  ’/JJ
  t/NN
  implement/NN
  this/DT
  the/DT
  default/NN
  version/NN
  isn/NN
  ’/NNP
  t/VBZ
  too/RB
  useful/JJ
  for/IN
  creating/VBG
  sets/NNS
  of/

  them/PRP)
(S
  The/DT
  definition/NN
  for/IN
  __hash__/NN
  in/IN
  section/NN
  3.3.1/CD
  of/IN
  the/DT
  (ORGANIZATION Language/NNP Reference/NNP Manual/NNP)
  tells/VBZ
  us/PRP
  to/TO
  do/VB
  the/DT
  calculation/NN
  using/VBG
  a/DT
  modulus/NN
  based/VBN
  on/IN
  sys.hash_info.width/NN)
(S
  That/DT
  value/NN
  is/VBZ
  the/DT
  number/NN
  of/IN
  bits/NNS
  the/DT
  actual/JJ
  value/NN
  we/PRP
  want/VBP
  to/TO
  use/VB
  is/VBZ
  sys.hash_info.modulus/JJ
  which/WDT
  is/VBZ
  based/VBN
  on/IN
  the/DT
  width/NN)
(S
  Outcome.__str__/NNP
  self/PRP
  →/VBD
  string/VBG
  Easy-to-read/JJ
  representation/NN
  of/IN
  this/DT
  outcome/NN)
(S See/VB (PERSON Message/NN) Formatting/VBG)
(S
  This/DT
  easy-to-read/JJ
  String/NNP
  output/NN
  method/NN
  is/VBZ
  essential/JJ)
(S
  This/DT
  should/MD
  return/VB
  a/DT
  String/NNP
  representation/NN
  of/IN
  the/DT
  name/NN
  and/CC
  the/DT
  odds/NNS)
(S
  A/DT
  form/NN
  that/WDT
  looks/VBZ
  like/IN

(S
  Since/IN
  a/DT
  Bin/NNP
  is/VBZ
  just/RB
  a/DT
  collection/NN
  of/IN
  individual/JJ
  Outcome/NNP
  objects/VBZ
  we/PRP
  have/VBP
  to/TO
  select/VB
  a/DT
  collection/NN
  class/NN
  to/TO
  contain/VB
  the/DT
  objects/NNS)
(S
  Design/NNP
  Decision/NNP
  –/NNP
  Choosing/VBG
  A/DT
  Collection/NN
  There/EX
  are/VBP
  five/CD
  basic/JJ
  Python/NNP
  types/NNS
  that/WDT
  are/VBP
  a/DT
  containers/NNS
  for/IN
  other/JJ
  objects/NNS)
(S Immutable/JJ Sequence/NNP tuple/NN)
(S
  This/DT
  is/VBZ
  a/DT
  good/JJ
  candidate/NN
  for/IN
  the/DT
  kind/NN
  of/IN
  collection/NN
  we/PRP
  need/VBP
  since/IN
  the/DT
  elements/NNS
  of/IN
  a/DT
  Bin/NNP
  don/NN
  ’/NNP
  t/NN
  change/NN)
(S
  Howver/WRB
  a/DT
  tuple/NN
  allows/VBZ
  duplicates/NNS
  and/CC
  retains/VBZ
  things/NNS
  in/IN
  a/DT
  specific/JJ
  order/NN
  we/PRP
  can/MD
  ’/VB
  t/JJ
  tolerate/NN
  duplicates/NNS
  and/CC
  order/NN
  doesn/NN
  ’/NNP
  t/NN
  matter/NN)
(S (PERS

(S
  In/IN
  most/JJS
  cases/NNS
  however/RB
  the/DT
  extra/JJ
  few/JJ
  instructions/NNS
  required/VBN
  to/TO
  delegate/VB
  a/DT
  method/NN
  to/TO
  an/DT
  internal/JJ
  object/NN
  is/VBZ
  offset/VBN
  by/IN
  the/DT
  benefits/NNS
  gained/VBN
  from/IN
  additional/JJ
  flexibility/NN)
(S
  How/WRB
  can/MD
  you/PRP
  introduce/VB
  (PERSON Set/NNP List/NNP Vector/NNP)
  when/WRB
  these/DT
  don/VBP
  ’/NN
  t/NN
  appear/VBP
  in/IN
  the/DT
  problem/NN)
(S
  We/PRP
  have/VBP
  to/TO
  make/VB
  a/DT
  distinction/NN
  between/IN
  the/DT
  classes/NNS
  that/WDT
  are/VBP
  uncovered/VBN
  during/IN
  analysis/NN
  of/IN
  the/DT
  problem/NN
  in/IN
  general/JJ
  and/CC
  classes/NNS
  are/VBP
  that/DT
  just/RB
  part/NN
  of/IN
  the/DT
  implementation/NN
  of/IN
  this/DT
  particular/JJ
  solution/NN)
(S
  This/DT
  emphasizes/VBZ
  the/DT
  distinction/NN
  between/IN
  the/DT
  problem/NN
  as/IN
  described/VBN
  by/IN
  users/NNS
  and/CC
  a/DT
  sol

(S
  In/IN
  The/DT
  (ORGANIZATION Container/NNP)
  Responsibility/NNP
  we/PRP
  ’/VBP
  ll/JJ
  look/NN
  at/IN
  the/DT
  container/NN
  aspect/NN
  in/IN
  detail/NN)
(S
  In/IN
  The/DT
  (ORGANIZATION Random/NNP Bin/NNP)
  Selection/NNP
  Responsibility/NNP
  we/PRP
  ’/VBP
  ll/JJ
  look/NN
  at/IN
  the/DT
  random/NN
  selection/NN
  aspects/NNS)
(S
  Based/VBN
  on/IN
  this/DT
  the/DT
  Constructing/VBG
  a/DT
  Wheel/NNP
  section/NN
  provides/VBZ
  a/DT
  description/NN
  of/IN
  how/WRB
  we/PRP
  can/MD
  build/VB
  the/DT
  Wheel/NNP
  instance/NN)
(S
  The/DT
  (ORGANIZATION Container/NNP)
  Responsibility/NNP
  Since/IN
  the/DT
  (ORGANIZATION Wheel/NNP)
  is/VBZ
  38/CD
  Bins/NNS
  it/PRP
  is/VBZ
  a/DT
  collection/NN)
(S
  We/PRP
  can/MD
  review/VB
  our/PRP$
  survey/NN
  of/IN
  available/JJ
  collections/NNS
  in/IN
  (GPE Design/NNP)
  Decision/NNP
  –/NNP
  Choosing/VBG
  A/DT
  Collection/NN
  for/IN
  some/DT
  guidance/NN
  here/RB)
(S
  In/IN
  thi

(S
  outcome/JJ
  Outcome/NNP
  –/NNP
  The/DT
  Outcome/NNP
  to/TO
  add/VB
  to/TO
  this/DT
  Bin/NNP
  Bin.next/NNP
  →/NNP
  Bin/NNP
  Generates/VBZ
  a/DT
  random/JJ
  number/NN
  between/IN
  0/CD
  and/CC
  37/CD
  and/CC
  returns/VBZ
  the/DT
  randomly/RB
  selected/VBN
  (PERSON Bin/NNP))
(S
  The/DT
  Random.choice/NNP
  function/NN
  of/IN
  the/DT
  random/NN
  module/NN
  will/MD
  select/VB
  one/CD
  of/IN
  the/DT
  available/JJ
  Bin/NNP
  s/NN
  from/IN
  the/DT
  bins/NNS
  list/NN)
(S
  Returns/NNS
  A/NNP
  Bin/NNP
  selected/VBN
  at/IN
  random/NN
  from/IN
  the/DT
  wheel/NN)
(S
  (GPE Return/NNP)
  type/NN
  (PERSON Bin/NNP)
  Bin.get/NNP
  bin/NN
  →/NNP
  Bin/NNP
  Returns/NNP
  the/DT
  given/VBN
  (PERSON Bin/NNP)
  from/IN
  the/DT
  internal/JJ
  collection/NN)
(S
  Parameters/NNS
  bin/VBP
  int/JJ
  –/NNP
  bin/NN
  number/NN
  in/IN
  the/DT
  range/NN
  zero/NN
  to/TO
  37/CD
  inclusive/NN)
(S Returns/VBZ The/DT requested/JJ Bin/NNP)
(S
  (GPE

(S
  As/IN
  with/IN
  split/NN
  bets/NNS
  the/DT
  bulk/NN
  of/IN
  the/DT
  layout/NN
  can/MD
  be/VB
  handled/VBN
  with/IN
  a/DT
  simple/JJ
  rule/NN
  to/TO
  distinguish/VB
  the/DT
  column/NN
  and/CC
  hence/RB
  the/DT
  “/JJ
  corners/NNS
  ”/VBP)
(S
  A/DT
  number/NN
  in/IN
  the/DT
  center/NN
  column/NN
  5/CD
  8/CD
  11/CD
  .../:
  32/CD
  is/VBZ
  a/DT
  member/NN
  of/IN
  four/CD
  corners/NNS)
(S
  All/DT
  of/IN
  the/DT
  numbers/NNS
  along/IN
  an/DT
  edge/NN
  are/VBP
  members/NNS
  of/IN
  two/CD
  corners/NNS)
(S
  For/IN
  example/NN
  4/CD
  is/VBZ
  part/NN
  of/IN
  1-2-4-5/JJ
  and/CC
  4-5-7-8/JJ)
(S
  At/IN
  the/DT
  ends/NNS
  1/CD
  3/CD
  and/CC
  34/CD
  36/CD
  we/PRP
  see/VBP
  outcomes/NNS
  that/IN
  members/NNS
  of/IN
  just/RB
  one/CD
  corner/JJR
  each/DT)
(S (GPE Line/NNP) Bets/NNS)
(S
  Six/CD
  numbers/NNS
  comprise/VBP
  a/DT
  line/NN
  each/DT
  number/NN
  is/VBZ
  a/DT
  member/NN
  of/IN
  one/CD
  or/CC
  two/CD


## NER with custom patterns

In [4]:
# custom grammar: adjective (optional, multiple) and proper noun (singular/plural)
grammar = "NP: {<JJ>*<NN|NNS>}"
cp = nltk.RegexpParser(grammar)

custom_parsed_sentences = []
for t in tagged:    
    result = cp.parse(t)
    custom_parsed_sentences.append(result)

## Implement your custom entity classification

In [5]:
import wikipedia
    
# extract wanted entities from the tree and put them in a list of queries
queries = []
for s in custom_parsed_sentences:
    for chunk in s:
        if (isinstance(chunk, nltk.tree.Tree) and chunk.label() == 'NP'):
            text = " ".join([word for word, tag in chunk.leaves()])
            queries.append(text)
            # print(text)
            
# remove duplicate entries by using "set"
queries = list(set(queries))

In [10]:
# get results from wikipedia for the term/query
def getWikiInfo(query):
    # search for the sentence on wikipedia, e.g. query = "A car is a wheeled motor vehicle used for transportation."
    results = wikipedia.search(query)
    
    if results:
        try:
            # print the first sentence of the first search result
            return (wikipedia.summary(results[0], sentences=1))
        except wikipedia.exceptions.DisambiguationError as e:
            # there are multiple results for this term, print the first option:
            try:
                return wikipedia.summary(e.options[0], sentences=1)
            except wikipedia.exceptions.DisambiguationError as e:
                # we cannot get any result without further choice, end
                pass

In [11]:
# categorize the entities in a sentence with own grammar
def categorizeSentence(s):
    tokens = nltk.word_tokenize(s)
    tokens_no_punc = [token for token in tokens if token not in punctuation]
    tagged = nltk.pos_tag(tokens_no_punc)

    # detect pattern “… is/VBZ a/DT free/JJ online/NN encyclopedia/NN …“
    grammar = "NP: {<VBZ><DT><JJ>*<NN|NNS>+}"
    cp = nltk.RegexpParser(grammar)

    result = cp.parse(tagged)
    return(result)

In [12]:
# extract text (only "NN") from the supplied tree
def getCategoryText(tree):
    for chunk in tree:
        if (isinstance(chunk, nltk.tree.Tree) and chunk.label() == 'NP'):
            result = ""
            for t in chunk:
                if t[1].startswith("NN"):
                    result += t[0] + " "

            return result.strip()

In [None]:
# for all custom entities in the text, extract info from wiki, categorize it and print it ("None" is unknown category)
for q in queries:
    wikInfo = getWikiInfo(q)
    if wikInfo:
        category = categorizeSentence(wikInfo)
        catText = getCategoryText(category)
    else:
        catText = "None"
    print(q, ": ", catText, "\n")

block :  term 

Available Bets :  online 

problems :  None 

multiplication :  None 

card :  None 

complete analyses :  laboratory instrument 

trivial :  None 

random.Random :  None 

different points :  None 

individual exercises :  None 

triples :  act 

synonyms adaptation :  genus 

external customer :  recipient 

other classes :  term 

red ” :  color 

related numbers :  sum 

values :  degree 

strategy :  plan 

version :  process 

right things :  None 

traditional procedural programming :  design principle 

n combo :  panel van 

circles :  simple 

Considerations :  concept 

money :  item 

technology :  collection 

abstract possibility :  None 

dice game :  None 

careful enumeration :  dispersing prism 

small ball :  term 

sequential collection :  None 

seed :  plant 

few techniques :  time management method 

Blackjack :  None 

simple rule :  sitcom comedy television show 

distinct layers :  role 

exclusive users :  drama series 

hash codes :  functio