In [1]:
from music21 import stream, interval, corpus, instrument, pitch
from music21 import converter, note, chord, environment, duration, key
import notebook
import argparse
import pandas as pd
import string
import pathlib
from sklearn import preprocessing
import numpy as np
import json
import re
import random

In [2]:
filename = "/Users/DWBZe/Documents/Docs/Careers/data/editions.json"
fp = open(filename, "r")
jtxt = fp.read()
editions = json.loads(jtxt)
editions

{'Hi-Tech Edition': {'version': '1.1',
  'Help': 'Rule Book v2 booklet.pdf',
  'Game Parameters': 'gameParameters.json',
  'Opportunity Cards': 'opportunityCards.json',
  'Experience Cards': 'experienceCards.json',
  'Occupations': 'occupations.json',
  'Players': 'players.json',
  'Border Squares': 'borderSquares.json',
  'Rules': 'rules.json'},
 'Destination London': {'version': '1.0',
  'Help': 'Rule Book Destination London.pdf',
  'Game Parameters': 'gameParameters_UK.json',
  'Opportunity Cards': 'opportunityCards_UK.json',
  'Experience Cards': 'experienceCards_UK.json',
  'Occupations': 'occupations_UK.json',
  'Players': 'players.json',
  'Border Squares': 'borderSquares_UK.json',
  'Rules': 'rules_UK.json'}}

In [3]:
print(editions.keys())
print(editions['Hi-Tech Edition'])

dict_keys(['Hi-Tech Edition', 'Destination London'])
{'version': '1.1', 'Help': 'Rule Book v2 booklet.pdf', 'Game Parameters': 'gameParameters.json', 'Opportunity Cards': 'opportunityCards.json', 'Experience Cards': 'experienceCards.json', 'Occupations': 'occupations.json', 'Players': 'players.json', 'Border Squares': 'borderSquares.json', 'Rules': 'rules.json'}


In [4]:
fp = open("/Users/DWBZe/Documents/Docs/Careers/data/gameParameters.json", "r")
jtxt = fp.read()
game_parameters = json.loads(jtxt)
game_parameters

{'starting_salary': 2000,
 'starting_cash': 2000,
 'currency': 'dollars',
 'currency_symbol': '$',
 'starting_experience_cards': 0,
 'starting_opportunity_cards': 0,
 'default_game_points': 100,
 'timed_game': 0,
 'default_game_minutes': 60}

In [5]:
df = pd.DataFrame({
    'id': [0, 1, 2],
    'cats': [['A','B','C'],
             ['U','O','T'],
             ['T','C','U']]
})
df

Unnamed: 0,id,cats
0,0,"[A, B, C]"
1,1,"[U, O, T]"
2,2,"[T, C, U]"


In [9]:
#
# id  A B C U O T
#  0  1 1 1 0 0 0
#  1  0 0 0 1 1 1
#  2  0 0 1 1 0 1
exp = df.explode('cats')
exp

Unnamed: 0,id,cats
0,0,A
0,0,B
0,0,C
1,1,U
1,1,O
1,1,T
2,2,T
2,2,C
2,2,U


In [10]:
pd.crosstab(exp['id'], exp['cats'])

cats,A,B,C,O,T,U
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,1,1,0,0,0
1,0,0,0,1,1,1
2,0,0,1,0,1,1


In [12]:
pd.get_dummies(exp['cats'])

Unnamed: 0,A,B,C,O,T,U
0,1,0,0,0,0,0
0,0,1,0,0,0,0
0,0,0,1,0,0,0
1,0,0,0,0,0,1
1,0,0,0,1,0,0
1,0,0,0,0,1,0
2,0,0,0,0,1,0
2,0,0,1,0,0,0
2,0,0,0,0,0,1


### pack and unpack

In [9]:
#
# merge 2 dictionaries
# *  is unpacking operator for iterable objects
# ** unpacks dictionaries
#
x = {'a': 1, 'b':2}
y = {'c': 3, 'd': 4}
{**x,**y}

{'a': 1, 'b': 2, 'c': 3, 'd': 4}

In [18]:
nums1 = [1,2,3,4,5]
nums2 = [6,7,8,9,10]
print(nums1)
print(*nums1)
# merge lists
nums = [*nums1, *nums2]
nums

[1, 2, 3, 4, 5]
1 2 3 4 5


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [22]:
name = 'Don Bacon'
first, *middle, last = name
print(f'{first}\t{middle}\t{last}')

D	['o', 'n', ' ', 'B', 'a', 'c', 'o']	n


In [26]:
# pack using *
*names, = 'Don','Fred','Karen'
names

['Don', 'Fred', 'Karen']

In [42]:
def some_func(**kwargs):
    if 'reverse' in kwargs:
        v = kwargs['reverse']
        return v[len(v)::-1]

In [47]:
x = "abcdef"
some_func(reverse='Donald')

'dlanoD'

### Generators, NLTK

In [6]:
 # generator expression 
nums_squared_gc = (num**2 for num in range(5))

In [7]:
next(nums_squared_gc)

0

In [6]:
#
# reverse the digits of a positive number
#
def reverse(num):
    s = str(num)
    return int(s[len(s)::-1])

In [7]:
reverse(377821)

128773

In [32]:
import nltk
from nltk import word_tokenize, wordpunct_tokenize, regexp_tokenize
import string
# nltk.download('punkt')   # only do once
punct =  string.punctuation.replace("'", "")
punct = punct.replace('-', '')
punct = punct + '“‘’”'

def remove_punctuation(txt):
    nopunc = [char for char in txt if char not in punct]
    # Join the characters again to form the string.
    return ''.join(nopunc)

In [60]:
# generator to read large files
#
filename = "/data/text/ferlinghetti.txt"
file_lines = (row for row in open(filename, "r"))
for l in range(10):
    print(f'{l}: {next(file_lines)} ')
file_lines.close()
# next(file_lines) # throws StopIteration

0: The world is a beautiful place:
 
1: The world is a beautiful place
 
2: to be born into
 
3: if you don't mind happiness
 
4: not always being
 
5: so very much fun
 
6: if you don't mind a touch of hell
 
7: now and then
 
8: just when everything is fine
 
9: because even in heaven
 


In [61]:
filename = "/data/text/ferlinghetti.txt"
fp = open(filename, "r")
txt = fp.read()

txt = remove_punctuation(txt)

tokens = []
for l in txt.splitlines():
    print(f'line: {l}')
    tokens += l.split(' ')
tokens[:100]

line: The world is a beautiful place
line: The world is a beautiful place
line: to be born into
line: if you don't mind happiness
line: not always being
line: so very much fun
line: if you don't mind a touch of hell
line: now and then
line: just when everything is fine
line: because even in heaven
line: they don't sing
line: all the time
line: 
line: The world is a beautiful place
line: to be born into
line: if you don't mind some people dying
line: all the time
line: or maybe only starving
line: some of the time
line: which isn't half bad
line: if it isn't you
line: 
line: Oh the world is a beautiful place
line: to be born into
line: if you don't much mind
line: a few dead minds
line: in the higher places
line: or a bomb or two
line: now and then
line: in your upturned faces
line: or such other improprieties
line: as our Name Brand society
line: is prey to
line: with its men of distinction
line: and its men of extinction
line: and its priests
line: and other patrolmen
line: 
line: and

['The',
 'world',
 'is',
 'a',
 'beautiful',
 'place',
 'The',
 'world',
 'is',
 'a',
 'beautiful',
 'place',
 'to',
 'be',
 'born',
 'into',
 'if',
 'you',
 "don't",
 'mind',
 'happiness',
 'not',
 'always',
 'being',
 'so',
 'very',
 'much',
 'fun',
 'if',
 'you',
 "don't",
 'mind',
 'a',
 'touch',
 'of',
 'hell',
 'now',
 'and',
 'then',
 'just',
 'when',
 'everything',
 'is',
 'fine',
 'because',
 'even',
 'in',
 'heaven',
 'they',
 "don't",
 'sing',
 'all',
 'the',
 'time',
 '',
 'The',
 'world',
 'is',
 'a',
 'beautiful',
 'place',
 'to',
 'be',
 'born',
 'into',
 'if',
 'you',
 "don't",
 'mind',
 'some',
 'people',
 'dying',
 'all',
 'the',
 'time',
 'or',
 'maybe',
 'only',
 'starving',
 'some',
 'of',
 'the',
 'time',
 'which',
 "isn't",
 'half',
 'bad',
 'if',
 'it',
 "isn't",
 'you',
 '',
 'Oh',
 'the',
 'world',
 'is',
 'a',
 'beautiful',
 'place',
 'to']

In [36]:
print(len(txt))
tokens = word_tokenize(txt)
print(f'{len(tokens)} tokens')
print(tokens)

18342
3536 tokens
['The', 'world', 'is', 'a', 'beautiful', 'place', 'The', 'world', 'is', 'a', 'beautiful', 'place', 'to', 'be', 'born', 'into', 'if', 'you', 'do', "n't", 'mind', 'happiness', 'not', 'always', 'being', 'so', 'very', 'much', 'fun', 'if', 'you', 'do', "n't", 'mind', 'a', 'touch', 'of', 'hell', 'now', 'and', 'then', 'just', 'when', 'everything', 'is', 'fine', 'because', 'even', 'in', 'heaven', 'they', 'do', "n't", 'sing', 'all', 'the', 'time', 'The', 'world', 'is', 'a', 'beautiful', 'place', 'to', 'be', 'born', 'into', 'if', 'you', 'do', "n't", 'mind', 'some', 'people', 'dying', 'all', 'the', 'time', 'or', 'maybe', 'only', 'starving', 'some', 'of', 'the', 'time', 'which', 'is', "n't", 'half', 'bad', 'if', 'it', 'is', "n't", 'you', 'Oh', 'the', 'world', 'is', 'a', 'beautiful', 'place', 'to', 'be', 'born', 'into', 'if', 'you', 'do', "n't", 'much', 'mind', 'a', 'few', 'dead', 'minds', 'in', 'the', 'higher', 'places', 'or', 'a', 'bomb', 'or', 'two', 'now', 'and', 'then', 'in',

In [49]:
txt = "The world is a beautiful place\nto be born into\nif you don't mind happiness"
tokens = []
for l in txt.splitlines():
    tokens += l.split(' ')
print(tokens)

['The', 'world', 'is', 'a', 'beautiful', 'place', 'to', 'be', 'born', 'into', 'if', 'you', "don't", 'mind', 'happiness']


#### TextParser

In [5]:
from dwbzen  import *
from dwbzen.common import TextParser
from nltk.corpus import stopwords
from nltk import word_tokenize

In [6]:
filename = "/data/text/Followed By Madness (parts 1 2).txt"
fp = open(filename, "r")
txt = fp.read()
text_parser = TextParser(source=filename, remove_stop_words=False)

In [7]:
word_counts = text_parser.get_word_counts(sort_counts=True, reverse=True)
print(text_parser.counts_df.head(10))

  word  count
0  the   1307
1    a    758
2  and    753
3    i    695
4   of    562
5   to    478
6   my    424
7   in    379
8  she    240
9  her    225


In [8]:
sentences = text_parser.get_sentences()
print(f'{len(sentences)} sentences\n ')
for s in sentences:
    print(s)
    print(TextParser.remove_quotes(s))
    #print(word_tokenize(s))
    print()


2178 sentences
 
My grandfather appoints me an honorary electrical engineer for Niagara Mohawk power company.
My grandfather appoints me an honorary electrical engineer for Niagara Mohawk power company.

Not a bad job for a six-year-old kid.
Not a bad job for a six-year-old kid.

"Of course, you'll need training," he tells me in a very serious voice.
Of course, you'll need training, he tells me in a very serious voice.

"Could take years."
Could take years.

"Years?"
Years?

He pulls a filter cigarette from his shirt pocket and lights it.
He pulls a filter cigarette from his shirt pocket and lights it.

A blue smoke haze settles around his head like a forlorn halo.
A blue smoke haze settles around his head like a forlorn halo.

"Now don't get excited," he says.
Now don't get excited, he says.

"I'll teach you the ropes."
I'll teach you the ropes.

He snickers and looks around the way he usually does when he's about do something that will annoy my folks.
He snickers and looks around the

I got it all figured out.

He points to his head and taps his temple several times to unloose some high-power brain energy.
He points to his head and taps his temple several times to unloose some high-power brain energy.

"We can't miss."
We can't miss.




Mancuso's Toy Barn is on Main Street, about half a mile away.
Mancuso's Toy Barn is on Main Street, about half a mile away.

Our group of five kids, with Terry leading the way, ride into the gravel parking lot like a bunch of criminals on holiday.
Our group of five kids, with Terry leading the way, ride into the gravel parking lot like a bunch of criminals on holiday.

There are a few cars around, but no one is in sight.
There are a few cars around, but no one is in sight.

Terry huddles the group and explains his strategy.
Terry huddles the group and explains his strategy.

"Billy, you take Ken and hang out on aisle five."
Billy, you take Ken and hang out on aisle five.

"Hang out and do what?"
Hang out and do what?

"I don't know.

I inch my way slowly toward the door when I remember the dog Elektra.

The little mutt is sitting on a chair next to the door, watching me.
The little mutt is sitting on a chair next to the door, watching me.

Still I edge toward the door, one foot at a time.
Still I edge toward the door, one foot at a time.

When I'm almost within striking distance, I realize I've left my Easter seals; not all of them, but twenty books on the coffee table.
When I'm almost within striking distance, I realize I've left my Easter seals; not all of them, but twenty books on the coffee table.

"I'm coming, Gustav.
I'm coming, Gustav.

Your little angel Judith is almost ready."
Your little angel Judith is almost ready.

Do I run back and get them?
Do I run back and get them?

Sweat begins to roll down my brow and into my eyes, and that nasty little dog must smell my fear and starts to growl.
Sweat begins to roll down my brow and into my eyes, and that nasty little dog must smell my fear and starts to growl.

Nice going, I say, lacking a proper insult.




Scott slings more choice epithets, insisting that it didn't make any difference anyway as it produced no Magic Sea Creatures whatsoever.
Scott slings more choice epithets, insisting that it didn't make any difference anyway as it produced no Magic Sea Creatures whatsoever.

My sister has lost all interest, apparently deciding that building a snowman has more fun potential than fake sea creatures.
My sister has lost all interest, apparently deciding that building a snowman has more fun potential than fake sea creatures.




Debbie doesn't last long outside.
Debbie doesn't last long outside.

The bitter cold soon forces her inside where she joins Scott and myself huddled over the large living room heating grate.
The bitter cold soon forces her inside where she joins Scott and myself huddled over the large living room heating grate.

We trade dirty stories as my sister giggles and threatens to tell my mom.
We trade dirty stories as my sister


"Have a smoke," he says.
Have a smoke, he says.




"I better not.
I better not.

I'm on duty."
I'm on duty.

I try and sound official, like my entire future depends on my reputation as a school safety, even though a butt would give me something to brag about with Scott and make me look really hip in front of my friends.
I try and sound official, like my entire future depends on my reputation as a school safety, even though a butt would give me something to brag about with Scott and make me look really hip in front of my friends.

"Come on, don't be a dope," whines Tim.
Come on, don't be a dope, whines Tim.

"I thought you were cool.
I thought you were cool.

Kevin said you were cool."
Kevin said you were cool.




"Say no more."
Say no more.

Kevin advertises his coolness with that trademark phrase.
Kevin advertises his coolness with that trademark phrase.

"If he says he doesn't want a smoke, then he doesn't want a smoke.
If he says he doesn't want a smoke, then he doesn't want a sm

In [48]:
s = "Of course, you'll need training, he tells me in a very serious voice?"
words_re = re.compile(r'\W+')
ws = re.split(words_re, s)
words = [w for w in re.split(words_re, s) if len(w)>0]
if s[-1] in ".?!":
    words.append(s[-1])

In [10]:
words = text_parser.get_words()

In [5]:
lines = text_parser.get_lines()
print(f'{len(lines)} lines\n ')
i = 1
for s in lines: 
    print(f'{i}\t{s}')
    i+=1


50 lines
 
1	My grandfather appoints me an honorary electrical engineer for Niagara Mohawk power company. Not a bad job for a six-year-old kid.
2	 "Of course, you'll need training," he tells me in a very serious voice. "Could take years."
3	 "Years?"
4	 He pulls a filter cigarette from his shirt pocket and lights it. A blue smoke haze settles around his head like a forlorn halo. "Now don't get excited," he says. "I'll teach you the ropes." He snickers and looks around the way he usually does when he's about do something that will annoy my folks. "To start with, an expert pole jockey needs one of these." He rolls up his sleeve and shows off a fading tattoo he got while in the navy. The faded figure of a blue mermaid swims from elbow to wrist on Pa's left forearm. 
5	 I stare down at my own painfully skinny arm. "I don't think Dad would let me get a tattoo," I say in a pleading kind of voice.
6	 Pa comes to rescue, "No, I suppose not. Better wait until you're a bit older. In the meantime

In [6]:
words = text_parser.get_words()
print(words[:10])

['grandfather', 'appoints', 'honorary', 'electrical', 'engineer', 'niagara', 'mohawk', 'power', 'company', 'bad']


In [7]:
all_words = text_parser.get_all_words()
print(all_words[:10])

['my', 'grandfather', 'appoints', 'me', 'an', 'honorary', 'electrical', 'engineer', 'for', 'niagara']


In [22]:
filename = "/data/text/ferlinghetti.txt"
fp = open(filename, "r")
txt = fp.read()
text_parser = TextParser(source=filename, maxlines=50, remove_stop_words=True)
word_counts = text_parser.get_word_counts(sort_counts=True, reverse=True)
print(text_parser.counts_df.head(10))

        word  count
0      place      5
1      world      5
2       mind      4
3  beautiful      4
4       born      3
5      scene      3
6     making      3
7       time      3
8        fun      2
9       much      2


In [24]:
sentences = text_parser.get_sentences()
print(f'{len(sentences)} sentences\n ')
#for s in sentences: print(s)

50 sentences
 


#### DataFrame manipulation

In [45]:
df = pd.DataFrame(columns=['word','count', 'start_sentence','end_sentence'])
row = {'word':'Don','count':10,'start_sentence':True, 'end_sentence':True}
df

Unnamed: 0,word,count,start_sentence,end_sentence


In [53]:
df2 = pd.DataFrame.from_records(data=[row], columns=['word','count', 'start_sentence','end_sentence'])
df2

Unnamed: 0,word,count,start_sentence,end_sentence
0,Don,10,True,True


In [55]:
df = pd.concat([df,df2])
row = {'word':'Karen','count':5,'start_sentence':True, 'end_sentence':False}
df2 = pd.DataFrame.from_records(data=[row], columns=['word','count', 'start_sentence','end_sentence'])
df = pd.concat([df,df2])
df

Unnamed: 0,word,count,start_sentence,end_sentence
0,Don,10,True,True
0,Karen,5,True,False


In [8]:
l = ['blackout','my']
col_str = 'grandfather'


In [2]:
import markovify
# Get raw text as string.
with open("/data/text/Followed By Madness (parts 1 2).txt") as f:
    text = f.read()

# Build the model.
text_model = markovify.NewlineText(text, state_size=2)


In [3]:
# Print five randomly-generated sentences
for i in range(5):
    print(text_model.make_sentence())

# Print three randomly-generated sentences of no more than 280 characters
for i in range(3):
    print(text_model.make_short_sentence(280))

I try to concentrate.
I miss the rest of the building. I find the basement empty except for the rest of my own. Kevin shows me how to french inhale.
I impulsively reach out to the apparent amusement of its fellow beetles in the crook of the building. I find an unexpected stairway leading to another set of doors, hoping to circle around to the vestry and fly into the night, the wailing sirens fading into Mrs. O'Reily's terrified voice.
In our quest for dark places, the best we come up with ping-pong balls. Except there is no scientist here, only my dad, and he is fighting back tears.
That's not what I meant, but I have long since stopped wondering about my folks. The church is a white collar that chokes their stout thorax. It seems like I'm inside another television set. My view is a vice.
I don't want my mom snaps out of my classmates look up, jolted by the sudden sound.
I decide that I move in closer to the furnace and stare into mine, and my torn blue jeans.
We both stare incredulous

In [13]:
chain_df = pd.read_csv('/Compile/dwbzen/resources/text/madnessText_wordsChain.csv')
chain_df.head()

Unnamed: 0,KEY,appoints,me,an,honorary,electrical,engineer,for,niagara,mohawk,...,skeptical,doesn,bid,fond,bon,voyage,chorus,lipstick,your,collar
0,my grandfather,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,grandfather appoints,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,appoints me,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,me an,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,an honorary,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
