In [None]:
"""
Option 1:

from random import choice 

nouns = ("puppy", "car", "rabbit", "girl", "monkey")
verbs = ("runs", "hits", "jumps", "drives", "barfs") 
adv = ("crazily.", "dutifully.", "foolishly.", "merrily.", "occasionally.")
adj = ("adorable", "clueless", "dirty", "odd", "stupid")

# asking user as to how many sentences he would like to generate 
for _ in range (int (input ("Enter integer value :"))): 
    print(list(map(choice, [nouns, verbs, adv, adj])))
    
"""

In [1]:
import time
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt


In [2]:
def unzip(pairs):
    """
    "unzips" of groups of items into separate tuples.
    
    Example: pairs = [("a", 1), ("b", 2), ...] --> (("a", "b", ...), (1, 2, ...))
    
    Parameters
    ----------
    pairs : Iterable[Tuple[Any, ...]]
        An iterable of the form ((a0, b0, c0, ...), (a1, b1, c1, ...))
    
    Returns
    -------
    Tuple[Tuples[Any, ...], ...]
       A tuple containing the "unzipped" contents of `pairs`; i.e. 
       ((a0, a1, ...), (b0, b1, ...), (c0, c1), ...)
    """
    return tuple(zip(*pairs))

In [3]:
def normalize(counter):
    """ Convert a `letter -> count` counter to a list 
    of (letter, frequency) pairs, sorted in descending order of 
    frequency.

    Parameters
    -----------
    counter : collections.Counter
        letter -> count

    Returns
    -------
    List[Tuple[str, float]]
       A list of tuples: (letter, frequency) pairs in order
       of descending-frequency

    Examples
    --------
    >>> from collections import Counter
    >>> letter_count = Counter({"a": 1, "b": 3})
    >>> letter_count
    Counter({'a': 1, 'b': 3})

    >>> normalize(letter_count)
    [('b', 0.75), ('a', 0.25)]
    """

    total = sum(counter.values())
    return [(char, cnt/total) for char, cnt in counter.most_common()]


In [4]:
from collections import defaultdict
from collections import Counter

def train_lm(text, n):
    """ Train character-based n-gram language model.
        
    This will learn: given a sequence of n-1 characters, what the probability
    distribution is for the n-th character in the sequence.

    For example if we train on the text:
        text = "cacao"

    Using a n-gram size of n=3, then the following dict would be returned.
    See that we *normalize* each of the char_count_tuples for a given history

        {'ac': [('a', 1.0)],
         'ca': [('c', 0.5), ('o', 0.5)],
         '~c': [('a', 1.0)],
         '~~': [('c', 1.0)]}

    Tildas ("~") are used for padding the history when necessary, so that it's 
    possible to estimate the probability of a seeing a character when there 
    aren't (n - 1) previous characters of history available.

    So, according to this text we trained on, if you see the sequence 'ac',
    our model predicts that the next character should be 'a' 100% of the time.

    For generating the padding, recall that Python allows you to generate 
    repeated sequences easily: 
       `"p" * 4` returns `"pppp"`

    Parameters
    -----------
    text: str 
        A string (doesn't need to be lowercased).
        
    n: int
        The length of n-gram to analyze.

    Returns
    -------
    Dict[str, List[Tuple[str, float]]]
        
        {n-1 history -> [(letter, normalized count), ...]}
        
        A dictionary that maps histories (strings of length (n-1)) to lists of (char, prob) 
        pairs, where prob is the probability (i.e frequency) of char appearing after 
        that specific history.

    Examples
    --------
    >>> train_lm("cacao", 3)
    {'ac': [('a', 1.0)],
     'ca': [('c', 0.5), ('o', 0.5)],
     '~c': [('a', 1.0)],
     '~~': [('c', 1.0)]}
    """

    raw_lm = defaultdict(Counter) # history -> {char -> count}
    history = "~" * (n - 1)  # length n - 1 history
    
    # count number of times characters appear following different histories
    #
    # for char in text ...
    #    1. Increment language model's count, given current history and character
    #    2. Update history

    for char in text:
        raw_lm[history][char] += 1
        # slide history window to the right by one character
        history = history[1:] + char

    
    # create the finalized language model – a dictionary with: history -> [(char, freq), ...]
    lm = {history : normalize(counter) for history, counter in raw_lm.items()} 
    
    return lm


In [5]:
def generate_letter(lm, history):
    """ Randomly picks letter according to probability distribution associated with 
    the specified history, as stored in your language model.

    Note: returns dummy character "~" if history not found in model.

    Parameters
    ----------
    lm: Dict[str, List[Tuple[str, float]]] 
        The n-gram language model. 
        I.e. the dictionary: history -> [(char, freq), ...]

    history: str
        A string of length (n-1) to use as context/history for generating 
        the next character.

    Returns
    -------
    str
        The predicted character. '~' if history is not in language model.
    """

    if not history in lm:
        return "~"
    letters, probs = unzip(lm[history])
    i = np.random.choice(letters, p=probs)
    return i

In [6]:
def generate_text(lm, n, nletters=100):
    """ Randomly generates `nletters` of text by drawing from 
    the probability distributions stored in a n-gram language model 
    `lm`.

    Parameters
    ----------
    lm: Dict[str, List[Tuple[str, float]]]
        The n-gram language model. 
        I.e. the dictionary: history -> [(char, freq), ...]
    
    n: int
        Order of n-gram model.
    
    nletters: int
        Number of letters to randomly generate.

    Returns
    -------
    str
        Model-generated text. Should contain `nletters` number of
        generated characters. The pre-pended ~'s are not to be included. 
    """
    # <COGINST>
    history = "~" * (n - 1)
    text = []
    for i in range(nletters):
        c = generate_letter(lm, history)
        text.append(c)
        history = history[1:] + c
    return "".join(text)  

## 2. Generating "The Percy Jackson Series"

The next cell loads in pjallbooks.txt file.

In [11]:
from cogworks_data.language import get_data_path

path_to_pj = "/Users/mohan/Desktop/cogworks/bwsi/ryan-sus/capstone/MadLib/pjallbooks.txt"

#get_data_path("pjolympians.txt")

with open(path_to_pj, "rb") as f:
    pj = f.read().decode()  
    pj = pj.lower()  
    pj.split()




percy jackson. mercy has no place for the rest figured out how all those ships cross the mall. i made a silent agreement. none of the gods.’
standing there.
‘thankfully, they had seen quintus poking around them. the
lotus casino for five days.
we had only minutes before answering, he hopped backbiter.
annabeth’s cap out of mine.
‘maybe we’ll see three human-size and he’d already rolled
up the tarp on the sidewalks.
‘she’ll never get boring?’
‘no, no, that’s not important. did he ask you for –’ i looked tyson aside and explained how i’d caused that power,
calling it for a second, i remember your study of mythology mixed up, the stairs. the basement door was half a million pounds of pursuit, but i hesitated, fascinated by the gods, wore a dark suit and his tunic was smudged with green fire twenty-four hours.
i went up to the door swung down like he was distracted. kelli pounced on the cursed blade shall reap. maybe we can’t allow that.
i stripped away
the tin and a little while.
dionysu

In [14]:
lmpj1 = train_lm(pj, 15)
new_text = generate_text(lmpj1, 15,1000)
print(new_text)


percy jackson, it said. yes, the exchange went well, i see.
i was back in the styx, or anticipation of what i was about to end
up embedded in fifty metres to the river and the hunter
were trying to kill each other in
months, but between the woods and would not have come home.’
the vision shut off.
my knees buckled, and i would’ve been terrified. the strange things she would say good morning to
them, ask how it was going to start frothing
at the mouth or shooting spines, but he just bowed awkwardly. ‘i – i won’t answer questions with this
hellhound sniffing my tail!’
nico looked uneasy. ‘you are all true heroes. and as
soon as we get percy fixed up, you must return to half-blood hill, i’ll understand why everyone was eating, chiron made a
surprise announcement: the chariot races!’
murmuring broke out at all the tables – excitement, fear, disbelief.
‘now i know,’ tantalus chided. ‘the monster may be able to do some menial
chores. any suggestions as to where such a beast should be kennel

In [1]:
new_text =" It was the first day of the rest of her life. This wasn't the day she was actually born, but she knew that nothing would be the same from this day forward. Although this was a bit scary to her, it was also extremely freeing."

In [2]:
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/japleen/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [3]:
from nltk import word_tokenize

In [11]:
tok_text = word_tokenize(new_text)
gram = nltk.pos_tag(tok_text)
print(gram)

[('It', 'PRP'), ('was', 'VBD'), ('the', 'DT'), ('first', 'JJ'), ('day', 'NN'), ('of', 'IN'), ('the', 'DT'), ('rest', 'NN'), ('of', 'IN'), ('her', 'PRP$'), ('life', 'NN'), ('.', '.'), ('This', 'DT'), ('was', 'VBD'), ("n't", 'RB'), ('the', 'DT'), ('day', 'NN'), ('she', 'PRP'), ('was', 'VBD'), ('actually', 'RB'), ('born', 'VBN'), (',', ','), ('but', 'CC'), ('she', 'PRP'), ('knew', 'VBD'), ('that', 'IN'), ('nothing', 'NN'), ('would', 'MD'), ('be', 'VB'), ('the', 'DT'), ('same', 'JJ'), ('from', 'IN'), ('this', 'DT'), ('day', 'NN'), ('forward', 'RB'), ('.', '.'), ('Although', 'IN'), ('this', 'DT'), ('was', 'VBD'), ('a', 'DT'), ('bit', 'NN'), ('scary', 'JJ'), ('to', 'TO'), ('her', 'PRP$'), (',', ','), ('it', 'PRP'), ('was', 'VBD'), ('also', 'RB'), ('extremely', 'RB'), ('freeing', 'VBG'), ('.', '.')]


In [12]:
# def Convert(tup, di):
#     di = dict(tup)
#     return di
# # Driver Code    
# gram_dict = {}
# Convert(gram, gram_dict)

In [13]:
from collections import defaultdict

d = defaultdict(list)
for key, value in gram:
    d[key].append(value)

In [14]:
print(d)

defaultdict(<class 'list'>, {'It': ['PRP'], 'was': ['VBD', 'VBD', 'VBD', 'VBD', 'VBD'], 'the': ['DT', 'DT', 'DT', 'DT'], 'first': ['JJ'], 'day': ['NN', 'NN', 'NN'], 'of': ['IN', 'IN'], 'rest': ['NN'], 'her': ['PRP$', 'PRP$'], 'life': ['NN'], '.': ['.', '.', '.'], 'This': ['DT'], "n't": ['RB'], 'she': ['PRP', 'PRP'], 'actually': ['RB'], 'born': ['VBN'], ',': [',', ','], 'but': ['CC'], 'knew': ['VBD'], 'that': ['IN'], 'nothing': ['NN'], 'would': ['MD'], 'be': ['VB'], 'same': ['JJ'], 'from': ['IN'], 'this': ['DT', 'DT'], 'forward': ['RB'], 'Although': ['IN'], 'a': ['DT'], 'bit': ['NN'], 'scary': ['JJ'], 'to': ['TO'], 'it': ['PRP'], 'also': ['RB'], 'extremely': ['RB'], 'freeing': ['VBG']})


['It',
 'was',
 'the',
 'first',
 'day',
 'of',
 'rest',
 'her',
 'life',
 '.',
 'This',
 "n't",
 'she',
 'actually',
 'born',
 ',',
 'but',
 'knew',
 'that',
 'nothing',
 'would',
 'be',
 'same',
 'from',
 'this',
 'forward',
 'Although',
 'a',
 'bit',
 'scary',
 'to',
 'it',
 'also',
 'extremely',
 'freeing']

In [None]:
list_words = []
for i in range(1,6):
    list_words.append(input("Please enter noun " + str(i) + ": ")) 

In [8]:
for i in range(len(list_words)):
    print(list_words[i])

grapes
orange
peach
banana
apple


In [11]:
counter = 0

# PRP: It
# VBD: the
#NN : apple
print(g)
g_swap = {v: k for k, v in g.items()} # swap the values and the keys 
print("")
print(g_swap)

for key, value in list(g_swap.items()):
#     print(value)
#     print(key)
    if key == 'NN' and counter < 5 :
        g_swap[key] = list_words[counter]
        #print(list_words[counter])
        #print(g[key])
        counter += 1

new_line = list(g_swap.values()) 

print(new_line)



{'It': 'PRP', 'was': 'VBD', 'the': 'DT', 'first': 'JJ', 'day': 'NN', 'of': 'IN', 'rest': 'NN', 'her': 'PRP$', 'life': 'NN', '.': '.', 'This': 'DT', "n't": 'RB', 'she': 'PRP', 'actually': 'RB', 'born': 'VBN', ',': ',', 'but': 'CC', 'knew': 'VBD', 'that': 'IN', 'nothing': 'NN', 'would': 'MD', 'be': 'VB', 'same': 'JJ', 'from': 'IN', 'this': 'DT', 'forward': 'RB', 'Although': 'IN', 'a': 'DT', 'bit': 'NN', 'scary': 'JJ', 'to': 'TO', 'it': 'PRP', 'also': 'RB', 'extremely': 'RB', 'freeing': 'VBG'}

{'PRP': 'it', 'VBD': 'knew', 'DT': 'a', 'JJ': 'scary', 'NN': 'bit', 'IN': 'Although', 'PRP$': 'her', '.': '.', 'RB': 'extremely', 'VBN': 'born', ',': ',', 'CC': 'but', 'MD': 'would', 'VB': 'be', 'TO': 'to', 'VBG': 'freeing'}
['it', 'knew', 'a', 'scary', 'grapes', 'Although', 'her', '.', 'extremely', 'born', ',', 'but', 'would', 'be', 'to', 'freeing']


In [None]:
"""i_of_nouns = []

for key, value in list(g.items()):
    if value == 'NN':
            del g[key]
   
   """

## Working on making the actual game

1. remove random nouns using spacy
2. find the pos of the random words
3. either leave word blank for user to fill in
4. or have computer generate words

## 3. Generating "Dr. Suess' Work"

The next cell loads in drsuess.txt file.

In [30]:
import pandas as pd

path_to_seuss = "/Users/mohan/Desktop/cogworks/bwsi/ryan-sus/capstone/MadLib/drseuss.txt"

seuss = pd.read_csv(path_to_seuss, encoding= 'unicode_escape')

#with open(path_to_suess, "rb") as f:
seuss = seuss.read().decode()  
seuss = seuss.lower()
seuss.split()
    


print(str(len(suess)) + " character(s)")

ParserError: Error tokenizing data. C error: Expected 1 fields in line 8, saw 3
