In [None]:
# prereqs
# Reddit API credentials used to build the PRAW client.
# Values are taken from the environment so real secrets never need to be
# saved inside the notebook; the original placeholder strings are kept as
# fallbacks for when a variable is not set.
import os

app_user_agent = os.environ.get("REDDIT_USER_AGENT", "user_agent")
app_client_id = os.environ.get("REDDIT_CLIENT_ID", "client_id")
app_client_secret = os.environ.get("REDDIT_CLIENT_SECRET", "client_secret")

In [4]:
# Create the shared PRAW client from the credentials defined in the
# prerequisites cell; the scraping helpers below use it as `reddit`.
import praw

reddit = praw.Reddit(
    client_id=app_client_id,
    client_secret=app_client_secret,
    user_agent=app_user_agent,
)

## Scrape text from r/dreams

In [6]:
def get_hot_posts(subreddit, num):
    """Return the combined self-text of the "hot" listing of a subreddit.

    Args:
        subreddit: Name passed to ``reddit.subreddit()``.
        num: Passed as ``limit`` to the hot listing.

    Returns:
        A single string made of each submission's ``selftext`` joined in
        listing order with no separator between posts.
    """
    hot_listing = reddit.subreddit(subreddit).hot(limit=num)
    return "".join(post.selftext for post in hot_listing)

In [7]:
def get_random_post(subreddit):
    """Return the self-text of one random submission from a subreddit.

    Args:
        subreddit: Name passed to ``reddit.subreddit()``, e.g. ``"dreams"``.

    Returns:
        The submission's ``selftext``, or an empty string when ``random()``
        yields no submission.
    """
    submission = reddit.subreddit(subreddit).random()
    if submission is None:
        # PRAW returns None for subreddits without the random feature;
        # without this guard the attribute access below raised AttributeError.
        return ""
    return submission.selftext

In [17]:
def write_posts_to_file(subreddit, quantity, filename, separator="\n"):
    """Append the text of random posts from a subreddit to a file.

    Args:
        subreddit: Subreddit name forwarded to ``get_random_post``.
        quantity: Number of random posts to fetch and write.
        filename: Path of the file to append to (created if missing).
        separator: Text written after every post. Defaults to a newline so
            the final sentence of one post is not glued to the first
            sentence of the next; pass ``""`` for raw concatenation.
    """
    for _ in range(quantity):
        text = get_random_post(subreddit)
        # Re-open in append mode for each post so everything fetched so far
        # is already on disk if the scrape is interrupted.
        with open(filename, 'a') as f:
            f.write(text)
            f.write(separator)

In [18]:
# Scrape 5000 random r/dreams posts into the file "dreamstext".
# The file is opened in append mode, so re-running this cell adds to it.
write_posts_to_file('dreams', 5000, "dreamstext")

## Text Parsing and Cleanup

In [20]:
import nltk
# tokenize reddit text into sentences
# Download the 'punkt' tokenizer package, then load its English model.
nltk.download('punkt')
# Shared sentence splitter; strip_dream_from_sentences() calls its .tokenize().
sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/bethydiakabana/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [21]:
# remove meta dream sentences to make it resemble a real journal
# Any sentence containing one of these fragments is dropped by
# strip_dream_from_sentences(). They are matched as plain substrings, so
# 'dream' already covers 'dreams' and 'sleep' already covers 'sleeping'.
substrings = ['dream', 'dreams', 'wake', 'woke', 'sleep', 'sleeping', 'nightmare']

In [22]:
def strip_dream_from_sentences(text):
    """Remove every sentence that mentions dreaming or sleeping.

    The text is split into sentences with the module-level ``sent_detector``
    and a sentence is discarded when it contains any entry of the
    module-level ``substrings`` list. Matching ignores case, so
    sentence-initial words such as "Woke" or "Dreams" are caught too.

    Args:
        text: The text to filter.

    Returns:
        The kept sentences in their original order, each preceded by a
        single space (a non-empty result therefore starts with a space).
        An empty string is returned when no sentence is kept.
    """
    needles = [substring.lower() for substring in substrings]
    kept = []
    for sentence in sent_detector.tokenize(text):
        lowered = sentence.lower()
        if not any(needle in lowered for needle in needles):
            kept.append(sentence)
    # Keep the historical " sentence sentence" layout, leading space included.
    return "".join(" " + sentence for sentence in kept)

In [23]:
def get_output(filename):
    """Read ``filename`` and return its whole content as a single string."""
    with open(filename) as corpus_file:
        return corpus_file.read()

In [24]:
# Load the scraped corpus from the "dreamstext" file into memory.
output_text = get_output("dreamstext")

In [33]:
# Preview the first 435 characters of the corpus as a sanity check.
print(output_text[0:435])

My son is just shy of 5 months. My dog is 3 years old and I've had him since he was a puppy. This dream is haunting me today so I need to get it off my chest. It's kind of graphic.

In my dream: My husband and I decided to take our son and dog to a themepark. We decided to take them on a rollercoaster. My husband sat in front of me while I had my son in an infant carrier (which I do use in real life) and I was holding onto my dog. 


## Generate "Dear Diary"

In [34]:
import datetime
import markovify
import random

In [35]:
# Holds the most recently built Markov model, keyed by its source text, so
# repeated calls with the same corpus do not rebuild the model each time.
_text_model_cache = {}


def _get_text_model(clean_text):
    """Return the Markov model for ``clean_text``, building it only on first use."""
    model = _text_model_cache.get(clean_text)
    if model is None:
        _text_model_cache.clear()  # keep at most one corpus/model pair alive
        model = markovify.Text(clean_text)
        _text_model_cache[clean_text] = model
    return model


def generate_paragraph(clean_text):
    """Generate a paragraph of random sentences modelled on ``clean_text``.

    Between 5 and 25 sentence attempts are made (the count is chosen at
    random). ``make_sentence()`` can return ``None`` when it fails to
    produce a sentence; such attempts are skipped instead of raising
    ``TypeError`` on string concatenation, so the paragraph may contain
    fewer sentences than attempts.

    Args:
        clean_text: Corpus text used to build the Markov model.

    Returns:
        The generated sentences, each preceded by a single space; an empty
        string if no attempt produced a sentence.
    """
    text_model = _get_text_model(clean_text)
    num_sentences = random.randint(5, 25)
    paragraph = ""

    # Collect a varying number of randomly generated sentences.
    for _ in range(num_sentences):
        sentence = text_model.make_sentence()
        if sentence is None:
            continue
        paragraph += " " + sentence

    return paragraph

In [42]:
# test randomly generated paragraph
# Generate one paragraph from the corpus, pass it through the dream-sentence
# filter, and print the result for inspection.
print(strip_dream_from_sentences(generate_paragraph(output_text)))

  Anyway, we get placed in the corridor outside my window and run fast in a way out, every lift and slope leads to the war. Both bro and sis hugged me and I'm a teenager and me and I am suddenly in a room, or I might have gotten samples in the eye and then suddenly I felt so proud this man looked to be harmed to be harmed, say that this isn’t real. I was constantly yelling for doctors and nurses do their thing. It was one of the village on a streaming service called fullscreen. I decided that the entire plain a grey, darkened look. They’re always scary and intense, or at too high of a lack of oxygen. I run away and didn’t know what Doc Ock was doing in the throat, I felt my heart was pounding. So to start with it. So I was downstairs looking up at the flenser's underprotected legs. We were assigned to the ground grabs my left but early morning the next ring I get really drunk but it's more than average probably. Up there I meet Michael. They’re always scary and the next day. It was the

In [43]:
def get_start_date():
    """Pick a random datetime between 1000 and 15000 days before now."""
    days_ago = random.randint(1000, 15000)
    return datetime.datetime.now() - datetime.timedelta(days=days_ago)

In [51]:
def get_next_date(current_date):
    """Return the date of the next diary entry.

    The diary advances by a random gap of 1 to 5 days (inclusive).

    Args:
        current_date: Date of the current entry; anything that supports
            adding a ``datetime.timedelta``.

    Returns:
        ``current_date`` moved forward by the random gap.
    """
    added_days = random.randint(1, 5)
    # The result used to be stored as ``dry_date`` while ``diary_date`` was
    # returned, so every call raised NameError.
    diary_date = current_date + datetime.timedelta(days=added_days)
    return diary_date

In [52]:
def generate_novel(novel_name, novel_length):
    """Append a sequence of dated diary entries to the file ``novel_name``.

    One opening entry is written for a random start date, followed by
    ``novel_length`` further entries whose dates come from
    ``get_next_date``; ``novel_length + 1`` entries are written in total.
    Each entry is a date line formatted as "%B %d, %Y", then a paragraph
    generated from the module-level ``output_text`` with dream-related
    sentences removed, then a blank line.

    Args:
        novel_name: Path of the output file, opened in append mode.
        novel_length: Number of follow-up entries after the opening one.
    """
    def append_entry(entry_date):
        # Generate the text first, then open the file, one entry at a time.
        body = strip_dream_from_sentences(generate_paragraph(output_text))
        with open(novel_name, 'a') as diary:
            diary.writelines((entry_date.strftime("%B %d, %Y") + '\n',
                              body + '\n\n'))

    entry_date = get_start_date()
    append_entry(entry_date)
    for _ in range(novel_length):
        entry_date = get_next_date(entry_date)
        append_entry(entry_date)

In [53]:
# Write the diary to the file "dear_diary": one opening entry plus 365
# follow-up entries. The file is opened in append mode, so re-running this
# cell adds more entries to it.
generate_novel("dear_diary", 365)