### Objectives
You will be able to (YWBAT):
* Scrape a website with infinite scrolling
* Take our scraped data and organize it into a database/data structure
* Apply Markovify to create sentences from our data

In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup # parser html, xml files

In [2]:
def make_brainy_soup(url="https://www.brainyquote.com/topics/inspirational", page=None):
    """
    Download a BrainyQuote page and parse it.

    input
    url: base address of the topic page
    page: optional page number, forwarded to request_brainy_quote

    return
    a BeautifulSoup object built from the response text with 'html.parser'
    """
    response = request_brainy_quote(url=url, page=page)
    return BeautifulSoup(response.text, 'html.parser')

In [3]:
def build_url(url="https://www.brainyquote.com/topics/inspirational", page=None):
    """
    Build the address of a (possibly numbered) page.

    input
    url: base address
    page: page number, or None for the unnumbered base page

    return
    url unchanged when page is None, otherwise url + "_" + str(page)
    """
    if page is not None:
        return "_".join((url, str(page)))
    return url

In [4]:
def request_brainy_quote(url="https://www.brainyquote.com/topics/inspirational", page=None, timeout=10):
    """
    Issue a GET request for a BrainyQuote page.

    input
    url: base address of the topic page
    page: optional page number; combined with url by build_url
    timeout: seconds to wait for the server before requests raises a
             timeout error (passed straight to requests.get)

    return
    the response object returned by requests.get
    """
    target_url = build_url(url=url, page=page)
    # Without a timeout a stalled connection would block the notebook
    # indefinitely, so always bound the wait.
    return requests.get(target_url, timeout=timeout)

In [5]:
# get quotes
def get_quotes(quote_tag, verbose=False):
    """
    Extract the quote text from a quote tag.

    The alt text of the tag's <img> is expected to look like
    "<quote> - <author>".

    input
    quote_tag: tag_object containing an <img alt="..."> child
    verbose: when True, print the tag and the error if extraction fails

    return
    the quote part of the alt value (stripped); "" when the alt value has
    no separator at all; None when the alt value cannot be read
    """
    try:
        alt_text = quote_tag.find("img").attrs["alt"]
        # Prefer the spaced separator so that a hyphen inside the author's
        # name (e.g. "Jean-Paul Sartre") is not mistaken for the boundary
        # between quote and author; fall back to a bare hyphen otherwise.
        separator = " - " if " - " in alt_text else "-"
        # Everything before the LAST separator is the quote, so dashes
        # inside the quote itself are preserved.
        return alt_text.rpartition(separator)[0].strip()
    except Exception as e:
        # Best effort: tags without an <img>/alt are reported as None.
        if verbose:
            print("Could not get quote from {}".format(quote_tag))
            print(e)
        return None

In [6]:
def get_author(quote_tag, verbose=False):
    """
    Extract the author name from a quote tag.

    The alt text of the tag's <img> is expected to look like
    "<quote> - <author>".

    input
    quote_tag: tag_object containing an <img alt="..."> child
    verbose: when True, print the tag and the error if extraction fails

    return
    the author part of the alt value (stripped); the whole stripped alt
    value when it has no separator; None when the alt value cannot be read
    """
    try:
        alt_text = quote_tag.find("img").attrs["alt"]
        # Prefer the spaced separator so hyphenated names such as
        # "Jean-Paul Sartre" are kept whole; fall back to a bare hyphen.
        separator = " - " if " - " in alt_text else "-"
        # Everything after the LAST separator is the author.
        return alt_text.rpartition(separator)[2].strip()
    except Exception as e:
        # Best effort: tags without an <img>/alt are reported as None.
        if verbose:
            print("Could not get author from {}".format(quote_tag))
            print(e)
        return None

### Use functions to create soup objects for brainy quote pages

In [7]:
# Fetch and parse pages 1 through 17 of the topic, one soup per page.
soups = [make_brainy_soup(page=page_number) for page_number in range(1, 18)]

### Use functions to build quotes and authors list

In [8]:
# Walk every parsed page and collect one quote and one author per
# "qti-listm" div; entries are None where extraction failed.
quote_list, authors_list = [], []
for soup in soups:
    quote_tags = soup.find_all("div", class_="qti-listm")
    quote_list += [get_quotes(tag) for tag in quote_tags]
    authors_list += [get_author(tag) for tag in quote_tags]

In [9]:
# Number of tags scraped before filtering.
print(len(quote_list))
# Filter quotes and authors *together*: the two lists are paired purely by
# position, so dropping None from each list independently would shift the
# pairing whenever only one of the two extractors failed for a tag.
kept_pairs = [
    (quote, author)
    for quote, author in zip(quote_list, authors_list)
    if quote is not None and author is not None
]
quote_list = [quote for quote, author in kept_pairs]
authors_list = [author for quote, author in kept_pairs]
len(quote_list), len(authors_list)

431


(166, 166)

# Quotes from Brainy Quote

In [16]:
# Preview the first ten entries of quote_list.
quote_list[:10]

['The best and most beautiful things in the world cannot be seen or even touched - they must be felt with the heart.',
 'The best preparation for tomorrow is doing your best today.',
 'It is during our darkest moments that we must focus to see the light.',
 'Clouds come floating into my life, no longer to carry rain or usher storm, but to add color to my sunset sky.',
 'Happiness is not something you postpone for the future; it is something you design for the present.',
 "I can't change the direction of the wind, but I can adjust my sails to always reach my destination.",
 'Put your heart, mind, and soul into even your smallest acts. This is the secret of success.',
 'Health is the greatest gift, contentment the greatest wealth, faithfulness the best relationship.',
 'No act of kindness, no matter how small, is ever wasted.',
 'Change your thoughts and you change your world.']

In [25]:
# Preview the first ten entries of authors_list.
authors_list[:10]

['Helen Keller',
 'H. Jackson Brown, Jr.',
 'Aristotle',
 'Rabindranath Tagore',
 'Jim Rohn',
 'Jimmy Dean',
 'Swami Sivananda',
 'Buddha',
 'Aesop',
 'Norman Vincent Peale']

# Scraping keepinspiring

In [10]:
def clean_author_quote(author_quote):
    """
    Return the text of a tag with curly double quotes removed.

    input
    author_quote: object exposing the raw text as `.text`

    return
    the text with every “ and ” character deleted
    """
    curly_quotes = str.maketrans("", "", "“”")
    return author_quote.text.translate(curly_quotes)

In [11]:
def get_author_quotes(soup):
    """
    Find the author-quote blocks in a parsed page.

    input
    soup: parsed page exposing find_all

    return
    whatever soup.find_all yields for <div class="author-quotes">
    """
    return soup.find_all("div", class_="author-quotes")

def make_keep_inspiring_request(url, timeout=10):
    """
    Issue a GET request for a Keep Inspiring page.

    input
    url: address to fetch
    timeout: seconds to wait for the server before requests raises a
             timeout error (passed straight to requests.get)

    return
    the response object returned by requests.get
    """
    # Without a timeout a stalled connection would block the notebook
    # indefinitely, so always bound the wait.
    return requests.get(url, timeout=timeout)

def get_keep_inspiring_author_quotes(url="https://www.keepinspiring.me/positive-inspirational-life-quotes/"):
    """
    Scrape the combined quote/author strings from a Keep Inspiring page.

    input
    url: address of the page to scrape

    return
    list with one cleaned string per "author-quotes" div on the page
    """
    response = make_keep_inspiring_request(url)
    page = BeautifulSoup(response.text, 'html.parser')
    return [clean_author_quote(tag) for tag in get_author_quotes(page)]

In [12]:
def get_ki_author(text):
    """
    Extract the author from a "quote – author" string.

    input
    text: string in which the last en dash (–) precedes the author

    return
    the text after the last en dash with surrounding whitespace removed;
    the whole stripped text when there is no en dash
    """
    # Strip so authors do not carry the space that follows the en dash,
    # matching how get_ki_quote strips the quote.
    return text.rpartition('–')[2].strip()

def get_ki_quote(text):
    """
    Extract the quote from a "quote – author" string.

    input
    text: string in which the last en dash (–) precedes the author

    return
    the text before the last en dash with surrounding whitespace removed;
    "" when there is no en dash
    """
    # rpartition keeps the text before the last en dash verbatim, so en
    # dashes inside the quote itself are not rewritten as hyphens.
    return text.rpartition('–')[0].strip()

In [13]:
# Scrape the default Keep Inspiring page; each entry is one cleaned
# string that still contains both the quote and its author.
ki_auth_quotes = get_keep_inspiring_author_quotes()

# Quotes from Keep Inspiring

In [17]:
# Split every scraped string into its quote part and its author part;
# the two lists stay aligned by position.
ki_quotes = [get_ki_quote(entry) for entry in ki_auth_quotes]
ki_authors = [get_ki_author(entry) for entry in ki_auth_quotes]
ki_quotes[:10]

['Life is 10% what happens to us and 90% how we react to it.',
 'There is no royal road to anything. One thing at a time, all things in succession. That which grows fast, withers as rapidly. That which grows slowly, endures.',
 'Be not afraid of life. Believe that life is worth living, and your belief will help create the fact.',
 'When I stand before God at the end of my life, I would hope that I would not have a single bit of talent left and could say, I used everything you gave me.',
 'The quickest way to double your money is to fold it over and put it back in your pocket.',
 'Even if you’re on the right track, you’ll get run over if you just sit there.',
 'When I hear somebody sigh, ‘Life is hard,’ I am always tempted to ask, ‘Compared to what?’',
 'Nurture your mind with great thoughts. To believe in the heroic makes heroes.',
 'The real opportunity for success lies within the person and not in the job.',
 'Look at the sparrows; they do not know what they will do in the next momen

### Put the quote lists together

In [15]:
# Quotes and authors are matched purely by position, so verify both pairs
# of lists line up before merging; a mismatch here would silently attach
# quotes to the wrong authors further down.
assert len(quote_list) == len(authors_list), "quote_list and authors_list are out of step"
assert len(ki_quotes) == len(ki_authors), "ki_quotes and ki_authors are out of step"
# NOTE: extend mutates the lists in place, so re-running this cell
# appends the Keep Inspiring entries a second time.
quote_list.extend(ki_quotes)
authors_list.extend(ki_authors)
len(quote_list), len(authors_list)

(215, 215)

In [24]:
### Put these in a dataframe and save to a csv
# One record per (author, quote) pair; zip matches the lists by position.
dlist = [
    {"author": author, "quote": quote}
    for author, quote in zip(authors_list, quote_list)
]
df = pd.DataFrame(data=dlist)
df.head()

Unnamed: 0,author,quote
0,Helen Keller,The best and most beautiful things in the worl...
1,"H. Jackson Brown, Jr.",The best preparation for tomorrow is doing you...
2,Aristotle,It is during our darkest moments that we must ...
3,Rabindranath Tagore,"Clouds come floating into my life, no longer t..."
4,Jim Rohn,Happiness is not something you postpone for th...


In [103]:
# to_csv does not create missing folders, so make sure the target
# directory exists before writing (a fresh checkout may not have it).
output_path = Path("files/inspirational_quote_authors.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path)

### Use markovify to create quotes

In [20]:
import markovify

In [21]:
# Build the Markov model from all collected quotes, one quote per line.
m = markovify.NewlineText("\n".join(quote_list))

# Generated Quotes

In [28]:
# Start with an empty list; the generation loop below fills it.
generated_quotes = []

In [29]:
# make_short_sentence returns None when it cannot produce an acceptable
# sentence, so only keep real sentences. The attempt cap guarantees the
# cell terminates even if the model can rarely (or never) satisfy the
# length limits.
max_attempts = 10000
attempts = 0
while len(generated_quotes) < 100 and attempts < max_attempts:
    attempts += 1
    sentence = m.make_short_sentence(max_chars=280, min_chars=20)
    if sentence is not None:
        generated_quotes.append(sentence)

In [30]:
# Preview the first ten generated quotes.
generated_quotes[:10]

['Change your thoughts and you change your life had just begun.',
 'We must let go of the soul with itself.',
 'I love those who wake.',
 "Your present circumstances don't determine where you are doing the impossible.",
 'Your heart is full of magical things patiently waiting for us.',
 'There are two ways of spreading light: to be like the turtle - at ease in your own shell.',
 'One today is worth more than I can accomplish.',
 'Love the moment and every event of every hand.',
 'God sleeps in the heroic makes heroes.',
 'Try to be the candle or the day runs you.']

In [31]:
# Display the full list of generated quotes.
generated_quotes

['Change your thoughts and you change your life had just begun.',
 'We must let go of the soul with itself.',
 'I love those who wake.',
 "Your present circumstances don't determine where you are doing the impossible.",
 'Your heart is full of magical things patiently waiting for us.',
 'There are two ways of spreading light: to be like the turtle - at ease in your own shell.',
 'One today is worth more than I can accomplish.',
 'Love the moment and every event of every hand.',
 'God sleeps in the heroic makes heroes.',
 'Try to be the candle or the day runs you.',
 'It is by acts and not in the next home run.',
 'Be brave enough to live life creatively. The creative place where no one can go back and make a living; what we get, we can catch excellence.',
 'Countless as the human spirit.',
 "Every moment and every event of every man's life on earth plants something in the sky, you look in you.",
 'What lies behind you and what is done in love is done in love is done well.',
 'There is 