In [1]:
import requests
import urllib
import pandas as pd
from requests_html import HTML
from requests_html import HTMLSession

In [4]:
def get_source(url):
    """Fetch a page through a fresh requests_html session.

    Args:
        url (string): Address of the page to download.

    Returns:
        response (object): The response returned by the session for
        ``url``, or None when the request raised a
        ``requests.exceptions.RequestException`` (the error is printed
        instead of being propagated).
    """

    try:
        browser_session = HTMLSession()
        response = browser_session.get(url)
    except requests.exceptions.RequestException as error:
        # Best-effort fetch: report the failure and hand back None.
        print(error)
        return None
    else:
        return response

In [91]:
def scrape_google(query):
    """Search Google UK for ``query`` and return the non-Google links found.

    Args:
        query (string): Raw search phrase; it is URL-encoded here.

    Returns:
        list: Absolute URLs from the results page, excluding every URL that
        starts with one of the Google-owned prefixes listed below. Empty
        when the results page could not be fetched.
    """
    # Imported explicitly: a bare `import urllib` does not guarantee that
    # the `parse` submodule has been loaded.
    from urllib.parse import quote_plus

    response = get_source("https://www.google.co.uk/search?q=" + quote_plus(query))
    if response is None:
        # get_source prints the error and returns None on a request failure;
        # report "no results" instead of failing on `response.html`.
        return []

    google_domains = ('https://www.google.',
                      'https://google.',
                      'https://webcache.googleusercontent.',
                      'http://webcache.googleusercontent.',
                      'https://policies.google.',
                      'https://support.google.',
                      'https://maps.google.')

    # str.startswith accepts a tuple, so one call checks every prefix.
    return [url for url in response.html.absolute_links
            if not url.startswith(google_domains)]

In [92]:
# Search phrase used by this cell and by the later `search(query)` call.
query = 'vanitas meaning'

# Result URLs for the phrase, with Google-owned links filtered out.
links = scrape_google(query)

In [9]:
# Download every result page with plain `requests`; responses are stored in
# the same order as `links`.
# NOTE(review): no timeout or error handling here, so one failing URL aborts
# the loop and leaves `results` partially filled.
results = []
for link in links:
    results.append(requests.get(link))

In [93]:
# Display the filtered result URLs.
links

['https://www.thoughtco.com/vanitas-painting-definition-183179',
 'https://www.xamou-art.com/word/vanitas/',
 'https://www.collinsdictionary.com/dictionary/english/vanitas',
 'https://en.wikipedia.org/wiki/Vanitas#Outside_visual_art',
 'https://languages.oup.com/google-dictionary-en',
 'https://www.yourdictionary.com/vanitas',
 'https://www.nationalgallery.org.uk/paintings/glossary/vanitas',
 'https://www.britannica.com/art/vanitas-art',
 'https://en.wikipedia.org/wiki/Vanitas#In_modern_times',
 'https://alicefryart.wordpress.com/2016/11/16/inspiration-vanitas-painting-and-the-symbolism-of-objects/',
 'https://www.tate.org.uk/art/art-terms/v/vanitas',
 'https://en.wikipedia.org/wiki/Vanitas#Themes',
 'https://en.wikipedia.org/wiki/Vanitas',
 'https://en.wikipedia.org/wiki/Vanitas#Motifs',
 'https://www.definitions.net/definition/vanitas']

In [19]:
# Inspect the raw HTML of the second downloaded page.
results[1].text

'<!DOCTYPE html>\n<!--[if lt IE 7 ]> <html lang="en" class="no-js ie6"> <![endif]-->\n<!--[if IE 7 ]>    <html lang="en" class="no-js ie7"> <![endif]-->\n<!--[if IE 8 ]>    <html lang="en" class="no-js ie8"> <![endif]-->\n<!--[if IE 9 ]>    <html lang="en" class="no-js ie9"> <![endif]-->\n<!--[if (gt IE 9)|!(IE)]><!--> \n<html lang="en-GB" prefix="og: http://ogp.me/ns#" class="no-js"> <!--<![endif]-->\n<head>\n\t<meta charset="UTF-8">\n\t<meta name="viewport" content="width=device-width, initial-scale=1.0">\n\t<meta name="google-site-verification" content="Al454iq0XKoTEO111ktDAFH7d3oQ9IaQrJlai7dOMUQ" />\n<title>What is Vanitas? An art term defined</title>\n\n<!-- This site is optimized with the Yoast SEO plugin v3.3.1 - https://yoast.com/wordpress/plugins/seo/ -->\n<meta name="description" content="A type of symbolic work of art, popular in Flanders and the Netherlands in the 16th and 17th centuries. However, Vanitas appear in other historical periods."/>\n<meta name="robots" content="

In [26]:
import html_to_json
# Convert the second page's HTML with html_to_json.
# NOTE(review): `output_json` is not used by any later cell visible here.
output_json = html_to_json.convert(results[1].text)

In [54]:
import html2text
# Module-level converter shared by `extract_text`.
h = html2text.HTML2Text()
# Turn on html2text's ignore_links option for this converter.
h.ignore_links = True

In [60]:
def extract_text(html):
    """Convert HTML to text with the shared converter ``h`` and flatten it.

    Newlines, parentheses and backslashes in the converted text are each
    replaced by a single space.
    """
    # One translation table does the work of the four single-character swaps.
    to_space = str.maketrans({'\n': ' ', '(': ' ', ')': ' ', '\\': ' '})
    converted = h.handle(html)
    return converted.translate(to_space)

In [80]:
def get_content(link):
    """Download ``link`` with requests and return the response's ``text``."""
    page = requests.get(link)
    return page.text

In [81]:
def search(query):
    """Run a Google search and return the flattened text of each result page.

    Args:
        query (string): Raw search phrase, passed on to scrape_google.

    Returns:
        list: One text string per result link, in the order the links were
        returned. Empty when the search produced no links.
    """
    texts = []
    # Iterate the links directly; an empty result list simply skips the loop
    # instead of failing on an index lookup.
    for link in scrape_google(query):
        # Progress output: each URL is printed once, just before its fetch.
        print(link)
        texts.append(extract_text(get_content(link)))
    return texts

In [82]:
# Fetch and flatten the text of every result page for `query`.
texts = search(query)

https://www.thoughtco.com/vanitas-painting-definition-183179
https://www.thoughtco.com/vanitas-painting-definition-183179
https://www.xamou-art.com/word/vanitas/
https://www.collinsdictionary.com/dictionary/english/vanitas
https://en.wikipedia.org/wiki/Vanitas#Outside_visual_art
https://languages.oup.com/google-dictionary-en
https://www.yourdictionary.com/vanitas
https://www.nationalgallery.org.uk/paintings/glossary/vanitas
https://www.britannica.com/art/vanitas-art
https://en.wikipedia.org/wiki/Vanitas#In_modern_times
https://alicefryart.wordpress.com/2016/11/16/inspiration-vanitas-painting-and-the-symbolism-of-objects/
https://www.tate.org.uk/art/art-terms/v/vanitas
https://en.wikipedia.org/wiki/Vanitas
https://en.wikipedia.org/wiki/Vanitas#Motifs
https://www.definitions.net/definition/vanitas
https://en.wikipedia.org/wiki/Vanitas#Themes


In [94]:
# Display the extracted page texts.
texts

 '  ![] https://www.xamou-art.com/wp-content/uploads/2016/06/xamou-art- bubbles-2x439x91.png  ![xamou art] https://www.xamou-art.com/wp- content/uploads/2017/05/xamou-art-logo-17.png     *  Support   *  login  Toggle navigation    * art exhibitions & events   * art venues   * editorial   * artists   * art movements   * art world  __    *  Support   *  login  ![] https://www.xamou-art.com/wp-content/uploads/2013/05/vanitas-496x273.jpg   # Vanitas  A type of symbolic work of art which was very popular in Flanders and the Netherlands in the 16th and 17th centuries. However, Vanitas have been commonplace in other historical periods.  The Latin word means "vanity" but refers to the transient nature of our lives. When artists depict that they, draw on some references that we all know to be elusive, transient or ephemeral. Artists may incorporate in their art: skulls, decaying flesh, wilting flowers, Turning grapes, chronographs, the contrast between young and old sitters, crash test dummies 