In [3]:
from pytrends.request import TrendReq
from bs4 import BeautifulSoup
import urllib

import numpy as np
import pandas as pd

import time
import os
import multiprocessing
from multiprocessing import Pool

pytrends = TrendReq(hl='en-US', tz=360)

# Petite exploration du package pytrends

In [4]:
bitcoin_keywords = ['blockchain', 'bitcoin', 'btc', 'cryptocurrency', 'hodl']

In [4]:
a = pytrends.build_payload(bitcoin_keywords, cat=0, timeframe='2017-05-01 2017-08-01', geo='', gprop='')

In [5]:
pytrends.interest_over_time()

Unnamed: 0_level_0,blockchain,bitcoin,btc,cryptocurrency,hodl,isPartial
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-05-01,3,23,8,1,0,False
2017-05-02,4,25,8,1,0,False
2017-05-03,4,26,8,1,0,False
2017-05-04,4,33,10,2,0,False
2017-05-05,4,31,10,2,0,False
2017-05-06,3,25,9,2,0,False
2017-05-07,3,25,8,2,0,False
2017-05-08,4,28,10,2,0,False
2017-05-09,4,36,11,2,0,False
2017-05-10,5,34,10,2,0,False


# Obtenir des chiffres pertinents sur l'année
Par défaut, sur Google Trends, lorsque l'on fait une recherche sur un mot-clé sur une période par un an, la granularité est par semaine (et impossible d'en changer).

On écrit donc quelques fonctions pour "tricher" un peu : pour un mot-clé donné, on décide donc de prendre deux périodes plus petites (sept-huit mois), quitte à rescaler la deuxième période en se servant de la période de chevauchement intermédiaire.

In [81]:
def google_trends_tables(keyword, start_x = '2017-05-01', start_y = '2017-10-01', end_x = '2018-01-01', end_y = '2018-05,'):
    
    # Google Trends ne permet pas d'avoir une granularité par jour sur une période d'un an.
    #
    # Pour régler le problème, je bricole un peu :
    #  - Je fais une première période qui va du 1er mai 2017 au 1er janvier 2018 (bizarrement, ça marche),
    #  - Une deuxième période qui va du 1er octobre 2017 au 1er mai 2018
    #
    # J'utiliserai la période de chevauchement (trois mois) pour déterminer un scale moyen, que j'applique à la
    # colonne de nouvelles valeurs.
    #
    # Les valeurs sont pas vraiment les mêmes (Google s'amuse probablement avec du sampling), mais c'est pas
    # si mal.
    
    keyword = [keyword] # La méthode build_payload demande d'avoir un objet itérable (donc liste)
    
    pytrends_before, pytrends_after = TrendReq(hl = 'en-US', tz = 0), TrendReq(hl = 'en-US', tz = 0)
    
    # Déterminer les modèles pytrends
    pytrends_before.build_payload(keyword, cat = 0, timeframe = '2017-10-01 2018-05-01', geo = '', gprop = '')
    pytrends_after.build_payload(keyword, cat=0, timeframe='2018-02-01 2018-07-31', geo='', gprop='')
    
    # On obtient les tables correspondantes
    table_before = pytrends_before.interest_over_time()
    table_after = pytrends_after.interest_over_time()
    
    return (table_before, table_after)

def scale_generator(table_before, table_after):
    
    # Fonction qui permet de scale
    
    common_table = table_before.merge(table_after, "inner", right_index = True, left_index = True)
    common_table.columns = ['before_value', 'filler1', 'after_value', 'filler2']
    common_table['scale'] = common_table.before_value / common_table.after_value
        
    return common_table.scale.mean()
    
    
def google_trends_common(keyword, table_before = None, table_after = None):
    
    # Génère une grosse table qui fusionne un peu les deux tables before et after
    
    if table_before is None and table_after is None:
        tables = google_trends_tables(keyword)
        table_before, table_after = tables[0], tables[1]
    
    # On renomme les colonnes juste avec le mot clé
    table_before.columns = [keyword, "isPartial"]
    table_after.columns = [keyword, "isPartial"]
    
    # On applique le scale sur la deuxième table
    table_after[keyword] = table_after[keyword].apply(lambda x: int(scale_generator(table_before, table_after) * x), 1)
    
    return table_before.merge(table_after, "outer", right_index = True, left_index = True)


def trends_merge(keyword, min_length = 93, table_before = None, table_after = None):
    
    # Simplifie pour obtenir un truc un peu plus petit
    
    common_table = google_trends_common(keyword, table_before, table_after)
    
    x_values, y_values = common_table[keyword + '_x'], common_table[keyword + '_y']
    
    # Annuler les valeurs nulles
    x_values = x_values[~np.isnan(x_values)]
    y_values = y_values[~np.isnan(y_values)][min_length:]
    
    merged_values = list(x_values) + list(y_values)    
    merged_values = [value * 100 // max(merged_values) for value in merged_values]
    
    return merged_values
    
    
def google_trends_unique(keyword, min_length = 90, table_before = None, table_after = None):
    
    # Fonction finale à retenir pour l'utilisateur
    
    common_table = google_trends_common(keyword, table_before, table_after)

    common_table[keyword] = trends_merge(keyword, min_length, table_before, table_after)
    
    common_table.drop([keyword + '_x', keyword + '_y', "isPartial" + "_x", "isPartial" + "_y"], 1, inplace = True)
    
    return common_table
    

def google_trends(keywords, min_length = 90, table_before = None, table_after = None):
    
    trends = google_trends_unique(keywords[0], min_length, table_before, table_after)
    
    for keyword in keywords[1:]:
        
        trends = trends.merge(google_trends_unique(keyword, min_length, table_before, table_after),
                              'left', left_index = True, right_index = True)
        
    return trends

## Ça donne quoi en pratique ?

In [30]:
#trends = google_trends(['bitcoin', 'btc', 'cryptocurrency', 'hodl', 'blockchain'])
trends

Unnamed: 0_level_0,bitcoin,btc,cryptocurrency,hodl,blockchain
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-05-01,4.0,9.0,2.0,1.0,15.0
2017-05-02,4.0,10.0,1.0,1.0,19.0
2017-05-03,4.0,9.0,1.0,1.0,18.0
2017-05-04,5.0,12.0,2.0,1.0,20.0
2017-05-05,5.0,11.0,3.0,2.0,19.0
2017-05-06,4.0,10.0,2.0,0.0,12.0
2017-05-07,4.0,9.0,2.0,1.0,14.0
2017-05-08,5.0,11.0,3.0,2.0,17.0
2017-05-09,6.0,13.0,3.0,1.0,20.0
2017-05-10,6.0,12.0,3.0,2.0,20.0


In [63]:
trends.loc['2017-10-10']

bitcoin            9.0
btc               17.0
cryptocurrency     9.0
hodl               3.0
blockchain        36.0
Name: 2017-10-10 00:00:00, dtype: float64

On extrait ensuite les données dans un fichier .csv, qui sera exploité sur le notebook R (plus facile à utiliser pour les modèles de prédiction).

In [None]:
trends.to_csv('./data/trends.csv')

Pour des données plus complètes, on modifie un peu la fonction.

In [82]:
trends_2 = google_trends(['bitcoin', 'btc', 'cryptocurrency', 'hodl', 'blockchain'])
trends_2

Unnamed: 0_level_0,bitcoin,btc,cryptocurrency,hodl,blockchain
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-10-01,7.0,12.0,9.0,2.0,20.0
2017-10-02,8.0,14.0,10.0,2.0,32.0
2017-10-03,8.0,14.0,10.0,3.0,33.0
2017-10-04,8.0,16.0,9.0,2.0,32.0
2017-10-05,8.0,17.0,9.0,1.0,32.0
2017-10-06,7.0,14.0,8.0,2.0,31.0
2017-10-07,7.0,12.0,8.0,2.0,22.0
2017-10-08,7.0,13.0,8.0,4.0,21.0
2017-10-09,9.0,15.0,9.0,3.0,33.0
2017-10-10,9.0,16.0,9.0,4.0,37.0


In [83]:
merged_trends = trends.append(trends_2[213:])
merged_trends

Unnamed: 0_level_0,bitcoin,btc,cryptocurrency,hodl,blockchain
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-05-01,4.0,9.0,2.0,1.0,15.0
2017-05-02,4.0,10.0,1.0,1.0,19.0
2017-05-03,4.0,9.0,1.0,1.0,18.0
2017-05-04,5.0,12.0,2.0,1.0,20.0
2017-05-05,5.0,11.0,3.0,2.0,19.0
2017-05-06,4.0,10.0,2.0,0.0,12.0
2017-05-07,4.0,9.0,2.0,1.0,14.0
2017-05-08,5.0,11.0,3.0,2.0,17.0
2017-05-09,6.0,13.0,3.0,1.0,20.0
2017-05-10,6.0,12.0,3.0,2.0,20.0


In [84]:
merged_trends.to_csv('./data/merged_trends.csv')

# Tentative de scraping à partir de résultats Google

In [None]:
import feedparser

d = feedparser.parse("http://feeds.reuters.com/reuters/technologyNews")

In [20]:
import requests
r = requests.get("https://www.google.com/search?q=bitcoin&tbas=0&tbs=cdr:1,cd_min:5/1/2017,cd_max:5/1/2018&tbm=nws&ei=GuygW9O9pC7LsAHi0Y3gBQ&sa=N&biw=709&bih=821&dpr=1&start=0",
                 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'})

soup = BeautifulSoup(r.text, "html.parser")

soup

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">

<html>
<head><meta content="text/html; charset=utf-8" http-equiv="content-type"><meta content="initial-scale=1" name="viewport"><title>https://www.google.com/search?q=bitcoin&amp;tbas=0&amp;tbs=cdr:1,cd_min:5/1/2017,cd_max:5/1/2018&amp;tbm=nws&amp;ei=GuygW9O9pC7LsAHi0Y3gBQ&amp;sa=N&amp;biw=709&amp;bih=821&amp;dpr=1&amp;start=0</title></meta></meta></head>
<body onload="e=document.getElementById('captcha');if(e){e.focus();}" style="font-family: arial, sans-serif; background-color: #fff; color: #000; padding:20px; font-size:18px;">
<div style="max-width:400px;">
<hr noshade="" size="1" style="color:#ccc; background-color:#ccc;"><br>
<form action="index" id="captcha-form" method="post">
<script async="" defer="" src="https://www.google.com/recaptcha/api.js"></script>
<script>var submitCallback = function(response) {document.getElementById('captcha-form').submit();};</script>
<div class="g-recaptcha" data-callback="submitCall

In [7]:
def google_scraper(document_url, min_ind = 0, max_ind = np.Inf, step = 1, sleep = 1, header = True, **kwargs):
        
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    # Header to show that we are not a bot
    # Note that for some reason, the function works much better if we pretend we are using Chrome:
    #  - The time filter (from May 1st 2017 to May 1st 2018) works as intended, which is not the case with Firefox,
    #  - The date in the snippets can be recognised normally, thanks to its HTML tag
    
    results = pd.DataFrame(columns = kwargs)
    
    ind = min_ind - step # The number of the pages that we iterate through
    attempt = 0
    
    if document_url[-1] == '1':
        document_url = document_url[:-1] # In order to have the proper number of pages!
    
    print('Document: ', document_url)

    while True and ind < max_ind:
        ind += step
        print('Page', ind, " –  URL: ", document_url + str(ind)) # Proves the algorithm is still working
        result = pd.DataFrame(columns = kwargs)
        time.sleep(sleep) # May be needed against anti-bot measures?
        
        try:
            if header == True:
                req = requests.get(document_url + str(ind), headers = headers)
            else: # Sometimes (like with BestBuy), it does not even work if there is a header ...
                req = requests.get(document_url + str(ind))
                
            soup = BeautifulSoup(req.text, "html.parser")
    
            for key, value in kwargs.items():
                result[str(key)] = soup.findAll(value[0], value[1])
            
            result['page'] = (ind // step) + (1 - min_ind)
            results = results.append(result, ignore_index = True)
            
            if len(result) == 0 and attempt >= 3: # If the result is empty, there are no more comments to get -> we break the loop
                return(results)
            
            elif len(result) == 0:
                attempt += 1
                ind -= 10
                print ('Attempt no.' + str(attempt))
                
            else:
                attempt = 0
        
        except KeyboardInterrupt: # If we want to stop the code, well, we still retain our data
            print('Keyboard Interrupt')
            return(results)
        
        #except: # Prevents the code from stopping abruptly (sometimes the website will return an error for no reason)
        #    print('Some error:', ind)
        #    continue
            
    else:
        return(results)    

In [9]:
def data_scraper(html_doc, html_type, html_class):
    
    try:
        result = html_doc.findAll(html_type, html_class)[0].text
        
    except IndexError:
        result = np.NaN
        
    return result

def data_interpretator(result, col_name, **kwargs):
    
    for key, value in kwargs.items():
        
        result[str(key)] = result[col_name].apply(lambda x: data_scraper(x, value[0], value[1]))
        
    return result

# Etablissement d'un corpus de sites

Ce corpus est composé de trois types de sites Internet, qui offrent chacun une perspective legèrement différente :

- Les sites de journaux orientés économique (type Financial Times, Wall Street Journal), qui s'intéressent au sujet, mais peut-être de manière un peu conservatrice,

- Les sites de journaux généralistes (The Guardian), qui suivent l'affaire d'un peu plus loin, et qui ont de bonnes chances *a priori* de suivre la hype lorsque le marché monte, et d'en parler beaucoup moins après une chute,

- Les sites spécialisés (Coindesk).

In [10]:
financial_newspapers = ['ft.com', 'bbc.com', 'cnn.com', 'cnbc.com', 'economist.com', 'forbes.com', 'wsj.com',
                        'bloomberg.com', 'investopedia.com', 'fortune.com', 'foxbusiness.com', 'born2invest.com',
                        'economictimes.indiatimes.com', 'business-standard.com', 'reuters.com']

general_newspapers = ['theguardian.com', 'nytimes.com', 'washingtonpost.com', 'chicagotribune.com', 'abcnews.go.com',
                      'cbsnews.com', 'nbcnews.com', 'thetimes.co.uk', 'independent.co.uk', 'time.com']

crypto_websites = ['coindesk.com', 'coinjournal.net', 'coininsider.com', 'cointelegraph.com', 'bitcoinmagazine.com',
                   'cryptonews.com']

In [14]:
def table_generator(website_list, max_ind = 400, sleep = 1):
    
    final_output = pd.DataFrame(columns = ['snippet', 'page', 'date', 'title', 'abstract'])
    
    for newspaper in website_list:
        
        google_url = "https://www.google.com/search?q=bitcoin+site:" + newspaper + "&lr=lang_en&rlz=1C5CHFA_enFR566FR566&tbs=lr:lang_1en,cdr:1,cd_min:5/1/2017,cd_max:5/1/2018&ei=3lGhW73pC8r3qwGV6Kn4BQ&sa=N&biw=709&bih=821&start="
        
        newspaper_results = google_scraper(google_url, step = 10, max_ind = max_ind, sleep = sleep, snippet = ('div', 'g'))
        
        data_interpretator(newspaper_results, "snippet",
                   title = ('h3', 'r'),
                   abstract = ('span', 'st'),
                   date = ('span', 'f'))
        
        newspaper_results['source'] = newspaper
        
        final_output = final_output.append(newspaper_results, ignore_index = True)
        
    return final_output


def individual_table(website, max_ind = 400, sleep = 120):

        
    google_url = "https://www.google.com/search?q=bitcoin+site:" + website + "&lr=lang_en&rlz=1C5CHFA_enFR566FR566&tbs=lr:lang_1en,cdr:1,cd_min:5/1/2017,cd_max:5/1/2018&ei=3lGhW73pC8r3qwGV6Kn4BQ&sa=N&biw=709&bih=821&start="
        
    newspaper_results = google_scraper(google_url, step = 10, max_ind = max_ind, sleep = sleep, snippet = ('div', 'g'))
        
    data_interpretator(newspaper_results, "snippet",
                   title = ('h3', 'r'),
                   abstract = ('span', 'st'),
                   date = ('span', 'f'))
        
    newspaper_results['source'] = website
        
        #final_output = final_output.append(newspaper_results, ignore_index = True)
        
    #return final_output
    return newspaper_results

In [None]:
#financial_table = table_generator(financial_newspapers, max_ind = 400, sleep = 5)

In [None]:
#financial_table

In [None]:
#financial_table = table_generator(financial_newspapers, max_ind = 400, sleep = 16)

In [12]:
def multiprocessor(applied_function, website_list):

    if __name__ == '__main__':
        start_time = time.time()
        pool = Pool(len(website_list) + 1)
        final_output = pool.map_async(applied_function, website_list)
        pool.close()
        pool.join()
        
    return final_output

In [28]:
a = multiprocessor(individual_table, financial_websites)
stock_finance = a.get()
stock_finance

[     page                                            snippet  \
 0     1.0  <div class="g"><!--m--><div data-hveid="CAcQAA...   
 1     1.0  <div class="g"><!--m--><div data-hveid="CAYQAA...   
 2     1.0  <div class="g"><!--m--><div data-hveid="CAgQAA...   
 3     1.0  <div class="g"><!--m--><div data-hveid="CAkQAA...   
 4     1.0  <div class="g"><!--m--><div data-hveid="CAIQAA...   
 5     1.0  <div class="g"><!--m--><div data-hveid="CAQQAA...   
 6     1.0  <div class="g"><!--m--><div data-hveid="CAEQAA...   
 7     1.0  <div class="g"><!--m--><div data-hveid="CAAQAA...   
 8     1.0  <div class="g"><!--m--><div data-hveid="CAUQAA...   
 9     1.0  <div class="g"><!--m--><div data-hveid="CAMQAA...   
 10    1.0  <div class="g kno-kp mnr-c g-blk" data-hveid="...   
 11    2.0  <div class="g"><!--m--><div data-hveid="CAkQAA...   
 12    2.0  <div class="g"><!--m--><div data-hveid="CAgQAA...   
 13    2.0  <div class="g"><!--m--><div data-hveid="CAcQAA...   
 14    2.0  <div class="g

In [30]:
finance_data = pd.DataFrame(columns = ['snippet', 'date', 'title', 'abstract', 'source'])

for dataset in stock_finance:
    
    finance_data = finance_data.append(stock_finance, ignore_index = True)
    
finance_data

Unnamed: 0,abstract,date,page,snippet,source,title
0,18 Jan 2018 - There goes bitcoin. The world's ...,18 Jan 2018 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAcQAA...",ft.com,I told you investing in bitcoin was a bad idea...
1,10 Dec 2017 - The Japanese exchange at the hea...,10 Dec 2017 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAYQAA...",ft.com,Bitcoin feeding frenzy fuelled by 15 times lev...
2,29 Nov 2017 - The current craze for cryptocurr...,29 Nov 2017 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAgQAA...",ft.com,There are many reasons to be cautious about bi...
3,24 Dec 2017 - Bitcoin has struggled to recover...,24 Dec 2017 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAkQAA...",ft.com,Bitcoin drop continues during holiday trading ...
4,24 Dec 2017 - Bitcoin suffered a day of wild s...,24 Dec 2017 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAIQAA...",ft.com,Bitcoin swings wildly after its biggest revers...
5,19 Mar 2018 - All the past bitcoin bear market...,19 Mar 2018 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAQQAA...",ft.com,"The Bitcoin collapses, charted | FT Alphaville"
6,14 Dec 2017 - Here's a thought experiment. If ...,14 Dec 2017 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAEQAA...",ft.com,Bitcoin's fractioning problem | FT Alphaville
7,7 Dec 2017 - Bitcoin prices on its biggest exc...,7 Dec 2017 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAAQAA...",ft.com,Chaotic trading marks new surge in bitcoin pri...
8,12 Jan 2018 - Bitcoin investors trying to chan...,12 Jan 2018 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAUQAA...",ft.com,Bitcoin investors struggle to cash out new for...
9,11 Dec 2017 - Goldman Sachs has answered the q...,11 Dec 2017 -,1.0,"<div class=""g""><!--m--><div data-hveid=""CAMQAA...",ft.com,Bitcoin is not taking demand from gold – Goldm...


In [38]:
finance_data.to_csv('./data/finance_data.csv')

In [13]:
general_data = multiprocessor(individual_table, general_newspapers)

Document:  https://www.google.com/search?q=bitcoin+site:theguardian.com&lr=lang_en&rlz=1C5CHFA_enFR566FR566&tbs=lr:lang_1en,cdr:1,cd_min:5/1/2017,cd_max:5/1/2018&ei=3lGhW73pC8r3qwGV6Kn4BQ&sa=N&biw=709&bih=821&start=
Page 0  –  URL:  https://www.google.com/search?q=bitcoin+site:theguardian.com&lr=lang_en&rlz=1C5CHFA_enFR566FR566&tbs=lr:lang_1en,cdr:1,cd_min:5/1/2017,cd_max:5/1/2018&ei=3lGhW73pC8r3qwGV6Kn4BQ&sa=N&biw=709&bih=821&start=0
Document:  https://www.google.com/search?q=bitcoin+site:nytimes.com&lr=lang_en&rlz=1C5CHFA_enFR566FR566&tbs=lr:lang_1en,cdr:1,cd_min:5/1/2017,cd_max:5/1/2018&ei=3lGhW73pC8r3qwGV6Kn4BQ&sa=N&biw=709&bih=821&start=
Page 0  –  URL:  https://www.google.com/search?q=bitcoin+site:nytimes.com&lr=lang_en&rlz=1C5CHFA_enFR566FR566&tbs=lr:lang_1en,cdr:1,cd_min:5/1/2017,cd_max:5/1/2018&ei=3lGhW73pC8r3qwGV6Kn4BQ&sa=N&biw=709&bih=821&start=0
Document:  https://www.google.com/search?q=bitcoin+site:nbcnews.com&lr=lang_en&rlz=1C5CHFA_enFR566FR566&tbs=lr:lang_1en,cdr:1,cd_m

Process ForkPoolWorker-11:
Traceback (most recent call last):
Process ForkPoolWorker-1:
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/queues.py", line 343, in get
    res = self._reader.recv_bytes()


Keyboard Interrupt


  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "<ipython-input-11-77db6ddf4f07>", line 28, in individual_table
    newspaper_results = google_scraper(google_url, step = 10, max_ind = max_ind, sleep = sleep, snippet = ('div', 'g'))
  File "/Users/gregoirevirepinte/anaconda/lib/python3.5/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
  File "<ipython-input-7-bce96921fecd>", line 24, in google_scraper
    time.sleep(sleep

KeyboardInterrupt: 