In [1]:

%load_ext autoreload
%autoreload 2

# For debugging turn on logging to console
import logging
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)

# mongodb
import pymongo as pm

# fine-tuned newspaper lib
from resources.newspaper import newspaper
from resources.newspaper.newspaper.source import Source
from resources.newspaper.newspaper.article import Article

import bs4 as bs
from urllib.parse import urljoin
from dateutil.parser import parse as date_parser
from time import sleep
import random
import pytz
import datetime

import requests

In [23]:
# for infinite scroll page
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import selenium
import sys

import unittest, time, re

# to divert selenium log stream away
logging.getLogger('selenium').setLevel(logging.WARNING)

In [3]:
# Connect to the MongoDB server listening on localhost:27017.
conn = 'mongodb://localhost:27017'
# maxPoolSize is handed to pymongo as the connection-pool limit for this client.
client = pm.MongoClient(conn, maxPoolSize=200)

# Database handle that is passed to the scraping helpers as `db`.
# Alternative database name kept for reference: 'scrape'
DB_NAME = 'FINALP'
db = client[DB_NAME]

def saveToDB(db, collection, url, html, meta=None):
    """
    Upserts a scraped page into mongoDB, keyed by its 'url' value, so that
    saving the same url again overwrites the stored document instead of
    creating a duplicate.

    Parameters:
    --------
    db         : mongo database handle (indexed by collection name)
    collection : name of the collection to write to
    url        : page url; used as the lookup key of the document
    html       : html of the page
    meta       : optional dict of extracted features; stored as {} when omitted

    Returns:
    --------
    None
    """
    # Build a fresh dict per call: a `meta={}` default is a single object
    # shared by every call that omits the argument.
    if meta is None:
        meta = {}

    db[collection].update_one(
        {'url': url},
        {'$set': {'url': url, 'html': html, 'meta': meta}},
        upsert=True
    )
    log.debug('Saved to DB')

def scrape(url, db, collection):
    '''
    Downloads and parses a single article with the newspaper `Article` class
    and stores its html and extracted features in mongoDB via saveToDB

    Parameters:
    --------
    url         : url of the article to scrape
    db          : mongo database handle, forwarded to saveToDB
    collection  : mongodb collection name, forwarded to saveToDB

    Returns:
    --------
    On success: whatever `article.publish_date` holds after parsing
    On failure: the current time as a timezone-aware UTC datetime
                (nothing is saved in that case)
    '''
    log.debug(f"Exctracting features from {url}")
    try:
        article = Article(url)
        article.download()
        # parse() may only extract a snippet of the text...
        # check the database for results of text extraction
        # and apply additional processing if needed after
        # the article has been stored in the DB
        # (the original note points to the Newrepublic code for an example)
        article.parse()
    except Exception as e:
        log.critical(f'Data not saved: {e}')
        # "now" is a sentinel that can never be older than a cutoff date, so a
        # failed article does not end the caller's loop. It has to be
        # timezone-aware: the caller in this notebook compares the result with
        # a UTC-localized cutoff, and comparing a naive datetime with an aware
        # one raises TypeError.
        return datetime.datetime.now(datetime.timezone.utc)

    features = {
        'date'    :article.publish_date,
        'title'   :article.title,
        'text'    :article.text,
        'authors' :article.authors
    }
    saveToDB(db, collection, article.url, article.html, meta=features)

    return article.publish_date

def addToDB(DB_NAME,COL_NAME,PATH,FILE):
    '''
    Imports a file into mongoDB by running the `mongoimport` command line tool

    Parameters:
    --------
    DB_NAME : Name of the database to connect to
    COL_NAME: Name of the collection to import into
    PATH    : Path to folder with the file (concatenated with FILE as is,
              so it must end with a path separator)
    FILE    : Filename

    Returns:
    --------
    None. The tool's output is echoed line by line; the success message is
    printed only when mongoimport exits with code 0.
    '''
    import subprocess  # local import: only this helper needs it

    # An argument list (no shell) instead of an interpolated `!` line, so
    # names and paths containing spaces or shell metacharacters reach
    # mongoimport unchanged.
    cmd = [
        'mongoimport',
        '--db', str(DB_NAME),
        '--collection', str(COL_NAME),
        '--file', str(PATH + FILE),
        '--batchSize', '1',
    ]

    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,   # interleave errors with normal output
            universal_newlines=True,
        )
    except OSError as e:
        # e.g. mongoimport is not installed or not on PATH
        print(f'Could not run mongoimport: {e}')
        return

    # Echo the output as it arrives so progress stays visible in the notebook;
    # leaving the `with` block waits for the process to finish.
    with proc:
        for line in proc.stdout:
            print(line, end='')

    if proc.returncode != 0:
        print(f'mongoimport exited with code {proc.returncode}; '
              f'collection {COL_NAME} may be missing or incomplete')
        return

    print(f'Collection {COL_NAME} in {DB_NAME} database created')

In [None]:
# MongoDB collection name that is passed to scrape() for every article.
collection = 'dailywire'
# Site root: opened by Sel and used as the base when resolving article links.
source = 'https://www.dailywire.com'

# Show only WARNING and above from the 'selenium' logger, keeping its
# lower-level records out of this notebook's log output.
logging.getLogger('selenium').setLevel(logging.WARNING)

class Sel(unittest.TestCase):
    """
    Small wrapper around a Selenium Chrome driver for a listing page that
    reveals more articles through a "load more" button.

    Derives from unittest.TestCase, but this notebook uses it as a plain
    helper object: setUp() is called by hand, not by a test runner.
    """
    def setUp(self):
        """Starts Chrome and stores the driver and its settings on the instance."""
        self.driver = webdriver.Chrome()
        # element lookups keep retrying for up to 30 seconds before failing
        self.driver.implicitly_wait(30)
        self.base_url = source
        self.verificationErrors = []
        self.accept_next_alert = True
    def getPage(self):
        """Opens base_url; caches and returns the page source as utf-8 bytes."""
        driver = self.driver
        driver.get(self.base_url)
        self.html = driver.page_source.encode('utf-8')
        return self.html
    def scrollDown(self):
        """
        Clicks the 'button.load-more' element, caches the resulting page as
        utf-8 bytes in self.html and returns the page source as str.
        """
        # Use this instance's own driver (not a global that happens to hold
        # the instance), and the locator API that exists in every Selenium
        # version: the find_element_by_* helpers were removed in Selenium 4.
        driver = self.driver
        driver.find_element(By.CSS_SELECTOR, 'button.load-more').click()
        # read the page once so the cached bytes and the returned str match
        page_source = driver.page_source
        self.html = page_source.encode('utf-8')
        return page_source
    def shutdown(self):
        """Quits the browser and ends the driver session."""
        self.driver.quit()

# Open the site's front page in a Selenium-driven Chrome session.
data = Sel()

data.setUp()
data.getPage()
page = 1

# Cutoff: stop once the last article scraped on a scroll is older than this.
# Localized to UTC so it can be compared with timezone-aware publish dates.
utc=pytz.UTC
earliest_date = utc.localize(date_parser('2017-01-01'))

scraped_urls = []
seen_urls = set()    # same urls as scraped_urls, for O(1) membership tests
article_date = None  # result of the most recent scrape() call, if any

# try/finally: the browser is closed on every exit path, including an
# unexpected exception or a manual interrupt of this long-running cell.
try:
    while True:
        log.debug(f'NEXT SCROLL (#{page})')
        page += 1

        try:
            html = data.scrollDown()
        except NoSuchElementException:
            # nothing more to load: end of the listing
            log.debug('No "load more" button found on the page, stopping')
            break
        soup = bs.BeautifulSoup(html,'lxml')

        teasers = soup.find('section', {"class":"article-teasers article-teaser-template template-content"})
        if teasers is None:
            log.warning('Article teaser section not found on the page, stopping')
            break

        # href=True skips anchors that carry no link target
        for link in teasers.find_all('a', href=True):
            url = urljoin(source, link['href'])
            if url and url not in seen_urls:
                seen_urls.add(url)
                scraped_urls.append(url)
                log.debug(f'Processing url: {url}')
                article_date = scrape(url, db, collection)
                log.debug(f"Date scraped: {article_date}")

        # The cutoff is checked once per scroll, against the last article scraped.
        if article_date is None:
            log.debug('No publish date available to compare with the cutoff')
            continue
        try:
            reached_cutoff = article_date < earliest_date
        except TypeError as e:
            # e.g. a timezone-naive date compared with the aware cutoff
            log.debug(f"Something is wrong: {e}")
            continue
        if reached_cutoff:
            log.debug(f'Reached earliest date requested: {article_date}')
            break
finally:
    data.shutdown()

DEBUG:__main__:NEXT SCROLL (#1)
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33850/watch-ice-official-has-explain-democrat-senator-ryan-saavedra
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33850/watch-ice-official-has-explain-democrat-senator-ryan-saavedra
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33850/watch-ice-official-has-explain-democrat-senator-ryan-saavedra HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-31 12:40:22-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33853/facebook-announces-it-has-found-new-attempts-hank-berrien
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33853/facebook-announces-it-has-found-new-attempts-hank-berrien
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:http

DEBUG:__main__:Processing url: https://www.dailywire.com/news/33837/manafort-prosecutors-order-witnesses-not-mention-emily-zanotti
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33837/manafort-prosecutors-order-witnesses-not-mention-emily-zanotti
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33837/manafort-prosecutors-order-witnesses-not-mention-emily-zanotti HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-31 10:05:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33833/kansas-sheriff-hopes-arm-school-staff-prevent-kassy-dillon
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33833/kansas-sheriff-hopes-arm-school-staff-prevent-kassy-dillon
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "

DEBUG:__main__:Processing url: https://www.dailywire.com/news/33812/bob-woodwards-new-book-show-harrowing-life-inside-joseph-curl
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33812/bob-woodwards-new-book-show-harrowing-life-inside-joseph-curl
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33812/bob-woodwards-new-book-show-harrowing-life-inside-joseph-curl HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-31 07:44:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33814/ocasio-cortez-fundraise-los-angeles-pursuit-better-jeffrey-cawood
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33814/ocasio-cortez-fundraise-los-angeles-pursuit-better-jeffrey-cawood
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywir

DEBUG:__main__:Processing url: https://www.dailywire.com/news/33791/congressional-candidate-austin-petersen-suspended-kassy-dillon
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33791/congressional-candidate-austin-petersen-suspended-kassy-dillon
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33791/congressional-candidate-austin-petersen-suspended-kassy-dillon HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-30 15:40:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33788/watch-cnns-jim-acosta-shut-down-white-house-aide-amanda-prestigiacomo
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33788/watch-cnns-jim-acosta-shut-down-white-house-aide-amanda-prestigiacomo
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://w

DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-30 12:35:09-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33777/dave-rubin-will-smith-welcome-intellectual-dark-alexander-ruiz
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33777/dave-rubin-will-smith-welcome-intellectual-dark-alexander-ruiz
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33777/dave-rubin-will-smith-welcome-intellectual-dark-alexander-ruiz HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-30 12:20:48-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33776/trump-doubles-down-govt-shutdown-threat-says-hed-joseph-curl
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33776/trump-doubles-down-govt-shutdown-threat-says-hed-joseph-curl
DEBUG:urllib3.connectionpool:Starting new HTTPS connecti

DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-30 10:41:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33750/walsh-new-york-times-just-published-most-racist-matt-walsh
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33750/walsh-new-york-times-just-published-most-racist-matt-walsh
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33750/walsh-new-york-times-just-published-most-racist-matt-walsh HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-30 10:39:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33765/final-report-missing-malaysia-airlines-plane-calls-emily-zanotti
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33765/final-report-missing-malaysia-airlines-plane-calls-emily-zanotti
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (

DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-30 08:22:40-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33745/netflix-focused-building-faith-and-family-based-joseph-curl
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33745/netflix-focused-building-faith-and-family-based-joseph-curl
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33745/netflix-focused-building-faith-and-family-based-joseph-curl HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-30 08:17:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33741/giulianis-comment-about-russian-collusion-causes-james-barrett
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33741/giulianis-comment-about-russian-collusion-causes-james-barrett
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1

DEBUG:__main__:Processing url: https://www.dailywire.com/news/33731/rudy-giuliani-claims-someone-may-have-messed-cohen-emily-zanotti
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33731/rudy-giuliani-claims-someone-may-have-messed-cohen-emily-zanotti
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33731/rudy-giuliani-claims-someone-may-have-messed-cohen-emily-zanotti HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-29 10:52:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33730/federal-judge-border-reunification-process-emily-zanotti
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33730/federal-judge-border-reunification-process-emily-zanotti
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443

DEBUG:__main__:Processing url: https://www.dailywire.com/news/33714/prager-greatest-hysteria-american-history-dennis-prager
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33714/prager-greatest-hysteria-american-history-dennis-prager
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33714/prager-greatest-hysteria-american-history-dennis-prager HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-28 10:00:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33717/williams-can-we-trust-experts-walter-e-williams
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33717/williams-can-we-trust-experts-walter-e-williams
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33717/williams-can-we-trust-exper

DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33704/doctors-debunk-assisted-suicide-myths-not-always-paul-bois
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33704/doctors-debunk-assisted-suicide-myths-not-always-paul-bois HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-27 15:40:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33703/insane-brawl-breaks-out-trumps-hollywood-walk-fame-james-barrett
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33703/insane-brawl-breaks-out-trumps-hollywood-walk-fame-james-barrett
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33703/insane-brawl-breaks-out-trumps-hollywood-walk-fame-james-barrett HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33688/walsh-young-people-flock-socialism-because-they-matt-walsh HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-27 10:30:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33689/report-cbs-chairman-be-accused-sexual-misconduct-hank-berrien
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33689/report-cbs-chairman-be-accused-sexual-misconduct-hank-berrien
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33689/report-cbs-chairman-be-accused-sexual-misconduct-hank-berrien HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-27 10:14:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33687/sad-reality-show-stardo

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33670/kaboom-us-economy-explodes-51-growth-rate-joseph-curl HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-27 05:33:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33665/jane-fondas-tepid-apology-1972-photo-earns-ire-jacob-airey
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33665/jane-fondas-tepid-apology-1972-photo-earns-ire-jacob-airey
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33665/jane-fondas-tepid-apology-1972-photo-earns-ire-jacob-airey HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-27 04:48:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33662/ocasio-cortez-makes-her-most-idiotic-

DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33652/bidens-niece-avoids-jail-after-felony-conviction-ryan-saavedra HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-26 15:28:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33654/facebook-just-made-stock-market-history-and-not-james-barrett
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33654/facebook-just-made-stock-market-history-and-not-james-barrett
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33654/facebook-just-made-stock-market-history-and-not-james-barrett HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-26 15:19:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33651/yale-administrator-encouraged-accuser-come-forward-ashe-schow
DEBUG:__main__:Exctracting features fro

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33639/white-house-attacked-barring-cnn-reporter-take-hank-berrien HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-26 10:46:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33637/vomit-fraud-florida-uber-riders-claim-drivers-ashe-schow
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33637/vomit-fraud-florida-uber-riders-claim-drivers-ashe-schow
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33637/vomit-fraud-florida-uber-riders-claim-drivers-ashe-schow HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-26 10:44:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33635/obama-sought-wipe-out-security-cleara

DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33623/weird-dem-congressman-has-tampon-obsession-hank-berrien HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-26 08:22:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33615/kimmel-any-other-president-wouldnt-survive-cohen-paul-bois
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33615/kimmel-any-other-president-wouldnt-survive-cohen-paul-bois
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33615/kimmel-any-other-president-wouldnt-survive-cohen-paul-bois HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-26 08:20:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33618/irony-portlands-occupy-ice-protest-builds-wall-emily-zanotti
DEBUG:__main__:Exctracting features from https://www.dai

DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33604/watch-democrat-rep-makes-insane-suggestion-about-ryan-saavedra HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 18:04:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33597/watch-police-office-shaves-homeless-mans-beard-kassy-dillon
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33597/watch-police-office-shaves-homeless-mans-beard-kassy-dillon
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33597/watch-police-office-shaves-homeless-mans-beard-kassy-dillon HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 17:26:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33602/art-deal-real-michael-j-knowles
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/336

DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 14:20:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33586/co-founder-radical-feminist-group-femen-found-dead-paul-bois
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33586/co-founder-radical-feminist-group-femen-found-dead-paul-bois
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33586/co-founder-radical-feminist-group-femen-found-dead-paul-bois HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 13:39:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33584/another-major-win-trump-gets-concessions-eu-stock-hank-berrien
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33584/another-major-win-trump-gets-concessions-eu-stock-hank-berrien
DEBUG:urllib3.connectionpool:Starting new HTTPS connection

DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 10:55:00-07:00
DEBUG:__main__:NEXT SCROLL (#18)
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33564/watch-leftists-attack-lauren-southern-her-response-ryan-saavedra
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33564/watch-leftists-attack-lauren-southern-her-response-ryan-saavedra
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33564/watch-leftists-attack-lauren-southern-her-response-ryan-saavedra HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 10:49:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33560/santa-barbara-authorizes-potential-jail-time-kassy-dillon
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33560/santa-barbara-authorizes-potential-jail-time-kassy-dillon
DEBUG:urllib3.connectio

DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 09:01:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33552/shock-report-feds-say-anti-ice-protesters-hurled-emily-zanotti
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33552/shock-report-feds-say-anti-ice-protesters-hurled-emily-zanotti
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33552/shock-report-feds-say-anti-ice-protesters-hurled-emily-zanotti HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 08:34:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33548/last-jedi-director-deletes-20000-tweets-following-paul-bois
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33548/last-jedi-director-deletes-20000-tweets-following-paul-bois
DEBUG:urllib3.connectionpool:Starting new HTTPS connection

DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33530/dillon-nikki-haley-right-dont-own-libs-kassy-dillon
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33530/dillon-nikki-haley-right-dont-own-libs-kassy-dillon HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-25 05:25:00-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33537/miller-supporting-israel-womens-work-%E2%80%93-naturally-paul-miller
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33537/miller-supporting-israel-womens-work-%E2%80%93-naturally-paul-miller
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33537/miller-supporting-israel-womens-work-%E2%80%93-naturally-paul-miller HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DE

DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33522/nbcs-chief-global-correspondent-wants-trump-hank-berrien
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33522/nbcs-chief-global-correspondent-wants-trump-hank-berrien HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-24 14:10:01-07:00
DEBUG:__main__:Processing url: https://www.dailywire.com/news/33519/theres-new-privilege-town-thin-privilege-paul-bois
DEBUG:__main__:Exctracting features from https://www.dailywire.com/news/33519/theres-new-privilege-town-thin-privilege-paul-bois
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.dailywire.com
DEBUG:urllib3.connectionpool:https://www.dailywire.com:443 "GET /news/33519/theres-new-privilege-town-thin-privilege-paul-bois HTTP/1.1" 200 None
DEBUG:__main__:Saved to DB
DEBUG:__main__:Date scraped: 2018-07-24 14:00:

In [47]:
# Manual cleanup: quit the Selenium browser session held by `data`
# (Sel.shutdown calls driver.quit()).
data.shutdown()