# Dependencies

In [1]:
# %load_ext autoreload
# %autoreload 2

# For debugging turn on logging to console
import logging
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)

# mongodb
import pymongo as pm

# fine-tuned newspaper lib
from resources.newspaper import newspaper
from resources.newspaper.newspaper.source import Source
from resources.newspaper.newspaper.article import Article

import bs4 as bs
from urllib.parse import urljoin
from dateutil.parser import parse as date_parser
from time import sleep
import random
import pytz
import datetime

import requests

logging.getLogger('urllib3').setLevel(logging.WARNING)

In [None]:
# for infinite scroll page
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import sys

import unittest, time, re

# to divert selenium log stream away
logging.getLogger('selenium').setLevel(logging.WARNING)


# Helpers

In [20]:
# Connect to the local MongoDB server through PyMongo.
# `client` is created once here and indexed by database name below.
conn = 'mongodb://localhost:27017'
client = pm.MongoClient(conn, maxPoolSize=200)

# Select the database handle (`db`) that the helper functions receive.
# Alternative database name, currently disabled:
# DB_NAME = 'scrape'
DB_NAME = 'FINALP'
db = client[DB_NAME]

def saveToDB(db, collection, url, html, meta=None):
    """
    Upserts a document into mongoDB, keyed by its 'url' value, so that
    saving the same url twice overwrites the document instead of
    duplicating it.
    
    Parameters:
    --------
    db          : mongo database handle (indexable by collection name)
    collection  : name of the collection to write to
    url         : article url; used as the unique key of the document
    html        : raw html to store
    meta        : optional dict of extracted features; defaults to an
                  empty dict
    
    Returns:
    --------
    None. The document is written to the database as a side effect.
    """
    # A fresh dict per call: a `meta={}` default would be one object shared
    # between all calls that omit the argument.
    if meta is None:
        meta = {}

    document = {
        'url'  : url,
        'html' : html,
        'meta' : meta
    }
    # upsert=True inserts the document when no match for 'url' exists yet
    db[collection].update_one({'url' : url}, {'$set': document}, upsert=True)
    log.debug('Saved to DB')

def scrape(url, db, collection):
    '''
    Downloads and parses a single article and stores it in mongoDB.
    
    Parameters:
    --------
    url         : address of the article to fetch
    db          : mongo database handle, forwarded to saveToDB
    collection  : mongodb collection name, forwarded to saveToDB
    
    Returns:
    --------
    The publish date reported by the parsed article. When downloading or
    parsing fails nothing is stored and the current local time is
    returned instead.
    
    '''
    log.debug(f"Exctracting features from {url}")
    try:
        parsed = Article(url)
        parsed.download()
        # parse() may only extract a snippet of the text; inspect the
        # stored results and post-process them after the article is in
        # the DB if needed (see the code below Newrepublic for an example)
        parsed.parse()
    except Exception as err:
        log.critical(f'Data not saved: {err}')
        return datetime.datetime.now()
    else:
        features = dict(
            date=parsed.publish_date,
            title=parsed.title,
            text=parsed.text,
            authors=parsed.authors,
        )
        saveToDB(db, collection, parsed.url, parsed.html, meta=features)
        return parsed.publish_date

def addToDB(DB_NAME,COL_NAME,PATH,FILE):
    '''
    Imports a file into mongoDB by running the `mongoimport` command line
    tool.
    
    Parameters:
    --------
    DB_NAME : Name of the database to connect to
    COL_NAME: Name of the collection to create
    PATH    : Path to folder with the file (concatenated with FILE as-is)
    FILE    : Filename
    
    Returns:
    --------
    None. Prints the tool's output followed by a success or failure
    message.
    '''
    import os
    import subprocess

    # The shell used to expand a leading "~"; do it explicitly because no
    # shell is involved any more.
    file_path = os.path.expanduser(PATH + FILE)

    # Argument list instead of a shell string: names and paths containing
    # spaces or shell metacharacters are passed through verbatim.
    command = [
        'mongoimport',
        '--db', DB_NAME,
        '--collection', COL_NAME,
        '--file', file_path,
        '--batchSize', '1',
    ]
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # fold stderr into the captured output
            universal_newlines=True,
        )
    except FileNotFoundError:
        print('mongoimport executable not found; nothing was imported')
        return

    if completed.stdout:
        print(completed.stdout, end='')

    # Only announce success when the tool actually succeeded
    if completed.returncode == 0:
        print(f'Collection {COL_NAME} in {DB_NAME} database created')
    else:
        print(f'mongoimport failed with exit code {completed.returncode}; '
              f'collection {COL_NAME} in {DB_NAME} database was not created')

# NY Post

In [None]:
# MongoDB collection name passed to scrape() for every article found below
collection = 'nypost'
# News landing page: opened by Sel.getPage() and used as the base when
# resolving the links found on the page with urljoin()
source = 'https://nypost.com/news/'

class Sel(unittest.TestCase):
    """
    Small wrapper around a Selenium Chrome driver for pages that load more
    content as they are scrolled.

    The methods are called manually in this notebook (setUp -> getPage ->
    scrollDown ... -> shutdown) rather than by a unittest runner.
    """

    def setUp(self):
        """Start a Chrome session and remember the module-level `source` url."""
        self.driver = webdriver.Chrome()
        self.driver.implicitly_wait(30)
        self.base_url = source
        self.verificationErrors = []
        self.accept_next_alert = True

    def getPage(self):
        """
        Load `base_url` in the browser.

        Returns the page source as UTF-8 encoded bytes, also kept in
        `self.html`.
        """
        self.driver.get(self.base_url)
        self.html = self.driver.page_source.encode('utf-8')
        return self.html

    def scrollDown(self):
        """
        Scroll to the bottom of the page.

        Returns the page source as a str; the UTF-8 encoded bytes of the
        same snapshot are kept in `self.html`.
        """
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Read the source once: each access asks the browser again, so two
        # reads could leave self.html and the return value out of sync.
        page_source = self.driver.page_source
        self.html = page_source.encode('utf-8')
        return page_source

    def shutdown(self):
        """Close the browser and end the driver session."""
        self.driver.quit()

# Scrape the infinite-scroll news page: scroll, scrape every link not seen
# before, and stop once the last scraped article is older than earliest_date.
data = Sel()

data.setUp()
page = 1

utc=pytz.UTC
earliest_date = utc.localize(date_parser('2017-01-01'))

scraped_urls = []
# Bound up front so the date check below cannot hit an undefined name when a
# scroll yields no new links.
article_date = None

try:
    data.getPage()

    while True:
        log.debug(f'NEXT SCROLL (#{page})')
        page += 1

        html = data.scrollDown()
        soup = bs.BeautifulSoup(html,'lxml')

        container = soup.find('div', {"id":"primary"})
        if container is None:
            log.debug('No element with id "primary" on the page; stopping')
            break

        # href=True skips anchors without an href attribute, which would
        # otherwise raise KeyError on link['href']
        for link in container.findAll('a', href=True):
            url = urljoin(source, link['href'])
            if url and url not in scraped_urls:
                scraped_urls.append(url)
                log.debug(f'Processing url: {url}')
                article_date = scrape(url, db, collection)

        if article_date is None:
            # nothing scraped yet, or the last article has no publish date
            continue
        try:
            # scrape() returns a naive timestamp when it fails; treat naive
            # values as UTC so they can be compared with earliest_date
            if article_date.tzinfo is None:
                article_date = utc.localize(article_date)
            if article_date < earliest_date:
                log.debug(f'Reached earliest date requested: {article_date}')
                break
        except Exception as e:
            log.debug(f"Something is wrong: {e}")
finally:
    # always release the browser, even when scraping fails half-way
    data.shutdown()

In [None]:
# Scrape the paginated news archive page by page, starting at `page`, until an
# article older than earliest_date is found.
collection = 'nypost'
source = 'https://nypost.com/news/page/'
page   = 182  # listing page to start (or resume) from

utc=pytz.UTC
# timezone-aware so it can be compared with aware publish dates
earliest_date = utc.localize(date_parser('2017-01-01'))

# A flag is needed because `break` inside the for loop only leaves that loop,
# not the enclosing while loop.
reached_earliest = False

while not reached_earliest:
    log.debug(f'\n\n PROCESSING PAGE: {source+str(page)}\n\n\
              ====================================\n\n')
    s = Source(source+str(page))
    page += 1
    s.download()
    soup = bs.BeautifulSoup(s.html,'lxml')

    container = soup.find('div', {"id":"primary"})
    if container is None:
        log.debug('No element with id "primary" on the page; stopping')
        break

    for h3 in container.findAll('h3'):
        article_date = None
        # href=True skips anchors that carry no href attribute
        for link in h3.findAll('a', href=True):
            url = link['href']
            log.debug(f'Processing url: {url}')

            try:
                article_date = scrape(url, db, collection)
            except Exception as e:
                log.debug(e)
                article_date = None  # date unknown: make sure scraping continues

        if article_date is None:
            # no link in this headline, scraping failed, or no publish date
            continue

        try:
            # naive timestamps are treated as UTC to make them comparable
            if article_date.tzinfo is None:
                article_date = utc.localize(article_date)
            if article_date < earliest_date:
                log.debug(f'Reached earliest date requested: {article_date}')
                reached_earliest = True
                break
        except Exception as e:
            log.debug(f'Exception: {e}')
            continue

DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/182



DEBUG:__main__:Processing url: https://nypost.com/2018/06/20/giuliani-tells-trump-campaign-manager-to-keep-quiet-after-sessions-rant/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/20/giuliani-tells-trump-campaign-manager-to-keep-quiet-after-sessions-rant/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/20/man-arrested-in-london-subway-explosion/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/20/man-arrested-in-london-subway-explosion/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/20/indonesia-says-180-could-be-dead-after-tourist-boat-sinks/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/20/indonesia-says-180-could-be-dead-after-tourist-boat-sinks/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/20/mom-says-principal-humiliated-12-year-old-dau

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/teen-plotted-isis-style-school-bombing-to-cause-fear-cops-say/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/teen-plotted-isis-style-school-bombing-to-cause-fear-cops-say/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/white-house-deputy-chief-of-staff-to-resign/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/white-house-deputy-chief-of-staff-to-resign/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/185



DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/trump-administration-giving-nonprofit-nearly-460m-to-house-migrant-kids/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/trump-administration-giving-nonprofit-nearly-460m-to-house-migrant-kids/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/un

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/dennis-rodman-heaps-praise-upon-friend-for-life-kim-jong-un/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/horny-couple-joins-mile-high-club-in-full-view-of-passengers/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/horny-couple-joins-mile-high-club-in-full-view-of-passengers/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/white-house-downplays-china-trade-tensions-as-dispute/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/white-house-downplays-china-trade-tensions-as-dispute/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/inspector-general-fbi-didnt-let-anti-trump-views-affect-probe/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/inspector-general-fbi-didnt-let-anti-trump-views-affect-probe/
DEBUG:__main__:Saved to DB
DEBU

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/army-officially-splits-with-west-point-commie-cadet/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/army-officially-splits-with-west-point-commie-cadet/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/ncaa-basketball-referee-pleads-guilty-to-molesting-teen-boy/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/ncaa-basketball-referee-pleads-guilty-to-molesting-teen-boy/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/19/china-blasts-trumps-new-tariff-threat-as-blackmail/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/19/china-blasts-trumps-new-tariff-threat-as-blackmail/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/18/china-says-kim-jong-un-is-planning-a-visit-to-beijing/
DEBUG:__main__:Exctracting features from https:/

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/18/michelle-obama-joins-laura-bush-to-oppose-trumps-immigration-policy/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/18/sisters-accused-of-stabbing-each-other-in-mutual-combat/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/18/sisters-accused-of-stabbing-each-other-in-mutual-combat/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/18/no-a-werewolf-was-not-killed-in-montana/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/18/no-a-werewolf-was-not-killed-in-montana/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/18/teacher-nearly-struck-by-lightning-while-recording-incoming-storm/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/18/teacher-nearly-struck-by-lightning-while-recording-incoming-storm/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing 

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/17/man-stabs-himself-to-death-after-thinking-his-vest-was-stab-proof/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/17/man-stabs-himself-to-death-after-thinking-his-vest-was-stab-proof/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/17/at-least-5-dead-as-suv-chased-by-border-patrol-crashes/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/17/at-least-5-dead-as-suv-chased-by-border-patrol-crashes/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/17/kim-kardashian-wants-to-prove-death-row-inmates-innocence/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/17/kim-kardashian-wants-to-prove-death-row-inmates-innocence/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/17/nunes-vows-hell-to-pay-if-russia-probe-docs-not-turned-over/
DEBUG:__ma

DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/199



DEBUG:__main__:Processing url: https://nypost.com/2018/06/16/waitress-fired-booted-from-air-force-reserves-over-racist-video/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/16/waitress-fired-booted-from-air-force-reserves-over-racist-video/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/16/driver-detained-after-taxi-crashes-into-pedestrians-in-moscow/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/16/driver-detained-after-taxi-crashes-into-pedestrians-in-moscow/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/16/roller-coaster-was-taken-out-of-service-twice-before-derailment/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/16/roller-coaster-was-taken-out-of-service-twice-before-derailment/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://

DEBUG:__main__:Processing url: https://nypost.com/2018/06/15/man-accused-of-chugging-beer-during-dui-stop/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/15/man-accused-of-chugging-beer-during-dui-stop/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/15/911-call-released-in-fatal-gator-attack-an-alligator-got-this-lady/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/15/911-call-released-in-fatal-gator-attack-an-alligator-got-this-lady/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/202



DEBUG:__main__:Processing url: https://nypost.com/2018/06/15/officials-probing-death-with-links-to-2016-family-massacre/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/15/officials-probing-death-with-links-to-2016-family-massacre/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/15/this-is-why-you-shouldnt-park-in-front-of-f

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/15/priceless-revolutionary-war-medal-goes-on-display/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/15/priceless-revolutionary-war-medal-goes-on-display/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/15/5-injured-in-shooting-at-california-funeral-home-report/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/15/5-injured-in-shooting-at-california-funeral-home-report/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/15/fbi-employee-working-on-hillary-probe-blasted-trump-supporters-as-lazy-pos-after-election/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/15/fbi-employee-working-on-hillary-probe-blasted-trump-supporters-as-lazy-pos-after-election/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/14/charges-dropped-against-college-stude

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/14/french-police-stop-suspected-terror-attack-at-swingers-club/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/14/famed-stanford-prison-experiment-was-a-fraud-scientist-says/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/14/famed-stanford-prison-experiment-was-a-fraud-scientist-says/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/14/deer-runs-wild-through-washington-dc-subway-station/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/14/deer-runs-wild-through-washington-dc-subway-station/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/14/disgraced-ex-doctor-charged-in-1988-cold-case-murder/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/14/disgraced-ex-doctor-charged-in-1988-cold-case-murder/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing ur

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/14/officer-mocks-drug-suspect-after-violent-arrest/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/14/tunisian-man-accused-of-plotting-ricin-attack-in-germany/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/14/tunisian-man-accused-of-plotting-ricin-attack-in-germany/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/14/alice-johnson-never-heard-of-kim-kardashian-before-clemency-effort/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/14/alice-johnson-never-heard-of-kim-kardashian-before-clemency-effort/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/14/kim-jong-un-gets-the-movie-star-treatment-in-north-korea/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/14/kim-jong-un-gets-the-movie-star-treatment-in-north-korea/
DEBUG:__main__:Saved to DB
DEBUG:__

DEBUG:__main__:Processing url: https://nypost.com/2018/06/13/ex-senate-staffer-charged-with-lying-to-feds-wants-to-bar-trump-from-talking-about-case/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/13/ex-senate-staffer-charged-with-lying-to-feds-wants-to-bar-trump-from-talking-about-case/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/13/teen-in-critical-condition-after-nearly-drowning-at-six-flags/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/13/teen-in-critical-condition-after-nearly-drowning-at-six-flags/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/13/teacher-fled-when-she-saw-student-sex-charges-on-social-media-cops/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/13/teacher-fled-when-she-saw-student-sex-charges-on-social-media-cops/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/13/man-accused-of-human-s

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/13/pompeo-north-korea-will-take-major-disarmament-steps-over-next-two-years/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/13/norwegian-lawmakers-nominate-trump-for-nobel-peace-prize/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/13/norwegian-lawmakers-nominate-trump-for-nobel-peace-prize/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/216



DEBUG:__main__:Processing url: https://nypost.com/2018/06/13/no-one-wants-to-snitch-cops-puzzled-by-cockfighting-murder/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/13/no-one-wants-to-snitch-cops-puzzled-by-cockfighting-murder/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/13/student-crackpot-professor-pushed-holocaust-denial/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/13/student-crackpot-pr

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/woman-accused-of-fatally-beating-man-recovering-from-brain-surgery/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/patients-being-tested-for-hiv-after-nurse-reused-syringes/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/patients-being-tested-for-hiv-after-nurse-reused-syringes/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/woman-unleashes-racist-tirade-against-asian-nail-salon-owner/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/woman-unleashes-racist-tirade-against-asian-nail-salon-owner/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/219



DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/mueller-russia-is-still-meddling-in-our-elections/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/mueller-russia-is-st

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/mom-charged-in-death-of-disabled-13-year-old-daughter/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/worker-who-threatened-to-crash-into-school-bus-gets-job-back/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/worker-who-threatened-to-crash-into-school-bus-gets-job-back/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/mitch-mcconnell-becomes-longest-serving-gop-leader-in-senate/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/mitch-mcconnell-becomes-longest-serving-gop-leader-in-senate/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/boston-bros-leap-across-subway-track-doesnt-end-well/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/boston-bros-leap-across-subway-track-doesnt-end-well/
DEBUG:__main__:Saved to DB
DEBUG:__main__

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/trump-kim-jong-un-sign-very-important-unspecified-document/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/trump-kim-jong-un-sign-very-important-unspecified-document/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/george-h-w-bush-celebrates-94th-birthday/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/george-h-w-bush-celebrates-94th-birthday/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/gunman-kills-all-4-child-hostages-himself-after-hours-long-standoff-cops/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/12/gunman-kills-all-4-child-hostages-himself-after-hours-long-standoff-cops/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/12/jogger-caught-on-video-destroying-homeless-mans-encampment/
DEBUG:__main__:Exctract

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/11/national-monument-vandalized-with-teens-promposal-graffiti/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/11/united-flight-diverted-after-bomb-threat-discovered-on-board/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/11/united-flight-diverted-after-bomb-threat-discovered-on-board/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/11/laughter-is-kim-jong-uns-soft-spot-ex-teacher-says/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/11/laughter-is-kim-jong-uns-soft-spot-ex-teacher-says/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/11/3-men-charged-with-murder-in-missing-teens-case/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/11/3-men-charged-with-murder-in-missing-teens-case/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://

DEBUG:__main__:Processing url: https://nypost.com/2018/06/11/trump-expects-kim-summit-to-work-out-very-nicely/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/11/trump-expects-kim-summit-to-work-out-very-nicely/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/11/gunman-barricades-himself-with-children-after-shooting-cop-authorities/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/11/gunman-barricades-himself-with-children-after-shooting-cop-authorities/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/11/pompeo-tries-to-reassure-north-korea-about-denuclearization/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/11/pompeo-tries-to-reassure-north-korea-about-denuclearization/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/11/cops-recover-officers-gun-after-being-stolen-from-burger-king-bathroom/
DEBUG:__main__:Exctracti

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/09/trump-ill-feel-if-kim-jong-un-is-serious-at-summit/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/09/trump-ill-feel-if-kim-jong-un-is-serious-at-summit/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/233



DEBUG:__main__:Processing url: https://nypost.com/2018/06/09/trump-french-president-get-into-another-handshake-duel/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/09/trump-french-president-get-into-another-handshake-duel/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/09/this-family-posed-as-jihadists-to-save-their-son-from-isis/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/09/this-family-posed-as-jihadists-to-save-their-son-from-isis/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/09/downed-power-lines-faulted-i

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/08/teen-murdered-in-mexico-weeks-after-he-was-deported-from-us/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/08/teen-murdered-in-mexico-weeks-after-he-was-deported-from-us/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/08/manafort-hit-with-new-charges-in-russia-probe/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/08/manafort-hit-with-new-charges-in-russia-probe/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/08/men-caught-with-enough-fentanyl-to-kill-26-million-people/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/08/men-caught-with-enough-fentanyl-to-kill-26-million-people/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/236



DEBUG:__main__:Processing url: https://nypost.com/2018/06/08/divers-search-pond-after-suspe

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/mitt-romney-predicts-a-solid-2020-win-for-trump/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/mitt-romney-predicts-a-solid-2020-win-for-trump/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/trump-to-leave-g7-summit-early-head-to-singapore/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/trump-to-leave-g7-summit-early-head-to-singapore/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/diners-accused-of-smoking-weed-walking-out-on-420-restaurant-bill/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/diners-accused-of-smoking-weed-walking-out-on-420-restaurant-bill/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/doj-seized-ny-times-reporters-phone-records-emails/
DEBUG:__main__:Exctracting features from https://ny

DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/mom-gets-life-boyfriend-gets-death-for-killing-boy-they-thought-was-gay/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/mom-gets-life-boyfriend-gets-death-for-killing-boy-they-thought-was-gay/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/suspect-wanted-for-fatally-stabbing-tourists-in-vegas-hotel/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/suspect-wanted-for-fatally-stabbing-tourists-in-vegas-hotel/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/stormy-daniels-unveils-truth-perfume/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/stormy-daniels-unveils-truth-perfume/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/trump-says-he-doesnt-need-to-prepare-very-much-for-kim-summit/
DEBUG:__main__:Exctracting features from https://nypost

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/ceo-says-shes-devoted-to-keeping-the-girl-in-girl-scouts/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/entire-hawaii-neighborhood-vanishes-as-lava-smothers-hundreds-of-homes/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/entire-hawaii-neighborhood-vanishes-as-lava-smothers-hundreds-of-homes/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/07/crossfit-exec-fired-after-calling-lgbt-community-sinners/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/07/crossfit-exec-fired-after-calling-lgbt-community-sinners/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/06/son-explains-why-he-wrote-spiteful-obituary-about-his-mom/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/06/son-explains-why-he-wrote-spiteful-obituary-about-his-mom/
DEBUG:__main__:S

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/06/ex-football-player-held-without-bail-in-classmates-death/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/247



DEBUG:__main__:Processing url: https://nypost.com/2018/06/06/colorado-sheriffs-want-to-store-guns-in-schools-to-stop-mass-shootings/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/06/colorado-sheriffs-want-to-store-guns-in-schools-to-stop-mass-shootings/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/06/disney-legoland-employees-arrested-in-child-porn-sting/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/06/disney-legoland-employees-arrested-in-child-porn-sting/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/06/cops-search-for-critters-taken-from-college-zoo/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/06/cops-search-for-cr

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/06/trump-endorsed-republican-secures-spot-in-california-race/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/06/trump-endorsed-republican-secures-spot-in-california-race/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/06/israeli-troops-fatally-shoot-palestinian-stone-thrower-during-west-bank-raid/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/06/israeli-troops-fatally-shoot-palestinian-stone-thrower-during-west-bank-raid/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/250



DEBUG:__main__:Processing url: https://nypost.com/2018/06/06/volcano-of-fire-showing-more-signs-of-activity-in-guatemala/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/06/volcano-of-fire-showing-more-signs-of-activity-in-guatemala/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/caretakers-charged-in-death-of-disabled-man-found-entombed-in-concrete/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/caretakers-charged-in-death-of-disabled-man-found-entombed-in-concrete/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/teen-pleads-not-guilty-in-slaying-of-98-year-old-woman/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/teen-pleads-not-guilty-in-slaying-of-98-year-old-woman/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/plane-carrying-10-reported-missing-in-western-kenya/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/plane-carrying-10-reported-missing-in-western-kenya/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/trump-struggles-with-lyrics-to-god-bless-america/
DEBUG:__main__:Exctract

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/trump-wants-to-fight-ruling-that-he-cant-block-twitter-critics/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/epa-chief-had-aide-help-wife-get-chick-fil-a-job/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/epa-chief-had-aide-help-wife-get-chick-fil-a-job/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/mitch-mcconnell-cancels-senates-august-recess/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/mitch-mcconnell-cancels-senates-august-recess/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/infant-kidnapped-by-sex-offender-father-found-safe-cops/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/infant-kidnapped-by-sex-offender-father-found-safe-cops/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/201

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/man-gets-2-years-for-exporting-military-equipment-to-russia/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/six-flags-ride-abruptly-stops-leaves-passengers-stranded-in-the-air/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/six-flags-ride-abruptly-stops-leaves-passengers-stranded-in-the-air/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/nearly-half-of-californias-bay-area-residents-say-they-want-to-leave/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/nearly-half-of-californias-bay-area-residents-say-they-want-to-leave/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/05/clinton-defends-response-to-lewinsky-questions-i-got-hot-under-the-collar/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/05/clinton-defends-response-to-lewinsky-q

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/04/israel-uses-mean-girls-to-troll-iran-on-twitter/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/04/dad-turns-son-in-for-allegedly-killing-19-year-old-girlfriend/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/04/dad-turns-son-in-for-allegedly-killing-19-year-old-girlfriend/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/04/michigan-state-physicist-accused-of-having-sex-with-dog/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/04/michigan-state-physicist-accused-of-having-sex-with-dog/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/04/family-accuses-delta-of-trying-to-cover-up-dogs-bloody-death/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/04/family-accuses-delta-of-trying-to-cover-up-dogs-bloody-death/
DEBUG:__main__:Saved to DB
DEBUG:__main

DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/264



DEBUG:__main__:Processing url: https://nypost.com/2018/06/04/supreme-court-rules-baker-can-refuse-to-make-gay-wedding-cake/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/04/supreme-court-rules-baker-can-refuse-to-make-gay-wedding-cake/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/04/pregnant-woman-among-five-shot-after-gunmen-open-fire-at-football-game/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/04/pregnant-woman-among-five-shot-after-gunmen-open-fire-at-football-game/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/04/disgraced-deputy-haunted-by-failure-at-florida-school-shooting/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/04/disgraced-deputy-haunted-by-failure-at-florida-school-shooting/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing u

DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/03/mattis-itll-be-a-bumpy-road-to-summit-with-north-korea/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/03/giuliani-thinks-trump-can-probably-pardon-himself/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/03/giuliani-thinks-trump-can-probably-pardon-himself/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/267



DEBUG:__main__:Processing url: https://nypost.com/2018/06/03/trudeau-says-tariffs-on-steel-and-aluminum-are-insulting/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/03/trudeau-says-tariffs-on-steel-and-aluminum-are-insulting/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/03/trump-i-wouldnt-have-hired-manafort-if-i-knew-fbi-was-going-after-him/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/03/trump-i-wouldnt-have-hired-manafort-

DEBUG:__main__:Processing url: https://nypost.com/2018/06/01/trump-says-summit-with-kim-jong-un-is-back-on/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/01/trump-says-summit-with-kim-jong-un-is-back-on/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/01/melania-trump-not-joining-husband-at-camp-david-this-weekend/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/01/melania-trump-not-joining-husband-at-camp-david-this-weekend/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/01/middle-schooler-traumatized-by-unannounced-active-shooter-drill-mom/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/01/middle-schooler-traumatized-by-unannounced-active-shooter-drill-mom/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/270



DEBUG:__main__:Processing url: https://nypost.com/2018/06/01/north-korean-official-arrives-at-wh

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/06/01/family-awarded-just-4-cents-for-mans-death-by-cop/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/06/01/family-awarded-just-4-cents-for-mans-death-by-cop/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/31/cops-placed-on-leave-after-video-shows-them-beating-man/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/31/cops-placed-on-leave-after-video-shows-them-beating-man/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/31/model-held-hostage-by-violent-ex-boyfriend-saved-by-selfie/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/31/model-held-hostage-by-violent-ex-boyfriend-saved-by-selfie/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/31/trump-visits-families-of-kids-killed-in-texas-school-shooting/
DEBUG:__main__:Exctracting features 

DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/31/us-allies-vow-to-retaliate-over-trumps-metal-tariffs/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/31/hawaii-man-pulls-gun-on-neighbor-checking-on-lava-threatened-home/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/31/hawaii-man-pulls-gun-on-neighbor-checking-on-lava-threatened-home/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/31/dinesh-dsouza-slams-bharara-after-being-pardoned/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/31/dinesh-dsouza-slams-bharara-after-being-pardoned/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/31/trump-appears-to-be-clearing-his-disgraced-apprentice-pals/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/31/trump-appears-to-be-clearing-his-disgraced-apprentice-pals/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Pr

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/2-face-drug-charges-amid-toddler-death-investigation/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/2-face-drug-charges-amid-toddler-death-investigation/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/trump-signs-right-to-try-bill-giving-terminally-ill-people-access-to-experimental-drugs/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/trump-signs-right-to-try-bill-giving-terminally-ill-people-access-to-experimental-drugs/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/isis-claims-credit-for-belgium-terror-attack/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/isis-claims-credit-for-belgium-terror-attack/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/how-a-russian-journalist-faked-his-death-to-save-his-life

DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/man-committing-suicide-accidentally-killed-roommate-too-cops/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/mom-killed-herself-after-ovaries-removed-without-her-consent-family/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/mom-killed-herself-after-ovaries-removed-without-her-consent-family/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/cop-accused-of-beating-hospital-patient-covering-up-attack/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/cop-accused-of-beating-hospital-patient-covering-up-attack/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/teen-girlfriend-accused-of-hiring-hitman-who-killed-his-father/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/teen-girlfriend-accused-of-hiring-hitman-who-killed-his-father/
DEBU

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/israel-may-escalate-gaza-strikes-after-soldiers-injured-by-palestinian-rockets/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/israel-may-escalate-gaza-strikes-after-soldiers-injured-by-palestinian-rockets/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/284



DEBUG:__main__:Processing url: https://nypost.com/2018/05/30/cop-arrested-for-threatening-to-kill-woman-for-her-nude-photos/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/30/cop-arrested-for-threatening-to-kill-woman-for-her-nude-photos/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/29/lapd-investigating-52-sexual-misconduct-complaints-against-usc-gynecologist/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/29/lapd-investigating-52-sexual-misconduct-complaints-against-usc-gynecologist/
DEBUG:__ma

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/29/teacher-wanted-to-empower-women-to-be-serial-killers-cops/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/29/teacher-wanted-to-empower-women-to-be-serial-killers-cops/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/29/russian-journalist-who-criticized-kremlin-shot-to-death/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/29/russian-journalist-who-criticized-kremlin-shot-to-death/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/29/dad-was-playing-pokemon-while-french-spider-man-saved-tot/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/29/dad-was-playing-pokemon-while-french-spider-man-saved-tot/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/287



DEBUG:__main__:Processing url: https://nypost.com/2018/05/29/teen-mom-fatal

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/28/backpage-closure-hasnt-made-it-any-harder-for-sex-traffickers/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/28/backpage-closure-hasnt-made-it-any-harder-for-sex-traffickers/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/28/news-anchor-cameraman-killed-when-tree-topples-into-news-van/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/28/news-anchor-cameraman-killed-when-tree-topples-into-news-van/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/28/congressman-says-hes-an-alcoholic-quits-re-election-race/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/28/congressman-says-hes-an-alcoholic-quits-re-election-race/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/28/liberal-activists-tweet-photo-from-2014-in-attack-on-trump/
DEBUG:__m

DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/27/monster-lightning-storm-grinds-london-airport-to-a-halt/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/27/gulf-coast-braces-for-storm-alberto/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/27/gulf-coast-braces-for-storm-alberto/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/27/giuliani-firing-mueller-would-look-too-much-like-watergate/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/27/giuliani-firing-mueller-would-look-too-much-like-watergate/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/27/mexico-arrests-wife-of-drug-lord-blamed-for-us-heroin-epidemic/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/27/mexico-arrests-wife-of-drug-lord-blamed-for-us-heroin-epidemic/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.

DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/25/mysterious-werewolf-killed-in-montana/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/25/two-armed-bystanders-fatally-shot-restaurant-gunman-cops/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/25/two-armed-bystanders-fatally-shot-restaurant-gunman-cops/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/25/colossal-waste-border-patrol-union-chief-rips-trumps-buildup-of-troops/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/25/colossal-waste-border-patrol-union-chief-rips-trumps-buildup-of-troops/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/25/rape-suspect-found-not-guilty-after-showing-penis-in-court/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/25/rape-suspect-found-not-guilty-after-showing-penis-in-court/
DEBUG:__main__:Saved to DB
DEBUG:

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/25/bones-found-in-shipwreck-dont-belong-to-infamous-pirate/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/25/bones-found-in-shipwreck-dont-belong-to-infamous-pirate/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/25/court-rules-inmate-serving-life-cant-sue-because-hes-dead/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/25/court-rules-inmate-serving-life-cant-sue-because-hes-dead/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/25/explosion-at-toronto-restaurant-injures-15-people/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/25/explosion-at-toronto-restaurant-injures-15-people/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/25/lava-from-hawaii-volcano-now-entering-pacific-from-third-flow/
DEBUG:__main__:Exctracting features fr

DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/24/george-zimmerman-is-broke/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/301



DEBUG:__main__:Processing url: https://nypost.com/2018/05/24/stormy-daniels-again-wants-out-of-non-disclosure-agreement/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/24/stormy-daniels-again-wants-out-of-non-disclosure-agreement/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/24/sex-offender-caught-in-bed-with-teens-by-roommate-cops/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/24/sex-offender-caught-in-bed-with-teens-by-roommate-cops/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/24/deadbeat-son-who-was-sued-by-parents-has-one-week-to-move-out/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/24/deadbeat-son-who-was-sued-by-parents-has-one-week-to-move-o

DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/remains-found-in-drainage-pipe-idd-as-teen-who-vanished-over-2-years-ago/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/remains-found-in-drainage-pipe-idd-as-teen-who-vanished-over-2-years-ago/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/hawaii-volcanos-latest-phenomenon-blue-flames/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/hawaii-volcanos-latest-phenomenon-blue-flames/
DEBUG:__main__:Saved to DB
DEBUG:__main__:

 PROCESSING PAGE: https://nypost.com/news/page/304



DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/nearly-100-illegal-immigrants-found-in-trailer-during-traffic-stop/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/nearly-100-illegal-immigrants-found-in-trailer-during-traffic-stop/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/makeup-artist

DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/alleged-gay-village-serial-killer-appears-in-court/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/alleged-gay-village-serial-killer-appears-in-court/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/schumer-blames-trump-for-soaring-gas-prices/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/schumer-blames-trump-for-soaring-gas-prices/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/trump-lambastes-ms-13-refers-to-them-as-animals-again/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/trump-lambastes-ms-13-refers-to-them-as-animals-again/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/jared-kushner-finally-gets-permanent-security-clearance/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/jar

DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/ms-13-member-sentenced-to-40-years-in-prison-for-killing-teen-boy/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/ms-13-member-sentenced-to-40-years-in-prison-for-killing-teen-boy/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/thousands-of-vegas-casino-workers-ok-strike-that-could-leave-resorts-reeling/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/thousands-of-vegas-casino-workers-ok-strike-that-could-leave-resorts-reeling/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/recent-inspection-uncovers-violations-at-water-park-where-boy-was-decapitated/
DEBUG:__main__:Exctracting features from https://nypost.com/2018/05/23/recent-inspection-uncovers-violations-at-water-park-where-boy-was-decapitated/
DEBUG:__main__:Saved to DB
DEBUG:__main__:Processing url: https://nypost.com/2018/05/23/teen-charged-i

In [25]:
FIX AUTHORS

True