In [1]:
"""
News Sampler

- Scrape article URL, source, and timestamp from sources
- Compile data into DataFrame
- Send DataFrame to the ElephantSQL database
"""

# Startup banner: a blank line, three banner rows, and a trailing blank line,
# emitted with a single print call (one row per output line).
print(
    "",
    "-----------------------------",
    "--- Subverse News Sampler ---",
    "-----------------------------",
    "",
    sep="\n",
)

####################################################
################### Imports ########################
####################################################

from datetime import datetime
import psycopg2
import pandas as pd

# Local Imports
import sections
import scrapers
from sources import Source


####################################################
#################### Sources #######################
####################################################

# One Source object per outlet. Each is built from a display name, a short
# identifier (presumably a key/slug -- confirm in sources.Source), the outlet's
# entry in the local `sections` module, and its scraper from `scrapers`.
breitbart = Source("Breitbart", "breitbart", sections.breitbart, scrapers.breitbart)
examiner = Source("The Washington Examiner", "examiner", sections.examiner, scrapers.examiner)
fox = Source("Fox News", "fox", sections.fox, scrapers.fox)
hill = Source("The Hill", "hill", sections.hill, scrapers.hill)
msnbc = Source("MSNBC", "msnbc", sections.msnbc, scrapers.msnbc)
nypost = Source("The New York Post", "nypost", sections.nypost, scrapers.nypost)
nyt = Source("The New York Times", "nyt", sections.nyt, scrapers.nyt)
vox = Source("Vox", "vox", sections.vox, scrapers.vox)
wapo = Source("The Washington Post", "wapo", sections.wapo, scrapers.wapo)
wsj = Source("The Wall Street Journal", "wsj", sections.wsj, scrapers.wsj)

# Every configured source. Kept under its own name so the complete list is
# not lost when the active selection below is narrowed.
all_sources = [breitbart, examiner, fox, hill, msnbc, nypost, nyt, vox, wapo, wsj]

# Sources actually scraped by this run. Currently narrowed to two outlets;
# use `sources = list(all_sources)` to sample every outlet.
sources = [fox, vox]


####################################################
################## Global Vars #####################
####################################################

# Create timestamp
# Read the clock once so that day, month and year always describe the same
# date; three separate datetime.now() calls could straddle midnight and
# yield an inconsistent combination.
_now = datetime.now()
day = _now.day
month = _now.month
year = _now.year


-----------------------------
--- Subverse News Sampler ---
-----------------------------



In [2]:
####################################################
################# Scrape Sources ###################
####################################################

print("Scraping:")

# Walk the active sources. Each scraper is handed the source's sections plus
# a fresh list, which is then stored on the source as its article URLs.
for paper in sources:
    print(f"--- {paper.name}")
    new_articles = list()
    paper.scraper(paper.sections, new_articles)
    paper.article_URLs = new_articles

print()

Scraping:
--- Fox News
--- Vox



In [7]:
# Inspect the URLs stored on the Vox source by the scraping loop.
# NOTE(review): the recorded output includes non-article pages (e.g.
# /pages/newsletters and /authors/...), so the Vox scraper presumably needs
# URL filtering -- confirm in scrapers.vox.
vox.article_URLs

['https://www.vox.com/recode',
 'https://www.vox.com/pages/newsletters',
 'https://www.vox.com/recode/2019/12/5/20997583/sue-desmond-hellmann-gates-foundation-departure',
 'https://www.vox.com/authors/theodore-schleifer',
 'https://www.vox.com/recode/2019/12/5/20997515/amazon-prime-delivery-late-delays-one-day-shipping',
 'https://www.vox.com/authors/jason-del-rey',
 'https://www.vox.com/recode/2019/12/5/20998013/amazon-s-team-leadership-women-jeff-bezos-tech-diversity',
 'https://www.vox.com/2018/1/23/16905844/media-landscape-verizon-amazon-comcast-disney-fox-relationships-chart',
 'https://www.vox.com/authors/rani-molla',
 'https://www.vox.com/authors/peter-kafka',
 'https://www.vox.com/recode/2019/12/3/20963601/tech-2020-democratic-presidential-candidates-facebook-google-technology',
 'https://www.vox.com/policy-and-politics/2019/12/3/20965463/tech-2020-candidate-policies-online-data-equifax',
 'https://www.vox.com/authors/emily-stewart',
 'https://www.vox.com/policy-and-politics/20