In [None]:
## News API
#import os.path, pkgutil
#import newsapi
#help(newsapi)
#from newsapi.newsapi_client import NewsApiClient
#from newsapi.articles import Articles
#from newsapi.sources import Sources#

In [66]:
# Dependencies
import tweepy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import time
import math

import requests

from pprint import pprint

%matplotlib inline

# News API
import newsapi
from newsapi.newsapi_client import NewsApiClient
from newsapi.articles import Articles
from newsapi.sources import Sources

# NLTK
import nltk

In [67]:
# Twitter API Keys
from config import (consumer_key, consumer_secret, access_token, access_token_secret)
#News API Keys
from config import newapi_key

In [68]:
# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

#from config import (consumer_key, consumer_secret,
#                    access_token, access_token_secret)

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# Init
newsapi = NewsApiClient(api_key='4b7e7bb7670e40f788f92667a6a2dd64')
newsapi_Articles = Articles(API_KEY=newapi_key)
newsapi_Sources = Sources(API_KEY=newapi_key)

### Method: getCurrentHeadlines() 
### Get the top headlines from newsapi. 
Default value country=US, lanuage=english, page_size=100 (max allowed) <br/>
Other paramers like q (query), sources, categories and pages can be passed in.

##### Note: country and category params cannot be used with sources param
*Category possible values:* 'business','entertainment','general','health','science','sports','technology'

**Returns a Dictionary** <br/>
Dictionary["totalResults"] key will provide the total number of articles that were returned by newsapi. <br/>
The method returns only Max of 500 articles <br/>

In [None]:
    #(str) category - The category you want to get headlines for! Valid values are:
    #'business','entertainment','general','health','science','sports','technology'
#   top_headlines = newsapi.get_everything(
#                               q='headlines, US', 
#                               #sources='bbc-news,the-verge', 
#                               language='en', page_size=30)
#                               #page_size=25, page=2)
#                             #category='sports')  

In [110]:
def getCurrentHeadlines(q=None, 
                        sources=None, 
                        language='en', 
                        country='us', 
                        category=None, 
                        page_size=100, 
                        page=None):
    if(sources != None):
        country = None
        category = None
        
    print(f"Calling => getCurrentHeadlines(q={q}, \n\
                                sources={sources},\n\
                                language={language},\n\
                                country={country},\n\
                                category={category},\n\
                                page_size={page_size},\n\
                                page={page})")

    results = newsapi.get_top_headlines(q, 
                                              sources, 
                                              language, 
                                              country, 
                                              category, 
                                              page_size, 
                                              page)
    results_count = results['totalResults'] 
    # return value
    top_headlines = results['articles']
    
    # the first 100 has already been pulled in the first call, 
    # so start pulling from page 2
    loopcount = int(results_count / 100)
    page = 2
    
    while(loopcount > 0):
        #print('INSIDE LOOP')
        #print(f"output list size = {len(top_headlines)}")
        
        results = None
        results = newsapi.get_top_headlines(q, 
                                          sources, 
                                          language, 
                                          country, 
                                          category, 
                                          page_size, 
                                          page)
        #print(f"page {page} pulled {results['totalResults']}")
        #print(f"loopcount {loopcount} pulled {len(results['articles'])}")
        
        top_headlines = top_headlines + results['articles']
        page += 1
        loopcount -= 1
        
        # break at 500 articles
        if(page > 5):
            break
         
    print(f"totalResults from newsapi.get_top_headlines() = {results_count} \n \
          top_headlines returned by the method = {len(top_headlines)}")
    return top_headlines

**Call Details**

*Default value country=US, lanuage=english, page_size=25
Other paramers like q (query), sources, categories and pages can be passed in.*

##### Note: country and category params cannot be used with sources param
Category possible values: 'business','entertainment','general','health','science','sports','technology'

*Sample:* 

**Correct Call:**

getCurrentHeadlines() *returns top headlines for US, English* <br/>
getCurrentHeadlines(sources='bbc-news') *returns top headlines from bbc-news, English*<br/>
getCurrentHeadlines(category='sports') *returns top headlines for sports, in English (not restricted to US)* <br/>
getCurrentHeadlines(q='FIFA World Cup', country=None) *returns top headlines for sports, in English (not restricted to US)* <br/>

**Incorrect Call: Category will be made None because sources is set.**

getCurrentHeadlines(sources='bbc-news', *'category='sports'*)

**Returns a Dictionary** <br/>
Dictionary["totalResults"] key will provide the total number of articles that were returned by newsapi. <br/>
The method returns only Max of 500 articles <br/>

In [111]:
news_sources = 'google-news,abc-news,cbs-news,cnbc,bbc-news,cnn,fox-news, \
                nbc-news,the-washington-post,the-washington-times'
#headlines = getCurrentHeadlines(sources=news_sources)
#headlines = getCurrentHeadlines(country=None)
headlines = getCurrentHeadlines()

Calling => getCurrentHeadlines(q=None, 
                                sources=None,
                                language=en,
                                country=us,
                                category=None,
                                page_size=100,
                                page=None)
totalResults from newsapi.get_top_headlines() = 20 
           top_headlines returned by the method = 20


In [112]:
type(headlines)

list

In [114]:
df = pd.DataFrame(headlines)
df.head()

Unnamed: 0,author,description,publishedAt,source,title,url,urlToImage
0,Daniella Silva,Federal officers in riot gear moved to reopen ...,2018-06-28T20:12:25Z,"{'id': 'nbc-news', 'name': 'NBC News'}",Federal officers move to reopen Portland ICE b...,https://www.nbcnews.com/news/us-news/federal-o...,https://media4.s-nbcnews.com/j/newscms/2018_26...
1,ABC News,President Donald Trump highlighted his economi...,2018-06-28T20:03:00Z,"{'id': 'abc-news', 'name': 'ABC News'}","Amid Harley feud, Trump hails economy at futur...",https://abcnews.go.com/Politics/wireStory/trum...,https://s.abcnews.com/images/Politics/WireAP_2...
2,NBC10 Staff,A suspect has been found in the mystery explos...,2018-06-28T19:53:49Z,"{'id': None, 'name': 'Nbcphiladelphia.com'}",Suspect Arrested in Mystery Bucks County Bombi...,https://www.nbcphiladelphia.com/news/local/ATF...,https://media.nbcphiladelphia.com/images/1200*...
3,Erin Jensen,"""I’m a business owner and people can run their...",2018-06-28T19:39:00Z,"{'id': 'usa-today', 'name': 'USA Today'}",Spike Mendelsohn condemns Red Hen's ouster of ...,https://www.usatoday.com/story/life/entertaint...,https://www.gannett-cdn.com/-mm-/1d9e9070b233b...
4,,There are reports that multiple people have be...,2018-06-28T19:29:14Z,"{'id': 'cnn', 'name': 'CNN'}",Shooting at Maryland newspaper,https://www.cnn.com/us/live-news/maryland-news...,https://cdn.cnn.com/cnnnext/dam/assets/1806281...


In [116]:
[article['source']['name'] for article in headlines]

['NBC News',
 'ABC News',
 'Nbcphiladelphia.com',
 'USA Today',
 'CNN',
 'Reuters',
 'USA Today',
 'CNN',
 'CNN',
 'The Wall Street Journal',
 'The Washington Post',
 'USA Today',
 'The New York Times',
 'CNN',
 'ABC News',
 'ABC News',
 'CBS News',
 'Forbes.com',
 'The New York Times',
 'ABC News']

In [113]:
titles = [article['title'] for article in headlines]
#df = pd.DataFrame(titles, columns=["colummn"])
#df.to_csv('list.csv', index=False)
titles

['Federal officers move to reopen Portland ICE building closed over protests',
 'Amid Harley feud, Trump hails economy at future factory site',
 'Suspect Arrested in Mystery Bucks County Bombings as Authorities Swarm 2 Scenes',
 "Spike Mendelsohn condemns Red Hen's ouster of Sarah Sanders, talks food policy in new podcast",
 'Shooting at Maryland newspaper',
 'Italy blocks EU summit agreement until migration demands met',
 'Jameis Winston suspended for three games, apologizes for Uber incident',
 'The Senate and the next Supreme Court justice: What you need to know',
 'Intrigue and uncertainty loom over Trump-Putin summit',
 "Behind PillPack's $1 Billion Sale, a Frustrated 32-Year-Old Pharmacist",
 "More than 600 charged in nation's largest health care fraud investigation",
 "Dangerous, oppressive heat wave to scorch central, eastern US as 'heat dome' expands",
 'House Passes Resolution Demanding Justice Dept. Turn Over Documents',
 'Puerto Rico pushes for statehood by 2021',
 'Man sus

In [25]:
news_top_headlines_df = pd.DataFrame.from_dict(headlines['articles'])

In [26]:
news_top_headlines_df

Unnamed: 0,author,description,publishedAt,source,title,url,urlToImage
0,Associated Press,The visit comes a week after Mrs. Trump travel...,2018-06-28T17:53:35.5479397Z,"{'id': 'nbc-news', 'name': 'NBC News'}",Melania Trump makes second trip to U.S.-Mexico...,http://www.nbcnews.com/politics/white-house/me...,https://media3.s-nbcnews.com/j/newscms/2018_26...
1,Noah Bierman,The White House and Kremlin announced Thursday...,2018-06-28T17:45:00+00:00,"{'id': 'google-news', 'name': 'Google News'}",Trump and Putin to meet in Finland amid Europe...,http://www.latimes.com/politics/la-na-pol-trum...,http://www.latimes.com/resizer/D0gEd3AX_aXVDrt...
2,"Devlin Barrett, Karoun Demirjian, Matt Zapotosky",The angry exchanges came as the House GOP appr...,2018-06-28T17:42:00Z,"{'id': 'the-washington-post', 'name': 'The Was...",‘I’m not trying to hide anything’: Rosenstein ...,https://www.washingtonpost.com/world/national-...,https://www.washingtonpost.com/resizer/O0QKJln...
3,Keris Lahiff,"Advanced Micro Devices, Rent-a-Center, Shake S...",2018-06-28T17:40:39Z,"{'id': 'cnbc', 'name': 'CNBC'}",Some of Wall Street’s most hated stocks are su...,https://www.cnbc.com/2018/06/28/wall-streets-m...,https://fm.cnbc.com/applications/cnbc.com/reso...
4,"Courtney Kube, Carol E. Lee, Kristen Welker","Until last week, there was little planning for...",2018-06-28T17:39:02.0530402Z,"{'id': 'nbc-news', 'name': 'NBC News'}",Planning for Trump's military parade finally g...,http://www.nbcnews.com/news/us-news/planning-t...,https://media2.s-nbcnews.com/j/newscms/2018_26...
5,"Jeremy Herb and Laura Jarrett, CNN",House Republicans sharply took aim Thursday at...,2018-06-28T17:38:44.4266281Z,"{'id': 'cnn', 'name': 'CNN'}","Rosenstein, Republicans clash in fiery hearing",http://us.cnn.com/2018/06/28/politics/rod-rose...,https://cdn.cnn.com/cnnnext/dam/assets/1805211...
6,"Kate Bennett, CNN","First lady Melania Trump landed in Tucson, Ari...",2018-06-28T17:38:28.3434111Z,"{'id': 'cnn', 'name': 'CNN'}",Melania Trump arrives in Arizona for her secon...,http://us.cnn.com/2018/06/28/politics/melania-...,https://cdn.cnn.com/cnnnext/dam/assets/1806281...
7,Kate Fazzini,The scammers likely weren’t trying to take a p...,2018-06-28T17:35:13Z,"{'id': 'cnbc', 'name': 'CNBC'}",The Red Hen restaurant that refused to serve S...,https://www.cnbc.com/2018/06/28/cyberattack-ha...,https://fm.cnbc.com/applications/cnbc.com/reso...
8,Christina Farr,Sources say that PillPack and Walmart were in ...,2018-06-28T17:30:47Z,"{'id': 'cnbc', 'name': 'CNBC'}",Amazon swooped in to buy PillPack for around $...,https://www.cnbc.com/2018/06/28/amazon-swooped...,https://fm.cnbc.com/applications/cnbc.com/reso...
9,Nick Miroff,ICE investigators sent their complaint to Home...,2018-06-28T17:25:00Z,"{'id': 'the-washington-post', 'name': 'The Was...","Seeking split from ICE, agents say Trump’s imm...",https://www.washingtonpost.com/world/national-...,https://www.washingtonpost.com/resizer/q3guv5b...


In [9]:
pd.DataFrame.from_dict(news_channel_name_list, orient='index')

Unnamed: 0,0
Anthony Kennedy’s retirement comes at a worrying time,The swing vote is lost just as the constitutio...
Outgoing national cyber chief: 150 monthly attacks on Israeli companies,Carmeli indicated that there were around five ...
Retiring Supreme Court Justice Anthony Kennedy’s Jewish legacy,Kennedy’s most notable liberal votes – on same...
'Trump will demand Iran will exit the whole of Syria',London based 'Al-Hayat' claims that the US gav...
Michael Bloomberg reportedly considering run for president in 2020,Bloomberg has said he will spend $80 million t...
Here comes BJ's Wholesale Club's IPO... (BJ),Here comes BJ's Wholesale Club's IPO... (BJ)
The top Test allrounders,"Get ICC official team, player, ODI, Test ranki..."
Photo index: the latest pictures,The best cricket photos from across the world
"Apple could bundle TV, music and news in a single subscription","According to a report from The Information, Ap..."
Supreme Court Denies Silk Road Founder's Petition to Review Life Sentence,The U.S. Supreme Court has denied Ross Ulbrich...


In [220]:
news_top_headlines_df['News Channel Name'] = news_channel_name_list

In [221]:
news_top_headlines_df

Unnamed: 0,author,description,publishedAt,source,title,url,urlToImage,News Channel Name
0,,A body was discovered at Janoris Jenkins&#39; ...,2018-06-27T20:59:00Z,"{'id': None, 'name': 'Sportingnews.com'}",Janoris Jenkins' brother charged with aggravat...,http://www.sportingnews.com/nfl/news/janoris-j...,http://images.performgroup.com/di/library/omni...,Sportingnews.com
1,,,2018-06-27T20:26:26Z,"{'id': None, 'name': 'Si.com'}","Germany Out, Brazil Through, Mexico Gets a Han...",https://www.si.com/soccer/2018/06/27/world-cup...,,Si.com
2,,,2018-06-27T20:01:14Z,"{'id': 'usa-today', 'name': 'USA Today'}",Tigers fire pitching coach Chris Bosio for 'in...,https://www.usatoday.com/story/sports/mlb/2018...,,USA Today
3,"Benjamin Oreskes, Joseph Serna",A shirtless man wearing only underwear shuts d...,2018-06-27T19:56:20Z,"{'id': None, 'name': 'Latimes.com'}",Publicity stunt triggers massive traffic jam i...,http://www.latimes.com/local/lanow/la-me-ln-do...,http://www.latimes.com/resizer/h1Qwgq8nIQDRp1F...,Latimes.com
4,,Ahmed Abu Khatallah was charged in the 2012 as...,2018-06-27T19:56:00Z,"{'id': None, 'name': 'Npr.org'}",Libyan Militant Gets 22-Year Sentence For Role...,https://www.npr.org/2018/06/27/623919870/libya...,https://media.npr.org/assets/img/2018/06/27/ap...,Npr.org
5,David Boroff,"When asked to go over the sequence of events, ...",2018-06-27T19:50:32Z,"{'id': None, 'name': 'Nydailynews.com'}",Police officer in Antwon Rose shooting admits ...,http://www.nydailynews.com/news/crime/ny-news-...,http://www.nydailynews.com/resizer/YD8KoyXor03...,Nydailynews.com
6,,,2018-06-27T19:48:06Z,"{'id': 'cnn', 'name': 'CNN'}",Anthony Kennedy's retirement just confirmed ev...,https://www.cnn.com/2018/06/27/politics/kenned...,,CNN
7,Jacob Kastrenakes,While the fight was hashed out using specific ...,2018-06-27T18:59:00Z,"{'id': 'the-verge', 'name': 'The Verge'}",Apple and Samsung settle seven-year long paten...,https://www.theverge.com/2018/6/27/17510908/ap...,https://cdn.vox-cdn.com/thumbor/k-GJfLQGAGfwMw...,The Verge
8,,,2018-06-27T18:57:27Z,"{'id': 'usa-today', 'name': 'USA Today'}",Comedy scorecard: Winners and losers in the Tr...,https://www.usatoday.com/story/life/tv/2018/06...,,USA Today
9,,,2018-06-27T18:15:00Z,"{'id': None, 'name': 'Nypost.com'}",Police artist makes touching portrait of slain...,https://nypost.com/2018/06/27/police-artist-ma...,,Nypost.com


In [58]:
query = 'Apple'
from_date = '2018-06-25'
to_date = '2018-06-25'
sort_by = 'popularity'
url = f"https://newsapi.org/v2/everything?q={query}&from={from_date}&sortBy={sort_by}&apiKey={newapi_key}"
print(url)

https://newsapi.org/v2/everything?q=Apple&from=2018-06-25&sortBy=popularity&apiKey=4b7e7bb7670e40f788f92667a6a2dd64


In [59]:
resp = requests.get(url)

In [57]:
resp.json()

{'status': 'ok',
 'totalResults': 2434,
 'articles': [{'source': {'id': None, 'name': 'Lifehacker.com'},
   'author': 'Inès Montfajon',
   'title': "Get a Direct Feed of Politics With Apple News' '2018 Midterm Elections' Section",
   'description': 'Apple’s News app, a curated feed of stories available on your iPhone and iPad, has a brand-new addition for political junkies (or those who just want a quick moment to catch up on the Insanity of the Day). Read more...',
   'url': 'https://lifehacker.com/get-a-direct-feed-of-politics-with-apple-news-2018-midt-1827103639',
   'urlToImage': 'https://i.kinja-img.com/gawker-media/image/upload/s--g-Ot10J0--/c_fill,fl_progressive,g_center,h_900,q_80,w_1600/wgcurtij8tp1vcyvsusv.jpg',
   'publishedAt': '2018-06-25T16:30:00Z'},
  {'source': {'id': None, 'name': 'Lifehacker.com'},
   'author': 'David Murphy',
   'title': 'How to Install the iOS 12 Public Beta Right Now',
   'description': 'Happy beta day! Apple has finally released iOS 12 to the non-