In [1]:
import os
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
import yfinance as yf
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from newsapi import NewsApiClient

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
from fuzzywuzzy import process
import requests

In [3]:
# Retrieve the News API key
news_api = os.getenv("news_api")

# Create the newsapi client
newsapi = NewsApiClient(api_key=news_api)

In [4]:
# Retrieve the NYtimes API key
nyt_api = os.getenv("nytimes_secret")

In [5]:
company_request = 'microsoft'

In [6]:
def getCompany(text):
    r = requests.get('https://api.iextrading.com/1.0/ref-data/symbols')
    stockList = r.json()
    return process.extractOne(text, stockList)[0]

In [7]:
company = getCompany(company_request)

In [8]:
symbol = company['symbol']
symbol

'MSFT'

In [9]:
company_name = company['name']

In [10]:
company_name

'MICROSOFT CORP'

In [11]:
data = yf.download(symbol, parse_dates=True, infer_datetime_format=True)

[*********************100%***********************]  1 of 1 completed


In [12]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1986-03-13,0.088542,0.101562,0.088542,0.097222,0.062549,1031788800
1986-03-14,0.097222,0.102431,0.097222,0.100694,0.064783,308160000
1986-03-17,0.100694,0.103299,0.100694,0.102431,0.065899,133171200
1986-03-18,0.102431,0.103299,0.098958,0.099826,0.064224,67766400
1986-03-19,0.099826,0.100694,0.097222,0.098090,0.063107,47894400
...,...,...,...,...,...,...
2019-12-03,147.490005,149.429993,146.649994,149.309998,149.309998,24066000
2019-12-04,150.139999,150.179993,149.199997,149.850006,149.850006,17574700
2019-12-05,150.050003,150.320007,149.479996,149.929993,149.929993,17869100
2019-12-06,150.990005,151.869995,150.270004,151.750000,151.750000,16403500


In [13]:
data['Daily Return'] = data['Close'].dropna().pct_change()
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Daily Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1986-03-13,0.088542,0.101562,0.088542,0.097222,0.062549,1031788800,
1986-03-14,0.097222,0.102431,0.097222,0.100694,0.064783,308160000,0.035714
1986-03-17,0.100694,0.103299,0.100694,0.102431,0.065899,133171200,0.017241
1986-03-18,0.102431,0.103299,0.098958,0.099826,0.064224,67766400,-0.025424
1986-03-19,0.099826,0.100694,0.097222,0.098090,0.063107,47894400,-0.017391
...,...,...,...,...,...,...,...
2019-12-03,147.490005,149.429993,146.649994,149.309998,149.309998,24066000,-0.001605
2019-12-04,150.139999,150.179993,149.199997,149.850006,149.850006,17574700,0.003617
2019-12-05,150.050003,150.320007,149.479996,149.929993,149.929993,17869100,0.000534
2019-12-06,150.990005,151.869995,150.270004,151.750000,151.750000,16403500,0.012139


In [14]:
# Set short and long windows
short_window = 5
long_window = 21

# Construct a `Fast` and `Slow` Exponential Moving Average from short and long windows, respectively
data['fast_close'] = data['Close'].ewm(halflife=short_window).mean()
data['slow_close'] = data['Close'].ewm(halflife=long_window).mean()

# Construct a crossover trading signal
data['crossover_long'] = np.where(data['fast_close'] > data['slow_close'], 1.0, 0.0)
data['crossover_short'] = np.where(data['fast_close'] < data['slow_close'], -1.0, 0.0)
data['crossover_signal'] = data['crossover_long'] + data['crossover_short']

data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Daily Return,fast_close,slow_close,crossover_long,crossover_short,crossover_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1986-03-13,0.088542,0.101562,0.088542,0.097222,0.062549,1031788800,,0.097222,0.097222,0.0,0.0,0.0
1986-03-14,0.097222,0.102431,0.097222,0.100694,0.064783,308160000,0.035714,0.099078,0.098987,1.0,0.0,1.0
1986-03-17,0.100694,0.103299,0.100694,0.102431,0.065899,133171200,0.017241,0.100354,0.100173,1.0,0.0,1.0
1986-03-18,0.102431,0.103299,0.098958,0.099826,0.064224,67766400,-0.025424,0.100193,0.100082,1.0,0.0,1.0
1986-03-19,0.099826,0.100694,0.097222,0.09809,0.063107,47894400,-0.017391,0.099649,0.099657,0.0,-1.0,-1.0


In [15]:
# Plot the EMA of BTC/USD closing prices
#data[['Close', 'fast_close', 'slow_close']].plot(figsize=(20,10))

In [16]:
news_data = newsapi.get_everything(
    q=company_name or symbol, language="en", page_size=100
)

# Show the total number of news
news_data["totalResults"]

1113

In [44]:
#news_data['articles']

In [18]:
analyzer = SentimentIntensityAnalyzer()

In [19]:
# Define a function to get the sentiment scores
def get_sentiment_scores(text, date, source, url):
    sentiment_scores = {}

    # Sentiment scoring with VADER
    text_sentiment = analyzer.polarity_scores(text)
    sentiment_scores["date"] = date
    sentiment_scores["text"] = text
    sentiment_scores["source"] = source
    sentiment_scores["url"] = url
    sentiment_scores["compound"] = text_sentiment["compound"]
    sentiment_scores["pos"] = text_sentiment["pos"]
    sentiment_scores["neu"] = text_sentiment["neu"]
    sentiment_scores["neg"] = text_sentiment["neg"]
    if text_sentiment["compound"] >= 0.05:  # Positive
        sentiment_scores["normalized"] = 1
    elif text_sentiment["compound"] <= -0.05:  # Negative
        sentiment_scores["normalized"] = -1
    else:
        sentiment_scores["normalized"] = 0  # Neutral

    return sentiment_scores

In [20]:
# Empty list to store the DataFrame structure
sentiments_data = []

# Loop through all the news articles
for article in news_data["articles"]:
    try:
        # Get sentiment scoring using the get_sentiment_score() function
        sentiments_data.append(
            get_sentiment_scores(
                article["content"],
                article["publishedAt"][:10],
                article["source"]["name"],
                article["url"],
            )
        )

    except AttributeError:
        pass

# Create a DataFrame with the news articles' data and their sentiment scoring results
news_df = pd.DataFrame(sentiments_data)

# Sort the DataFrame rows by date
news_df = news_df.sort_values(by="date")

# Define the date column as the DataFrame's index
news_df.set_index("date", inplace=True)
news_df.head()


Unnamed: 0_level_0,text,source,url,compound,pos,neu,neg,normalized
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-11-09,"On CNBC's ""Mad Money Lightning Round,"" Jim Cra...",Yahoo.com,https://finance.yahoo.com/news/cramer-weighs-n...,0.5984,0.133,0.807,0.06,1
2019-11-09,We are still in an overall bull market and man...,Yahoo.com,https://finance.yahoo.com/news/orthopediatrics...,0.8519,0.22,0.78,0.0,1
2019-11-09,"Nov 9, 2019 (Thomson StreetEvents) -- Edited T...",Yahoo.com,https://finance.yahoo.com/news/edited-transcri...,0.0,0.0,1.0,0.0,0
2019-11-10,Saudi state oil giant Aramco will sell 0.5% of...,The Hindu,https://www.thehindu.com/news/international/sa...,0.5267,0.093,0.907,0.0,1
2019-11-11,WASHINGTON (Reuters) - Microsoft Corp said in ...,Reuters,https://www.reuters.com/article/us-usa-privacy...,0.7184,0.214,0.786,0.0,1


In [21]:
news_df.tail(20)

Unnamed: 0_level_0,text,source,url,compound,pos,neu,neg,normalized
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-12-05,U.S. Federal Trade Commission (FTC) has broade...,The Hindu,https://www.thehindu.com/sci-tech/technology/a...,0.0258,0.029,0.971,0.0,0
2019-12-05,Saudi Aramco raised $25.6 billion from the wor...,Time,https://time.com/5745022/saudi-arabia-aramco-i...,-0.2023,0.0,0.958,0.042,-1
2019-12-05,Image source: The Motley Fool.\r\nSecureWorks ...,Fool.com,https://www.fool.com/earnings/call-transcripts...,0.0,0.088,0.823,0.088,0
2019-12-06,We are still in an overall bull market and man...,Yahoo.com,https://finance.yahoo.com/news/hedge-funds-cas...,0.8176,0.184,0.816,0.0,1
2019-12-06,We are still in an overall bull market and man...,Yahoo.com,https://finance.yahoo.com/news/did-hedge-funds...,0.8176,0.184,0.816,0.0,1
2019-12-06,"By Peter Frontini\r\nSAO PAULO, Dec 6 (Reuters...",Yahoo.com,https://finance.yahoo.com/news/brazil-court-sa...,0.0,0.0,1.0,0.0,0
2019-12-06,"Amid an overall bull market, many stocks that ...",Yahoo.com,https://finance.yahoo.com/news/were-hedge-fund...,0.8176,0.196,0.804,0.0,1
2019-12-06,We are still in an overall bull market and man...,Yahoo.com,https://finance.yahoo.com/news/hedge-funds-lov...,0.8176,0.184,0.816,0.0,1
2019-12-06,"Just a few short years ago, buying up cryptocu...",Lifehacker.com,https://twocents.lifehacker.com/invest-in-cryp...,-0.2023,0.054,0.875,0.07,-1
2019-12-06,By Pavel Alpeyev and Takahiko HyugaSoftBank Gr...,The Times of India,https://economictimes.indiatimes.com/small-biz...,-0.34,0.028,0.911,0.061,-1


In [22]:
avg_sent_data = (
    news_df[["compound", "normalized"]].groupby(by=news_df.index).mean()
)
avg_sent_data.head()


Unnamed: 0_level_0,compound,normalized
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-11-09,0.483433,0.666667
2019-11-10,0.5267,1.0
2019-11-11,0.73815,1.0
2019-11-12,0.245633,0.666667
2019-11-13,0.525633,1.0


In [23]:
avg_sent_data

Unnamed: 0_level_0,compound,normalized
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-11-09,0.483433,0.666667
2019-11-10,0.5267,1.0
2019-11-11,0.73815,1.0
2019-11-12,0.245633,0.666667
2019-11-13,0.525633,1.0
2019-11-14,0.495325,1.0
2019-11-15,0.5214,0.727273
2019-11-16,0.4391,0.666667
2019-11-18,0.697633,1.0
2019-11-19,0.3592,0.5


In [24]:
signal_data =  pd.merge(data, avg_sent_data, left_index=True, right_index=True)

In [25]:
signal_data

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Daily Return,fast_close,slow_close,crossover_long,crossover_short,crossover_signal,compound,normalized
2019-11-11,145.339996,146.419998,144.729996,146.110001,145.614517,14362600,0.001028,143.395147,139.511289,1.0,0.0,1.0,0.73815,1.0
2019-11-12,146.279999,147.570007,146.059998,147.070007,146.571259,18641600,0.00657,143.870856,139.756707,1.0,0.0,1.0,0.245633,0.666667
2019-11-13,146.740005,147.460007,146.279999,147.309998,146.81044,16919200,0.001632,144.316051,140.001949,1.0,0.0,1.0,0.525633,1.0
2019-11-14,147.020004,148.410004,147.0,148.059998,147.557892,19729800,0.005091,144.800703,140.263579,1.0,0.0,1.0,0.495325,1.0
2019-11-15,148.929993,149.990005,148.270004,149.970001,149.461426,23485700,0.0129,145.469865,140.57873,1.0,0.0,1.0,0.5214,0.727273
2019-11-18,150.070007,150.550003,148.979996,150.339996,149.83017,21534000,0.002467,146.100301,140.895661,1.0,0.0,1.0,0.697633,1.0
2019-11-19,150.880005,151.330002,150.199997,150.389999,149.87999,23935700,0.000333,146.6556,141.203925,1.0,0.0,1.0,0.3592,0.5
2019-11-20,150.309998,150.839996,148.460007,149.619995,149.619995,25696800,-0.00512,147.039339,141.47718,1.0,0.0,1.0,0.5376,1.0
2019-11-21,149.399994,149.800003,148.5,149.479996,149.479996,18576100,-0.000936,147.355281,141.737017,1.0,0.0,1.0,0.421671,1.0
2019-11-22,150.070007,150.300003,148.820007,149.589996,149.589996,15901800,0.000736,147.644564,141.991989,1.0,0.0,1.0,0.099733,0.333333


In [26]:
import requests
import json

In [27]:
# Create parameterized url
request_url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=" + company_name + "&api-key=" + nyt_api + "&offset=500"

# Submit request and format output
response_data = requests.get(request_url).json()
#print(json.dumps(response_data, indent=4))

# Select fact 
#response_data['text']

In [28]:
#response_data['response']['docs']

"&page=" +i +
+ "&api-key="nyt_api

SyntaxError: invalid syntax (<ipython-input-28-fa06cdc01aeb>, line 3)

In [29]:
import time
response=[]
for i in range(100):
    try:
        request_url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=" + company_name + "&page=" + str(i) + "&api-key=" + nyt_api
        
        response_data = requests.get(request_url).json()
        data = response_data['response']['docs']
        time.sleep(6)
        for x in data:
            response.append(x)
    except:
        pass

In [51]:
response[-1]

{'abstract': 'A look at which industries and individuals have given members of the Joint Select Committee on Deficit Reduction the most campaign contributions.',
 'web_url': 'https://economix.blogs.nytimes.com/2011/08/15/who-pays-the-supercommittee/',
 'snippet': 'A look at which industries and individuals have given members of the Joint Select Committee on Deficit Reduction the most campaign contributions.',
 'lead_paragraph': 'The 12 members of the “supercommittee” that will try to develop yet another bipartisan fiscal policy proposal have now been named. What types of spending programs and tax breaks should we expect these members to care about most?',
 'source': 'The New York Times',
 'multimedia': [],
 'headline': {'main': 'Who Pays the Supercommittee?',
  'kicker': 'Economix',
  'content_kicker': None,
  'print_headline': None,
  'name': None,
  'seo': None,
  'sub': None},
 'keywords': [{'name': 'subject',
   'value': 'Campaign Finance',
   'rank': 1,
   'major': 'N'},
  {'name'

In [34]:
response2 = pd.DataFrame(response)

In [35]:
response2

Unnamed: 0,abstract,web_url,snippet,lead_paragraph,source,multimedia,headline,keywords,pub_date,document_type,news_desk,section_name,byline,type_of_material,_id,word_count,uri,print_section,print_page,subsection_name
0,"In a legal complaint, Amazon said the presiden...",https://www.nytimes.com/2019/12/09/technology/...,"In a legal complaint, Amazon said the presiden...",SAN FRANCISCO — Amazon said in a legal complai...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Amazon Accuses Trump of ‘Improper Pr...,"[{'name': 'subject', 'value': 'Cloud Computing...",2019-12-09T16:20:47+0000,article,Business,Technology,"{'original': 'By Kate Conger', 'person': [{'fi...",News,nyt://article/4ec915d3-c88b-52d3-9ab2-28bd1587...,861,nyt://article/4ec915d3-c88b-52d3-9ab2-28bd1587...,,,
1,Tech companies are deploying artificial intell...,https://www.nytimes.com/2019/11/12/us/online-c...,Tech companies are deploying artificial intell...,Child sexual abuse photos and videos are among...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'How Laws Against Child Sexual Abuse ...,"[{'name': 'subject', 'value': 'Child Abuse and...",2019-11-12T08:00:06+0000,article,Investigative,U.S.,{'original': 'By Gabriel J.X. Dance and Michae...,News,nyt://article/bc591503-66e3-52d7-83e8-cfd488b2...,1171,nyt://article/bc591503-66e3-52d7-83e8-cfd488b2...,B,1,
2,"Makan Delrahim, Kevin Systrom and Bill Gates w...",https://www.nytimes.com/2019/11/11/business/de...,"Makan Delrahim, Kevin Systrom and Bill Gates w...","The nation’s tech giants, once celebrated as c...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'What, if Anything, Should Be Done to...","[{'name': 'subject', 'value': 'Antitrust Laws ...",2019-11-11T21:16:06+0000,article,SpecialSections,Business Day,"{'original': 'By Steve Lohr', 'person': [{'fir...",News,nyt://article/44363cf7-3b3e-53b4-9ef4-eaef66f2...,789,nyt://article/44363cf7-3b3e-53b4-9ef4-eaef66f2...,F,4,DealBook
3,"With its new Surface, Microsoft has made progr...",https://www.nytimes.com/2019/11/05/technology/...,"With its new Surface, Microsoft has made progr...","When Apple unveiled the iPad in 2010, Steve Jo...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Microsoft Surface Pro X Review: Powe...,"[{'name': 'subject', 'value': 'Computers and t...",2019-11-05T05:01:15+0000,article,Business,Technology,"{'original': 'By Brian X. Chen', 'person': [{'...",Review,nyt://article/e3784281-328d-5c7b-a21f-f39005c6...,1210,nyt://article/e3784281-328d-5c7b-a21f-f39005c6...,B,3,Personal Tech
4,The messaging service is suing a cybersurveill...,https://www.nytimes.com/2019/11/01/technology/...,The messaging service is suing a cybersurveill...,"Each week, we review the week’s news, offering...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'The Week in Tech: WhatsApp’s Spyware...,"[{'name': 'subject', 'value': 'Computers and t...",2019-11-01T13:00:06+0000,article,Business,Technology,"{'original': 'By Jamie Condliffe', 'person': [...",News,nyt://article/d1ae5989-c0ed-504e-99b3-c16aa1e7...,1251,nyt://article/d1ae5989-c0ed-504e-99b3-c16aa1e7...,B,4,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,David Pogue reviews Microsoft's new search eng...,https://www.nytimes.com/video/technology/perso...,David Pogue reviews Microsoft's new search eng...,David Pogue reviews Microsoft's new search eng...,The New York Times,"[{'rank': 0, 'subtype': 'wide', 'caption': Non...","{'main': 'Microsoft Bing', 'kicker': None, 'co...","[{'name': 'organizations', 'value': 'Microsoft...",2009-07-09T17:47:14+0000,multimedia,Business,Technology,"{'original': 'None None', 'person': [{'firstna...",Video,nyt://video/663d6b7c-288b-580b-a4ef-8689cc547ad6,0,nyt://video/663d6b7c-288b-580b-a4ef-8689cc547ad6,,,Personal Tech
996,,https://www.nytimes.com/2006/04/26/technology/...,,,The New York Times,[],{'main': 'Briefing: BBC seeks 'teen brand' wit...,[],2006-04-26T05:00:00+0000,article,,Technology,"{'original': None, 'person': [], 'organization...",News,nyt://article/12ee78bb-4a54-582e-a62f-8db4d4a3...,341,nyt://article/12ee78bb-4a54-582e-a62f-8db4d4a3...,,,
997,"James Yost, chief information officer at Ford ...",https://www.nytimes.com/2000/09/18//IHT-big-sp...,"James Yost, chief information officer at Ford ...","James Yost, chief information officer at Ford ...",International Herald Tribune,[],{'main': 'Big Spending Days for Corporate Buye...,[],2000-09-18T05:00:00+0000,article,IHT News,Archives,"{'original': 'By Mitchell Martin, Internationa...",,nyt://article/a8ca7478-8c0c-5a44-a73b-6293ad89...,7428,nyt://article/a8ca7478-8c0c-5a44-a73b-6293ad89...,,,
998,After a long battle with the computer industry...,https://www.nytimes.com/2000/01/31/business/wo...,After a long battle with the computer industry...,After a long battle with the computer industry...,International Herald Tribune,[],"{'main': 'U.S. Removes an Encryption Barrier',...",[],2000-01-31T05:00:00+0000,article,IHT News,Business Day,"{'original': 'By Lee Dembart, International He...",,nyt://article/3b6230e1-9a68-536f-9885-5ff63598...,1993,nyt://article/3b6230e1-9a68-536f-9885-5ff63598...,,,


In [265]:
response2.reset_index(drop=True, inplace=True)

In [266]:
response2.head()

Unnamed: 0,abstract,web_url,snippet,lead_paragraph,source,multimedia,headline,keywords,pub_date,document_type,news_desk,section_name,byline,type_of_material,_id,word_count,uri,print_section,print_page,subsection_name
0,"In a legal complaint, Amazon said that the pre...",https://www.nytimes.com/2019/12/09/technology/...,"In a legal complaint, Amazon said that the pre...",Amazon said in a legal complaint unsealed on M...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Amazon Accuses Trump of ‘Improper Pr...,"[{'name': 'subject', 'value': 'Cloud Computing...",2019-12-09T16:20:47+0000,article,Business,Technology,"{'original': 'By Kate Conger', 'person': [{'fi...",News,nyt://article/4ec915d3-c88b-52d3-9ab2-28bd1587...,776,nyt://article/4ec915d3-c88b-52d3-9ab2-28bd1587...,,,
1,Tech companies are deploying artificial intell...,https://www.nytimes.com/2019/11/12/us/online-c...,Tech companies are deploying artificial intell...,Child sexual abuse photos and videos are among...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'How Laws Against Child Sexual Abuse ...,"[{'name': 'subject', 'value': 'Child Abuse and...",2019-11-12T08:00:06+0000,article,Investigative,U.S.,{'original': 'By Gabriel J.X. Dance and Michae...,News,nyt://article/bc591503-66e3-52d7-83e8-cfd488b2...,1171,nyt://article/bc591503-66e3-52d7-83e8-cfd488b2...,B,1.0,
2,"Makan Delrahim, Kevin Systrom and Bill Gates w...",https://www.nytimes.com/2019/11/11/business/de...,"Makan Delrahim, Kevin Systrom and Bill Gates w...","The nation’s tech giants, once celebrated as c...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'What, if Anything, Should Be Done to...","[{'name': 'subject', 'value': 'Antitrust Laws ...",2019-11-11T21:16:06+0000,article,SpecialSections,Business Day,"{'original': 'By Steve Lohr', 'person': [{'fir...",News,nyt://article/44363cf7-3b3e-53b4-9ef4-eaef66f2...,789,nyt://article/44363cf7-3b3e-53b4-9ef4-eaef66f2...,F,4.0,DealBook
3,"With its new Surface, Microsoft has made progr...",https://www.nytimes.com/2019/11/05/technology/...,"With its new Surface, Microsoft has made progr...","When Apple unveiled the iPad in 2010, Steve Jo...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Microsoft Surface Pro X Review: Powe...,"[{'name': 'subject', 'value': 'Computers and t...",2019-11-05T05:01:15+0000,article,Business,Technology,"{'original': 'By Brian X. Chen', 'person': [{'...",Review,nyt://article/e3784281-328d-5c7b-a21f-f39005c6...,1210,nyt://article/e3784281-328d-5c7b-a21f-f39005c6...,B,3.0,Personal Tech
4,The messaging service is suing a cybersurveill...,https://www.nytimes.com/2019/11/01/technology/...,The messaging service is suing a cybersurveill...,"Each week, we review the week’s news, offering...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'The Week in Tech: WhatsApp’s Spyware...,"[{'name': 'subject', 'value': 'Computers and t...",2019-11-01T13:00:06+0000,article,Business,Technology,"{'original': 'By Jamie Condliffe', 'person': [...",News,nyt://article/d1ae5989-c0ed-504e-99b3-c16aa1e7...,1251,nyt://article/d1ae5989-c0ed-504e-99b3-c16aa1e7...,B,4.0,


In [257]:
response = pd.DataFrame(response_data)

In [156]:
response

Unnamed: 0,status,copyright,response
docs,OK,Copyright (c) 2019 The New York Times Company....,"[{'abstract': 'In a legal complaint, Amazon sa..."
meta,OK,Copyright (c) 2019 The New York Times Company....,"{'hits': 11105, 'offset': 0, 'time': 254}"


In [157]:
response = response['response']['docs']

In [158]:
#response[0]

In [54]:
# Define a function to get the sentiment scores
def get_sentiment_scores(abstract, lead_paragraph, date, url):
    sentiment_scores = {}

    # Sentiment scoring with VADER
    text_sentiment = analyzer.polarity_scores(lead_paragraph)
    sentiment_scores["pub_date"] = date
    sentiment_scores["abstract"] = abstract
    sentiment_scores["lead_paragraph"] = lead_paragraph
#    sentiment_scores["source"] = source
    sentiment_scores["web_url"] = url
    sentiment_scores["compound"] = text_sentiment["compound"]
    sentiment_scores["pos"] = text_sentiment["pos"]
    sentiment_scores["neu"] = text_sentiment["neu"]
    sentiment_scores["neg"] = text_sentiment["neg"]
    if text_sentiment["compound"] >= 0.05:  # Positive
        sentiment_scores["normalized"] = 1
    elif text_sentiment["compound"] <= -0.05:  # Negative
        sentiment_scores["normalized"] = -1
    else:
        sentiment_scores["normalized"] = 0  # Neutral

    return sentiment_scores

In [55]:
#response

In [57]:
# Empty list to store the DataFrame structure
sentiments_data = []

# Loop through all the news articles
for article in response:
    try:
        # Get sentiment scoring using the get_sentiment_score() function
        sentiments_data.append(
            get_sentiment_scores(
                article["abstract"],
                article["lead_paragraph"],
                article["pub_date"][:10],
#                article["source"],
                article["web_url"],
            )
        )

    except AttributeError:
        pass

# Create a DataFrame with the news articles' data and their sentiment scoring results
news_df = pd.DataFrame(sentiments_data)

# Sort the DataFrame rows by date
news_df = news_df.sort_values(by="pub_date")

# Define the date column as the DataFrame's index
news_df.set_index("pub_date", inplace=True)

In [59]:
news_df.shape

(1000, 8)

In [189]:
news_df.iloc[0]['lead_paragraph']

'Jeffrey Epstein, the convicted sex offender who committed suicide in prison, managed to lure an astonishing array of rich, powerful and famous men into his orbit.'

In [161]:
news_df.shape

(10, 9)

In [144]:
#response['response'][0]

In [145]:
# Define a function to get the sentiment scores
def get_sentiment_scores(text, date, source, url):
    sentiment_scores = {}

    # Sentiment scoring with VADER
    text_sentiment = analyzer.polarity_scores(text)
    sentiment_scores["date"] = date
    sentiment_scores["text"] = text
    sentiment_scores["source"] = source
    sentiment_scores["url"] = url
    sentiment_scores["compound"] = text_sentiment["compound"]
    sentiment_scores["pos"] = text_sentiment["pos"]
    sentiment_scores["neu"] = text_sentiment["neu"]
    sentiment_scores["neg"] = text_sentiment["neg"]
    if text_sentiment["compound"] >= 0.05:  # Positive
        sentiment_scores["normalized"] = 1
    elif text_sentiment["compound"] <= -0.05:  # Negative
        sentiment_scores["normalized"] = -1
    else:
        sentiment_scores["normalized"] = 0  # Neutral

    return sentiment_scores