In [1]:
import os
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
import yfinance as yf
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
from fuzzywuzzy import process
import requests

In [3]:
# Retrieve the NYtimes API key
nyt_api = os.getenv("NYT_SECRET_KEY")

In [4]:
company_request = 'amazon'

In [5]:
def getCompany(text):
    r = requests.get('https://api.iextrading.com/1.0/ref-data/symbols')
    stockList = r.json()
    return process.extractOne(text, stockList)[0]

In [6]:
company = getCompany(company_request)

In [7]:
symbol = company['symbol']
company_name = company['name']
print(symbol)
print(company_name)

AMZN
AMAZON.COM INC


In [8]:
data = yf.download(symbol, parse_dates=True, infer_datetime_format=True)

[*********************100%***********************]  1 of 1 completed


In [9]:
data['Daily Return'] = data['Close'].dropna().pct_change()

In [10]:
# Set short and long windows
short_window = 5
long_window = 21

# Construct a `Fast` and `Slow` Exponential Moving Average from short and long windows, respectively
data['fast_close'] = data['Close'].ewm(halflife=short_window).mean()
data['slow_close'] = data['Close'].ewm(halflife=long_window).mean()

# Construct a crossover trading signal
data['crossover_long'] = np.where(data['fast_close'] > data['slow_close'], 1.0, 0.0)
data['crossover_short'] = np.where(data['fast_close'] < data['slow_close'], -1.0, 0.0)
data['crossover_signal'] = data['crossover_long'] + data['crossover_short']

data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Daily Return,fast_close,slow_close,crossover_long,crossover_short,crossover_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1997-05-15,2.4375,2.5,1.927083,1.958333,1.958333,72156000,,1.958333,1.958333,0.0,0.0,0.0
1997-05-16,1.96875,1.979167,1.708333,1.729167,1.729167,14700000,-0.117021,1.83582,1.841859,0.0,-1.0,-1.0
1997-05-19,1.760417,1.770833,1.625,1.708333,1.708333,6106800,-0.012048,1.787317,1.795874,0.0,-1.0,-1.0
1997-05-20,1.729167,1.75,1.635417,1.635417,1.635417,5467200,-0.042683,1.741121,1.753752,0.0,-1.0,-1.0
1997-05-21,1.635417,1.645833,1.375,1.427083,1.427083,18853200,-0.127388,1.659817,1.684036,0.0,-1.0,-1.0


In [11]:
analyzer = SentimentIntensityAnalyzer()

In [12]:
import requests
import json
import time

In [13]:
response=[]
for i in range(100):
    try:
        request_url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=" + company_name + "&page=" + str(i) + "&api-key=" + nyt_api
        
        response_data = requests.get(request_url).json()
        news_data = response_data['response']['docs']
        time.sleep(6)
        for x in news_data:
            response.append(x)
    except:
        pass

In [14]:
len(response)

1000

In [15]:
response2 = pd.DataFrame(response)

In [16]:
#response2

In [17]:
# Define a function to get the sentiment scores
def get_sentiment_scores(abstract, lead_paragraph, date, url):
    sentiment_scores = {}

    # Sentiment scoring with VADER
    text_sentiment = analyzer.polarity_scores(lead_paragraph)
    sentiment_scores["pub_date"] = date
    sentiment_scores["abstract"] = abstract
    sentiment_scores["lead_paragraph"] = lead_paragraph
#    sentiment_scores["source"] = source
    sentiment_scores["web_url"] = url
    sentiment_scores["compound"] = text_sentiment["compound"]
    sentiment_scores["pos"] = text_sentiment["pos"]
    sentiment_scores["neu"] = text_sentiment["neu"]
    sentiment_scores["neg"] = text_sentiment["neg"]
    if text_sentiment["compound"] >= 0.05:  # Positive
        sentiment_scores["normalized"] = 1
    elif text_sentiment["compound"] <= -0.05:  # Negative
        sentiment_scores["normalized"] = -1
    else:
        sentiment_scores["normalized"] = 0  # Neutral

    return sentiment_scores

In [18]:
# Empty list to store the DataFrame structure
sentiments_data = []

# Loop through all the news articles
for article in response:
    try:
        # Get sentiment scoring using the get_sentiment_score() function
        sentiments_data.append(
            get_sentiment_scores(
                article["abstract"],
                article["lead_paragraph"],
                article["pub_date"][:10],
#                article["source"],
                article["web_url"],
            )
        )

    except AttributeError:
        pass

# Create a DataFrame with the news articles' data and their sentiment scoring results
news_df = pd.DataFrame(sentiments_data)

# Sort the DataFrame rows by date
news_df = news_df.sort_values(by="pub_date")

# Define the date column as the DataFrame's index
news_df.set_index("pub_date", inplace=True)

In [19]:
news_sent = news_df[['compound','normalized']]
signal_data =  pd.concat([data, news_sent], axis=1)
signal_data.drop(signal_data.index[0], inplace=True)
signal_data.fillna(0, inplace=True)
signal_data= signal_data[signal_data.Open != 0]

In [20]:
signal_data

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Daily Return,fast_close,slow_close,crossover_long,crossover_short,crossover_signal,compound,normalized
1997-05-15,2.437500,2.500000,1.927083,1.958333,1.958333,72156000.0,0.000000,1.958333,1.958333,0.0,0.0,0.0,0.0,0.0
1997-05-16,1.968750,1.979167,1.708333,1.729167,1.729167,14700000.0,-0.117021,1.835820,1.841859,0.0,-1.0,-1.0,0.0,0.0
1997-05-16,1.968750,1.979167,1.708333,1.729167,1.729167,14700000.0,-0.117021,1.835820,1.841859,0.0,-1.0,-1.0,0.0,0.0
1997-05-19,1.760417,1.770833,1.625000,1.708333,1.708333,6106800.0,-0.012048,1.787317,1.795874,0.0,-1.0,-1.0,0.0,0.0
1997-05-20,1.729167,1.750000,1.635417,1.635417,1.635417,5467200.0,-0.042683,1.741121,1.753752,0.0,-1.0,-1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-09,1750.660034,1766.890015,1745.609985,1749.510010,1749.510010,2442800.0,-0.001193,1764.601802,1776.338920,0.0,-1.0,-1.0,0.0,0.0
2019-12-09,1750.660034,1766.890015,1745.609985,1749.510010,1749.510010,2442800.0,-0.001193,1764.601802,1776.338920,0.0,-1.0,-1.0,0.0,0.0
2019-12-09,1750.660034,1766.890015,1745.609985,1749.510010,1749.510010,2442800.0,-0.001193,1764.601802,1776.338920,0.0,-1.0,-1.0,0.0,0.0
2019-12-09,1750.660034,1766.890015,1745.609985,1749.510010,1749.510010,2442800.0,-0.001193,1764.601802,1776.338920,0.0,-1.0,-1.0,0.0,0.0
