In [1]:
import os
import csv
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import alpaca_trade_api as tradeapi
from datetime import datetime, timedelta
from dotenv import load_dotenv
import warnings
warnings.filterwarnings('ignore')

# Fetch the VADER lexicon before building the analyzer: the constructor loads
# the lexicon resource and raises LookupError when it has not been downloaded,
# which would break this first cell on a fresh machine.
nltk.download('vader_lexicon', quiet=True)

# NOTE(review): in the cells visible here `sia` is only referenced by the
# commented-out lexicon-extension cell below.
sia = SentimentIntensityAnalyzer()

In [None]:
# # Updating Vader Lexicon with External Lexicon

# #  stock market lexicon
# stock_lex = pd.read_csv('lexicon_data/stock_lex.csv')
# stock_lex['sentiment'] = (stock_lex['Aff_Score'] + stock_lex['Neg_Score'])/2
# stock_lex = dict(zip(stock_lex.Item, stock_lex.sentiment))
# stock_lex = {k:v for k,v in stock_lex.items() if len(k.split(' '))==1}
# stock_lex_scaled = {}
# for k, v in stock_lex.items():
#     if v > 0:
#         stock_lex_scaled[k] = v / max(stock_lex.values()) * 4
#     else:
#         stock_lex_scaled[k] = v / min(stock_lex.values()) * -4

# # Loughran and McDonald
# positive = []
# with open('lexicon_data/lm_positive.csv', 'r') as f:
#     reader = csv.reader(f)
#     for row in reader:
#         positive.append(row[0].strip())
    
# negative = []
# with open('lexicon_data/lm_negative.csv', 'r') as f:
#     reader = csv.reader(f)
#     for row in reader:
#         entry = row[0].strip().split(" ")
#         if len(entry) > 1:
#             negative.extend(entry)
#         else:
#             negative.append(entry[0])

# final_lex = {}
# final_lex.update({word:2.0 for word in positive})
# final_lex.update({word:-2.0 for word in negative})
# final_lex.update(stock_lex_scaled)
# final_lex.update(sia.lexicon)
# sia.lexicon = final_lex

In [2]:
# Download (or refresh) the VADER lexicon resource that
# SentimentIntensityAnalyzer loads when it is constructed
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer
# NOTE(review): `analyzer` is not referenced by any other cell visible in this
# notebook (headline scoring goes through `sid`) — confirm whether it is needed.
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/alexbeckham/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
# Load .env environment variables
load_dotenv()

# Set News API key.
# The key is read from the environment (variable `news_api`); credentials must
# never be written into the notebook itself. Fail early with a clear message
# instead of letting the first request fail with an opaque error.
api_key = os.getenv("news_api")
if not api_key:
    raise RuntimeError(
        "News API key not found: define `news_api` in the environment or .env file"
    )
newsapi = NewsApiClient(api_key=api_key)


# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# REST client for the Alpaca v2 API; used below to download price bars
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')



In [4]:
# Set the ticker
ticker = "WMT"

# Set timeframe to '1D'
timeframe = "1D"

# Number of calendar days of history to request
lookback_days = 15

# Take "now" directly in New York time. Localizing a naive datetime.now()
# would label the machine's local wall-clock time as New York time, which is
# wrong whenever the kernel runs in another timezone. A single "now" is used
# so both ends of the window derive from the same instant.
now_ny = pd.Timestamp.now(tz="America/New_York")
current_date = now_ny.isoformat()
past_date = (now_ny - timedelta(days=lookback_days)).isoformat()

# Get the last `lookback_days` days of daily bars for the ticker
# NOTE(review): get_barset is the legacy bars endpoint — confirm it is still
# available in the installed alpaca_trade_api version.
df = api.get_barset(
    ticker,
    timeframe,
    start=past_date,
    end=current_date,
).df

# Display data
df.head()

Unnamed: 0_level_0,WMT,WMT,WMT,WMT,WMT
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2021-04-05 00:00:00-04:00,136.72,140.16,136.2,139.42,11264505
2021-04-06 00:00:00-04:00,139.25,140.9482,138.52,140.1083,8951686
2021-04-07 00:00:00-04:00,140.28,140.75,139.65,139.8,5533208
2021-04-08 00:00:00-04:00,140.43,141.12,139.66,139.72,5099788
2021-04-09 00:00:00-04:00,139.63,140.09,138.7138,139.79,5615862


In [5]:
# Data cleanup

# The bars arrive with a two-level column header (ticker on top, price field
# underneath). Strip the ticker level and discard every field except `close`.
df = df.droplevel(level=0, axis=1).drop(
    columns=["open", "high", "low", "volume"]
)

# Daily bars: the time-of-day part of the index carries no information,
# so keep plain calendar dates only
df.index = df.index.date

# Display sample data
df.head()

Unnamed: 0,close
2021-04-05,139.42
2021-04-06,140.1083
2021-04-07,139.8
2021-04-08,139.72
2021-04-09,139.79


In [6]:
# Use the `pct_change` function to calculate daily returns of WMT.
# The first row has no previous close to compare against, so its NaN
# return is removed with `dropna`.
wmt_returns = df.pct_change().dropna()

# Display sample data
wmt_returns.head()

Unnamed: 0,close
2021-04-06,0.004937
2021-04-07,-0.0022
2021-04-08,-0.000572
2021-04-09,0.000501
2021-04-12,0.000215


In [7]:
# Use the newsapi client to collect each day's most relevant headlines over the lookback window
def get_headlines(keyword, newest_date=None, oldest_date=None, client=None):
    """Collect news headlines about ``keyword``, one request per calendar day.

    Walks backwards one day at a time, starting at ``newest_date`` (inclusive)
    and stopping before ``oldest_date`` (exclusive). Each request asks for
    English articles sorted by relevancy, first page only; the page size is
    left at whatever the client/API default is.

    Parameters
    ----------
    keyword : str
        Search phrase sent as the ``q`` parameter.
    newest_date, oldest_date : str, optional
        ISO-8601 date or datetime strings; only the leading ``YYYY-MM-DD``
        part is used. Default to the notebook-level ``current_date`` and
        ``past_date`` respectively.
    client : optional
        Object exposing ``get_everything(...)``. Defaults to the
        notebook-level ``newsapi`` client.

    Returns
    -------
    tuple
        ``(all_headlines, all_dates)``: a list with one list of article
        titles per day, and the matching list of ``datetime`` objects
        (midnight, naive), newest first. Titles are passed through exactly as
        returned, so an entry may be ``None`` if the response carries no title.
    """
    # Fall back to the notebook globals only when the caller gave no override
    client = newsapi if client is None else client
    newest = current_date if newest_date is None else newest_date
    oldest = past_date if oldest_date is None else oldest_date

    date = datetime.strptime(str(newest)[:10], "%Y-%m-%d")
    end_date = datetime.strptime(str(oldest)[:10], "%Y-%m-%d")

    all_headlines = []
    all_dates = []
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        # Same day for both bounds restricts the query to a single day
        day = date.strftime("%Y-%m-%d")
        articles = client.get_everything(
            q=keyword,
            from_param=day,
            to=day,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append([article["title"] for article in articles["articles"]])
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

# import requests

# url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/search/NewsSearchAPI"

# querystring = {"q":"walmart","pageNumber":"1","pageSize":"10","autoCorrect":"true","fromPublishedDate":"null","toPublishedDate":"null"}

# headers = {
#     'x-rapidapi-key': os.getenv("RAPIDAPI_KEY"),  # load the key from the environment; never commit credentials
#     'x-rapidapi-host': "contextualwebsearch-websearch-v1.p.rapidapi.com"
#     }

# response = requests.request("GET", url, headers=headers, params=querystring)

# print(response.text)

In [8]:
# Get walmart headlines: one list of article titles per day, plus the
# matching list of dates (newest first)
wmt_headlines, dates = get_headlines("walmart")

Fetching news about 'walmart'
******************************
retrieving news from: 2021-04-20 00:00:00
retrieving news from: 2021-04-19 00:00:00
retrieving news from: 2021-04-18 00:00:00
retrieving news from: 2021-04-17 00:00:00
retrieving news from: 2021-04-16 00:00:00
retrieving news from: 2021-04-15 00:00:00
retrieving news from: 2021-04-14 00:00:00
retrieving news from: 2021-04-13 00:00:00
retrieving news from: 2021-04-12 00:00:00
retrieving news from: 2021-04-11 00:00:00
retrieving news from: 2021-04-10 00:00:00
retrieving news from: 2021-04-09 00:00:00
retrieving news from: 2021-04-08 00:00:00
retrieving news from: 2021-04-07 00:00:00
retrieving news from: 2021-04-06 00:00:00


In [9]:
# Instantiate the SentimentIntensityAnalyzer that
# headline_sentiment_summarizer_avg uses to score each headline
sid = SentimentIntensityAnalyzer()

In [10]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(wmt_headlines, analyzer=None):
    """Average the VADER compound score of each day's headlines.

    Parameters
    ----------
    wmt_headlines : iterable of iterables
        One collection of headline strings per day. ``None`` entries are
        skipped.
    analyzer : optional
        Object exposing ``polarity_scores(text)`` that returns a mapping with
        a ``"compound"`` key. Defaults to the notebook-level ``sid``.

    Returns
    -------
    list of float
        One mean compound score per day, in input order. A day with no usable
        headline yields ``float("nan")`` instead of raising
        ``ZeroDivisionError``; such days are removed by the ``dropna`` applied
        when the scores are merged with the returns.
    """
    scorer = sid if analyzer is None else analyzer
    sentiment = []
    for day in wmt_headlines:
        day_score = [
            scorer.polarity_scores(h)["compound"] for h in day if h is not None
        ]
        if day_score:
            sentiment.append(sum(day_score) / len(day_score))
        else:
            # Nothing to average for this day: keep the slot so the result
            # stays aligned with the list of dates.
            sentiment.append(float("nan"))
    return sentiment

In [11]:
# Average compound sentiment score of the Walmart headlines, one value per day
wmt_avg = headline_sentiment_summarizer_avg(wmt_headlines)

In [12]:
# Wrap the per-day sentiment averages in a single-column DataFrame
topic_sentiments = pd.DataFrame(
    {
        "wmt_avg": wmt_avg,
    }
)


In [13]:
# Set the index of the sentiment averages DataFrame to the dates returned by
# get_headlines, converted to pandas datetimes.
topic_sentiments.index = pd.to_datetime(dates)

In [14]:
# Merge with WMT returns: join the sentiment averages onto the returns by
# index and keep only the dates that have both a return and a sentiment value.
# NOTE(review): this rebinds `topic_sentiments`, so re-running the cell joins
# against the already-merged frame rather than the raw sentiment averages.
topic_sentiments = wmt_returns.join(topic_sentiments).dropna(how="any")

# Display data
display(topic_sentiments)

Unnamed: 0,close,wmt_avg
2021-04-06,0.004937,0.254775
2021-04-07,-0.0022,0.259465
2021-04-08,-0.000572,0.28415
2021-04-09,0.000501,0.066335
2021-04-12,0.000215,0.02702
2021-04-13,-0.003218,-0.00703
2021-04-14,0.0,0.042845
2021-04-15,0.005668,0.033295
2021-04-16,0.002711,-0.000815
2021-04-19,-0.00555,0.12446


In [15]:
# Correlate the headlines' sentiment to returns and shade the resulting
# correlation matrix with a background gradient
topic_sentiments.corr().style.background_gradient()

Unnamed: 0,close,wmt_avg
close,1.0,0.182131
wmt_avg,0.182131,1.0


In [17]:
# Summary statistics for the daily returns (`close`) and the average
# headline sentiment (`wmt_avg`)
topic_sentiments.describe()

Unnamed: 0,close,wmt_avg
count,11.0,11.0
mean,0.000896,0.123167
std,0.003949,0.119499
min,-0.00555,-0.00703
25%,-0.001386,0.030158
50%,0.000215,0.066335
75%,0.003824,0.25712
max,0.00737,0.28415


In [19]:
# Make API call: a single, undated query for English Walmart articles sorted
# by relevancy.
# NOTE(review): this rebinds `wmt_headlines` — until now the per-day lists of
# titles returned by get_headlines — to the raw response dict, so the
# sentiment cells above cannot be re-run after this one without re-fetching.
wmt_headlines = newsapi.get_everything(
    q="walmart", language="en", sort_by="relevancy"
)

# Print total articles
print(f"Total articles about Walmart: {wmt_headlines['totalResults']}")

# Show sample article
wmt_headlines["articles"][0]

Total articles about Walmart: 5912


{'source': {'id': 'the-verge', 'name': 'The Verge'},
 'author': 'Jon Porter',
 'title': 'Companies can’t wait to accessorize your AirTags',
 'description': 'Apple’s AirTags trackers could launch alongside a wide array of third-party accessories, if retailer listings and leaked photos are accurate. eBay, Etsy, and even Walmart, are already selling keychains.',
 'url': 'https://www.theverge.com/2021/4/20/22393404/airtags-accessories-keychain-leaked-photos-listings-trackers-tile',
 'urlToImage': 'https://cdn.vox-cdn.com/thumbor/T525_DL-kBAxKzFURXaBzTwx5QI=/0x182:2538x1511/fit-in/1200x630/cdn.vox-cdn.com/uploads/chorus_asset/file/22456534/s_l1600.jpg',
 'publishedAt': '2021-04-20T08:26:02Z',
 'content': 'eBay listings show accessories to attach the trackers to a key chain. | Image: ta9146 / eBay\r\n\n \n\n Apple’s long-rumored AirTags trackers could launch alongside a wide array of third-party accessories… [+2045 chars]'}

In [20]:
# Build a DataFrame with one row per article in the response
wmt_df = pd.DataFrame(wmt_headlines["articles"])

# Preview the first rows
wmt_df.head()

Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content
0,"{'id': 'the-verge', 'name': 'The Verge'}",Jon Porter,Companies can’t wait to accessorize your AirTags,Apple’s AirTags trackers could launch alongsid...,https://www.theverge.com/2021/4/20/22393404/ai...,https://cdn.vox-cdn.com/thumbor/T525_DL-kBAxKz...,2021-04-20T08:26:02Z,eBay listings show accessories to attach the t...
1,"{'id': 'techcrunch', 'name': 'TechCrunch'}",Manish Singh,Flipkart to acquire online travel firm Cleartrip,Flipkart said on Thursday it has agreed to acq...,http://techcrunch.com/2021/04/15/flipkart-to-a...,https://techcrunch.com/wp-content/uploads/2019...,2021-04-15T08:39:31Z,Flipkart said on Thursday it has agreed to acq...
2,"{'id': 'techcrunch', 'name': 'TechCrunch'}",Jordan Crook,Swyft raises $17.5 million to bring same-day d...,Thanks to major players like Amazon and Walmar...,http://techcrunch.com/2021/04/07/swyft-raises-...,https://techcrunch.com/wp-content/uploads/2021...,2021-04-07T14:01:12Z,Thanks to major players like Amazon and Walmar...
3,"{'id': 'mashable', 'name': 'Mashable'}",Mashable BrandX and Walmart,Save your living room from a design slump with...,AD CONTENT FROM WALMARTWe can probably all agr...,https://mashable.com/ad/feature/living-room-sp...,https://mondrian.mashable.com/2021%252F03%252F...,2021-03-22T15:34:10Z,We can probably all agree that its been a very...
4,"{'id': None, 'name': 'New York Times'}",Christine Hauser,Arrest of Colorado Woman With Dementia Prompts...,"Karen Garner, 73, of Loveland, Colo., walked o...",https://www.nytimes.com/2021/04/20/us/karen-ga...,https://static01.nyt.com/images/2021/04/20/mul...,2021-04-20T19:43:05Z,The city said in its statement that it had rec...
