In [26]:
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path
from datetime import datetime
import requests

In [2]:
# Load the US equities news dataset (the path is relative to the current
# working directory) and preview its first five rows.
df = pd.read_csv(
    filepath_or_buffer=Path('./data/us_equities_news_dataset.csv'),
)
df.head(n=5)

Unnamed: 0,id,ticker,title,category,content,release_date,provider,url,article_id
0,221515,NIO,Why Shares of Chinese Electric Car Maker NIO A...,news,What s happening\nShares of Chinese electric c...,2020-01-15,The Motley Fool,https://invst.ly/pigqi,2060327
1,221516,NIO,NIO only consumer gainer Workhorse Group amon...,news,Gainers NIO NYSE NIO 7 \nLosers MGP Ingr...,2020-01-18,Seeking Alpha,https://invst.ly/pje9c,2062196
2,221517,NIO,NIO leads consumer gainers Beyond Meat and Ma...,news,Gainers NIO NYSE NIO 14 Village Farms In...,2020-01-15,Seeking Alpha,https://invst.ly/pifmv,2060249
3,221518,NIO,NIO NVAX among premarket gainers,news,Cemtrex NASDAQ CETX 85 after FY results \n...,2020-01-15,Seeking Alpha,https://invst.ly/picu8,2060039
4,221519,NIO,PLUG NIO among premarket gainers,news,aTyr Pharma NASDAQ LIFE 63 on Kyorin Pharm...,2020-01-06,Seeking Alpha,https://seekingalpha.com/news/3529772-plug-nio...,2053096


In [3]:
# Preview the last five rows of the news dataset.
df.tail(n=5)

Unnamed: 0,id,ticker,title,category,content,release_date,provider,url,article_id
221508,443024,T,Crude And Steel Still In Sync,opinion,We have been reporting on the trade off betwee...,2012-10-04,Ivan Kitov,https://www.investing.com/analysis/crude-and-s...,138733
221509,443025,T,Forget AT T This Is The Telecom Stock You Sho...,opinion,It s the largest cell phone provider in the wo...,2012-05-30,StreetAuthority,https://www.investing.com/analysis/forget-at-t...,124829
221510,443026,T,Wall Street Exposed Part 3 How Dividends C...,opinion,Before we dicuss how the mechanism of dividend...,2012-07-16,Portfolio Cafe,https://www.investing.com/analysis/wall-street...,129651
221511,443027,T,Weighing The Week Ahead It s All About Jobs,opinion,From start to finish the coming week will hav...,2012-09-02,Jeff Miller,https://www.investing.com/analysis/weighing-th...,134926
221512,443028,T,Leap Wireless LEAP JPM Note and AT T T M...,opinion,Leap Wireless International Inc Leap is a ...,2011-12-31,Ophir Gottlieb,https://www.investing.com/analysis/leap-wirele...,110079


In [10]:
# Show every article whose ticker is exactly 'VAS' (an empty frame means the
# dataset has no articles for that ticker).
df[df['ticker'].eq('VAS')]

Unnamed: 0,id,ticker,title,category,content,release_date,provider,url,article_id


In [4]:
# Scrape the S&P 500 constituents page on Wikipedia: take the first table
# that pandas parses from the page and collect its 'Symbol' column as a list.
URL = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
tickers = (
    pd.read_html(io=URL)[0]
    .loc[:, 'Symbol']
    .tolist()
)
tickers

['MMM',
 'AOS',
 'ABT',
 'ABBV',
 'ACN',
 'ATVI',
 'ADM',
 'ADBE',
 'ADP',
 'AAP',
 'AES',
 'AFL',
 'A',
 'APD',
 'AKAM',
 'ALK',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AMD',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'ABC',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'AON',
 'APA',
 'AAPL',
 'AMAT',
 'APTV',
 'ACGL',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'AZO',
 'AVB',
 'AVY',
 'BKR',
 'BALL',
 'BAC',
 'BBWI',
 'BAX',
 'BDX',
 'WRB',
 'BRK.B',
 'BBY',
 'BIO',
 'TECH',
 'BIIB',
 'BLK',
 'BK',
 'BA',
 'BKNG',
 'BWA',
 'BXP',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BRO',
 'BF.B',
 'CHRW',
 'CDNS',
 'CZR',
 'CPT',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CTLT',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'CE',
 'CNC',
 'CNP',
 'CDAY',
 'CF',
 'CRL',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'CL',
 'CMCSA',
 'CMA

In [5]:
# Number of ticker symbols collected from the scraped constituents table.
len(tickers)

503

In [8]:
# Top 10 stocks of the S&P 500, used as a proxy for the SPY.AX ETF.
# Note: the list is hardcoded because there does not appear to be an easy way
# to programmatically retrieve the top S&P 500 constituent stocks.
spy_top10 = [
    'AAPL',
    'MSFT',
    'AMZN',
    'TSLA',
    'GOOGL',
    'GOOG',
    'NVDA',
    'BRK.B',
    'META',
    'UNH',
]

In [15]:
# Retrieve the Alpaca API credentials.
# load_dotenv() is called first so that values kept in a local .env file are
# available through os.getenv alongside ordinary environment variables.
load_dotenv()
alpaca_api_key = os.getenv("APCA-API-KEY-ID")
alpaca_secret_key = os.getenv("APCA-API-SECRET-KEY")

# Fail fast when either credential is missing or empty: continuing would only
# defer the failure to the first API request, where it surfaces as a less
# informative HTTP error.
if not alpaca_api_key or not alpaca_secret_key:
    raise RuntimeError("Failed to load API credentials")

In [30]:
# Request news articles from the Alpaca news endpoint.
# Only the first symbol in spy_top10 is queried here; the remaining symbols
# are not requested by this cell.
alpaca_endpoint = 'https://data.alpaca.markets/v1beta1/news'

# Credentials are sent as request headers.
alpaca_headers = {
    'Apca-Api-Key-Id': alpaca_api_key,
    'Apca-Api-Secret-Key': alpaca_secret_key
}
parameters = {
    'symbols': spy_top10[0],
    # Date window formatted as YYYY-MM-DD strings.
    'start': pd.to_datetime("2022-07-01").strftime('%Y-%m-%d'),
    # 'start': '2021-12-28T00:00:00Z',
    'end': pd.to_datetime("2022-07-05").strftime('%Y-%m-%d'),
    # 'end': '2021-12-31T11:59:59Z',
    'limit': 50,
    'include_content': True,
}

response = requests.get(
    url = alpaca_endpoint,
    headers = alpaca_headers,
    params = parameters,
    # (connect, read) timeouts in seconds: without a timeout, requests waits
    # indefinitely on a stalled connection and the cell never finishes.
    timeout = (5, 30),
)

# Raise requests.HTTPError on a 4xx/5xx response instead of displaying an
# error payload as if it were news data.
response.raise_for_status()
display(response.json())

{'news': [{'author': 'Shanthi Rexaline',
   'content': '<p>The first half of 2022 was nightmarish for the stock market, with fears of an imminent recession and supply chain disruptions working in the mind of investors.</p>\r\n\r\n<p>Tech stocks have invariably led the broader market in the past, be it a rally or a retreat, and this time was no exception. Despite fairly robust fundamental performances amid the trying backdrop, big techs came under significant selling pressure.</p>\r\n\r\n<p>Here&#39;s a look at how some of the high-profile tech stocks, commonly referred to using the acronym &quot;FAANG&quot; fared in the first half of the year:</p>\r\n\r\n<h3>The FAANG Stocks</h3>\r\n\r\n<p><strong>Meta Platforms, Inc. </strong>(NASDAQ:<a class="ticker" href="https://www.benzinga.com/stock/META#NASDAQ">META</a>)<br />\r\n<strong>Amazon, Inc. </strong>(NASDAQ:<a class="ticker" href="https://www.benzinga.com/stock/AMZN#NASDAQ">AMZN</a>)<br />\r\n<strong>Apple, Inc. </strong>(NASDAQ:<a cla