In [1]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pymongo
from config import api_key
from pprint import pprint

In [2]:
# Quote endpoint (yfapi.net) that is queried once per ticker.
base_url = "https://yfapi.net/v6/finance/quote"

# Ticker symbols to pull data on -- mainly the top 50 stocks of the S&P 500.
# Kept as one whitespace-separated string and split into a list of symbols.
tickers = """
    AAPL  GOOGL MSFT AMZN FB   GOOG TSLA NVDA JPM  UNH
    JNJ   PG    V    HD   BAC  XOM  MA   DIS  PFE  CVX
    ABBV  KO    CSCO AVGO COST PEP  VZ   ADBE TMO  ABT
    CMCSA WFC   CRM  LLY  ACN  INTC WMT  MRK  AMD  QCOM
    DHR   NKE   T    NFLX UNP  PM   LOW  TXN  ROKU
""".split()

In [34]:
# Accesses the Yahoo Finance API and pulls one quote per ticker.
# Result: `stock_data`, a list of dicts (one per ticker that returned complete data).
stock_data = []

# The headers are the same for every request, so build them once outside the loop.
headers = {'x-api-key': api_key}

# Output column name -> field name inside the API's quote payload.
# Order matters: it is the column order of the resulting records.
quote_fields = {
    'ticker': 'symbol',
    'company_name': 'shortName',
    'day_open': 'regularMarketOpen',
    'day_high': 'regularMarketDayHigh',
    'day_low': 'regularMarketDayLow',
    'avg_analyst_rating': 'averageAnalystRating',
    'fifty_day_moving_avg': 'fiftyDayAverage',
    'two_hundred_day_moving_avg': 'twoHundredDayAverage',
}

for ticker in tickers:
    try:
        # A timeout keeps one stalled request from hanging the whole cell.
        reply = requests.get(
            base_url, headers=headers, params={"symbols": ticker}, timeout=30
        )
        # Surface HTTP errors (bad key, rate limit, ...) as such instead of
        # as a confusing KeyError on the error payload.
        reply.raise_for_status()
        response = reply.json()

        quote = response['quoteResponse']['result'][0]
        stock_dict = {column: quote[field] for column, field in quote_fields.items()}
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as err:
        # Best effort: skip this ticker but say why. Only the failures expected
        # from a bad response are caught; KeyboardInterrupt and genuine bugs
        # still propagate (the previous bare `except:` swallowed them).
        print('---------')
        print(f'{ticker} Missing Data')
        print(f'  reason: {type(err).__name__}: {err}')
        continue

    stock_data.append(stock_dict)

    symbol = stock_dict['ticker']
    print('---------')
    print(f"{symbol} Rating: {stock_dict['avg_analyst_rating']}")
    print(
        f"{symbol} Open: {stock_dict['day_open']}; "
        f"High: {stock_dict['day_high']}; Low: {stock_dict['day_low']}"
    )

---------
AAPL Rating: 1.8 - Buy
AAPL Open: 167.99; High: 172.64; Low: 167.65
---------
GOOGL Rating: 1.7 - Buy
GOOGL Open: 2774.05; High: 2791.77; Low: 2757.01
---------
MSFT Rating: 1.7 - Buy
MSFT Open: 300.51; High: 303.22; Low: 297.73
---------
AMZN Rating: 1.7 - Buy
AMZN Open: 3274.1; High: 3326.525; Low: 3253.74
---------
FB Rating: 2.1 - Buy
FB Open: 213.33; High: 216.7988; Low: 212.16
---------
GOOG Rating: 1.5 - Strong Buy
GOOG Open: 2782.77; High: 2800.5; Low: 2763.34
---------
TSLA Rating: 2.5 - Buy
TSLA Open: 979.94; High: 1040.7; Low: 976.75
---------
NVDA Rating: 2.0 - Buy
NVDA Open: 261.26; High: 266.115; Low: 255.7501
---------
JPM Rating: 2.5 - Buy
JPM Open: 140.98; High: 141.585; Low: 139.1999
---------
UNH Rating: 1.9 - Buy
UNH Open: 502.27; High: 507.06; Low: 501.3348
---------
JNJ Rating: 2.1 - Buy
JNJ Open: 175.23; High: 175.54; Low: 174.09
---------
PG Rating: 2.4 - Buy
PG Open: 152.2; High: 153.01; Low: 150.69
---------
V Rating: 1.7 - Buy
V Open: 215.3; High: 2

In [40]:
# Build a DataFrame from the collected API records (one row per ticker),
# then write it to CSV so it can be transformed in a later step.
stock_data_df = pd.DataFrame(data=stock_data)
stock_data_df.to_csv(path_or_buf='yfapi_data.csv')

In [8]:
# MarketWatch is scraped as an alternate data source; start at its home page.
url = 'https://www.marketwatch.com/'
# ChromeDriverManager().install() supplies the chromedriver path that splinter
# needs; the browser is opened with a visible window (headless=False).
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)
browser.visit(url)



Could not get version for google-chrome with the command:  powershell "$ErrorActionPreference='silentlycontinue' ; (Get-Item -Path "$env:PROGRAMFILES\Google\Chrome\Application\chrome.exe").VersionInfo.FileVersion ; if (-not $? -or $? -match $error) { (Get-Item -Path "$env:PROGRAMFILES(x86)\Google\Chrome\Application\chrome.exe").VersionInfo.FileVersion } if (-not $? -or $? -match $error) { (Get-Item -Path "$env:LOCALAPPDATA\Google\Chrome\Application\chrome.exe").VersionInfo.FileVersion } if (-not $? -or $? -match $error) { reg query "HKCU\SOFTWARE\Google\Chrome\BLBeacon" /v version } if (-not $? -or $? -match $error) { reg query "HKLM\SOFTWARE\Wow6432Node\Microsoft\Windows\CurrentVersion\Uninstall\Google Chrome" /v version }"
Current google-chrome version is UNKNOWN
Get LATEST chromedriver version for UNKNOWN google-chrome
Trying to download new driver from https://chromedriver.storage.googleapis.com/99.0.4844.51/chromedriver_win32.zip
Driver has been saved in cache [C:\Users\Billy\.w

In [None]:
# Accumulator for the scraped MarketWatch records (one dict per ticker).
# The scraping loop appends to this list, so this cell must be run before it;
# re-run it to start from an empty list again.
stock_articles = []

In [9]:
# Scraping for latest news article, date of article, and analyst rating on each ticker.
# The accumulator is reset here so that re-running this cell does not append
# duplicate rows and the cell does not depend on a separate initialisation cell.
stock_articles = []

for ticker in tickers:
    url = f'https://www.marketwatch.com/investing/stock/{ticker}?mod=over_search'
    browser.visit(url)

    html = browser.html
    soup = bs(html, 'html.parser')

    # soup.find() returns None when nothing matches, so look everything up
    # first and validate before touching .text / .a / .span. Otherwise one
    # page with a missing element would abort the whole loop.
    rating_tag = soup.find('li', class_='analyst__option active')
    latest_article = soup.find('h3', class_='article__headline')
    article_details = soup.find('div', class_='article__details')

    headline_anchor = latest_article.a if latest_article is not None else None
    date_tag = article_details.span if article_details is not None else None
    article_link = headline_anchor.get('href') if headline_anchor is not None else None

    if rating_tag is None or date_tag is None or article_link is None:
        # Same best-effort policy as the API loop: report and move on.
        print('----------')
        print(f'{ticker} Missing Data')
        continue

    analyst_rating = rating_tag.text
    article_title = headline_anchor.text.strip()
    article_date = date_tag.text

    articles_dict = {
        'ticker': ticker,
        'analyst_rating': analyst_rating,
        'article_title': article_title,
        'article_date': article_date,
        'link': article_link,
    }
    stock_articles.append(articles_dict)

    print('----------')
    print(ticker, ": ", article_date)
    print(analyst_rating)
    print(article_title)
    print(article_link)

----------
WFC :  Mar. 23, 2022 at 5:28 p.m. ET
Over
Wells Fargo & Co. stock underperforms Wednesday when compared to competitors
https://www.marketwatch.com/story/wells-fargo-co-stock-underperforms-wednesday-when-compared-to-competitors-01648070908-06cd8fabf30d?mod=mw_quote_news
----------
CRM :  Mar. 23, 2022 at 4:45 p.m. ET
Buy
Salesforce.com Inc. stock underperforms Wednesday when compared to competitors
https://www.marketwatch.com/story/salesforce-com-inc-stock-underperforms-wednesday-when-compared-to-competitors-01648068351-6959849102d1?mod=mw_quote_news
----------
LLY :  Mar. 23, 2022 at 5:04 p.m. ET
Over
Eli Lilly & Co. stock outperforms market despite losses on the day
https://www.marketwatch.com/story/eli-lilly-co-stock-outperforms-market-despite-losses-on-the-day-01648069462-3bf2745893dc?mod=mw_quote_news
----------
ACN :  Mar. 23, 2022 at 4:33 p.m. ET
Over
Accenture PLC Cl A stock outperforms competitors despite losses on the day
https://www.marketwatch.com/story/accenture-

In [11]:
# Scraping is finished: shut down the browser session opened for MarketWatch.
browser.quit()

In [14]:
# Build a DataFrame from the scraped records and preview the first rows.
# (The CSV export for the transformation step happens in the next cell.)
stock_articles_df = pd.DataFrame(data=stock_articles)
stock_articles_df.head(5)

Unnamed: 0,ticker,analyst_rating,article_title,article_date,link
0,AAPL,Over,Apple Inc. stock outperforms market on strong ...,"Mar. 23, 2022 at 4:31 p.m. ET",https://www.marketwatch.com/story/apple-inc-st...
1,GOOGL,Buy,Alphabet Inc. Cl A stock underperforms Wednesd...,"Mar. 23, 2022 at 4:31 p.m. ET",https://www.marketwatch.com/story/alphabet-inc...
2,MSFT,Buy,"Microsoft Corp. stock falls Wednesday, underpe...","Mar. 23, 2022 at 4:31 p.m. ET",https://www.marketwatch.com/story/microsoft-co...
3,AMZN,Buy,What Ryan Cohen’s Insider Buys Say About GameS...,"Mar. 23, 2022 at 6:19 p.m. ET",https://www.marketwatch.com/articles/gamestop-...
4,FB,Over,"Meta Platforms Inc. stock falls Wednesday, und...","Mar. 23, 2022 at 4:31 p.m. ET",https://www.marketwatch.com/story/meta-platfor...


In [15]:
# Export the scraped article data to CSV for the transformation step.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index is
# written as an extra first column -- confirm downstream readers expect it.
stock_articles_df.to_csv('articles_scrape_data.csv')