# MarketBeat Analyst Rating Scraper 

Iterates through the stock analyst upgrades/downgrades on [marketbeat.com](https://www.marketbeat.com/ratings/) by day and scrapes them into a .csv file.

Historical analyst data can be used to look into whether following these calls gives an edge, and whether one analyst group is more credible than another.

In [1]:
# Import common libraries 

# Assert selenium chromedriver is up to date
from webdriver_manager.chrome import ChromeDriverManager

# Web scraping libraries 
from selenium import webdriver 
from selenium.webdriver.common.keys import Keys 

# BeautifulSoup 
import bs4 

# Data analysis libraries 
# import numpy as np 
# import pandas as pd 
# import matplotlib.pyplot as plt 

# Common libraries 
import os 
from pprint import pprint 
from tqdm import tqdm
import datetime as dt 
import time
import csv

In [2]:
# Close popup window if it comes up 
def handle_first_popup():
    '''
    Closes the opt-in popup ('optinform-modal') if it is present on the current page.
    Does nothing when the popup is not there.
    --------------------------------------------------------------------------------------------------
    INPUT:
    None. Operates on the module-level selenium webdriver `wb`.

    OUTPUT:
    None
    '''
    # find_element_by_id raises NoSuchElementException when nothing matches, so it cannot
    # serve as a presence check. find_elements_by_id returns an empty list instead.
    if wb.find_elements_by_id('optinform-modal'):
        wb.find_element_by_class_name('x').click()

# Close some other popup which can show up sometimes
def handle_other_popup():
    '''
    Best-effort attempt to close the popup whose close button sits at
    //*[@id="optinform-modal"]/div/span. Never raises for ordinary errors.
    --------------------------------------------------------------------------------------------------
    INPUT:
    None. Operates on the module-level selenium webdriver `wb`.

    OUTPUT:
    None
    '''
    try:
        # Look the button up once; an empty list means there is nothing to close
        close_buttons = wb.find_elements_by_xpath('//*[@id="optinform-modal"]/div/span')
        if close_buttons:
            close_buttons[0].click()
    except Exception:
        # Deliberately ignored: this is a cleanup step and the caller retries anyway.
        # Exception (rather than a bare except) lets KeyboardInterrupt/SystemExit through,
        # so the scrape can still be stopped with Ctrl-C.
        pass

In [3]:
def change_date(date):
    '''
    Changes the date on the analyst website for Marketbeat.
    The old value is removed with backspaces because clearing the date field with clear() will
    reset it to the previous date, since there is no cursor inside the field.
    --------------------------------------------------------------------------------------------------
    INPUT:
    date - a datetime.date or datetime.datetime object (formatted as mm/dd/YYYY before typing),
           or a string, which is typed into the field as-is


    OUTPUT:
    None
    '''
    # datetime.datetime is a subclass of datetime.date, so isinstance covers both.
    # Comparing exact types would send a datetime.datetime object unformatted.
    if isinstance(date, dt.date):
        date = date.strftime('%m/%d/%Y')

    # Click on the date field so that key presses land inside it
    date_field = wb.find_element_by_name('ctl00$cphPrimaryContent$txtStartDate')
    date_field.click()

    # Backspace the current date (a mm/dd/YYYY date is 10 characters; 11 presses are sent)
    for _ in range(11):
        webdriver.ActionChains(wb).send_keys(Keys.BACKSPACE).perform()

    # Type the new date and submit it
    date_field.send_keys(date)
    webdriver.ActionChains(wb).send_keys(Keys.ENTER).perform()

## Example Output To Export to CSV
| Date       | Ticker | Company | Action  | Brokerage | PT1   | PT2   | Rating1 | Rating2 | Impact |
|------------|--------|---------|---------|-----------|-------|-------|---------|---------|--------|
| 10/08/2020 | AAPL   | APPLE   | Upgrade | JPM       | 100.0 | 130.0 | NEUTRAL | BUY     | Medium |

In [4]:
def scrape_values(html):
    '''
    Pulls the tabular analyst data from the current marketbeat analyst page using BeautifulSoup4
    --------------------------------------------------------------------------------------------------
    INPUT:
    html - Can be from selenium page source or via requests.

    OUTPUT:
    scraped - 2D array of scraped values. Each row is
              [date, ticker, company, action, brokerage, pt1, pt2, rating1, rating2, impact].

    NOTE:
    The date column is taken from the module-level variable `page_date_str`, which the caller
    must set before calling this function.
    '''
    scraped = []

    # Name the parser explicitly so the result does not depend on which optional parsers
    # happen to be installed (and to avoid bs4's "no parser specified" warning).
    soup = bs4.BeautifulSoup(html, 'html.parser')

    # The rows of the table are enclosed within the 'tr' tag; the first one is the header.
    for row in soup.find_all('tr')[1:]:

        # Grab the ticker & company name.
        # One row in this table is always an ad (?) and has neither, so it is skipped.
        ticker_cells = row.select('.ticker-area')
        title_cells = row.select('.title-area')
        if not ticker_cells or not title_cells:
            continue
        ticker = ticker_cells[0].text.strip().upper()
        company_name = title_cells[0].text.strip().upper()

        # The remaining columns do not have unique names and are enclosed in the 'td' tag.
        # The first td holds the ticker/company scraped above and the last item is useless,
        # so both are dropped.
        cells = [td.text.strip() for td in row.find_all('td')[1:-1]]

        # Expected layout: action, brokerage, current price, price target, ..., rating, impact.
        # A shorter row cannot be mapped onto the output columns, so skip it instead of
        # raising IndexError.
        if len(cells) < 6:
            continue

        action, brokerage = cells[0], cells[1]
        # cells[2] is the current price, which is not exported
        price_target, rating, impact = cells[3], cells[-2], cells[-1]

        # Extract 0, 1, or 2 price targets
        clean_targets = handle_price_targets(price_target)

        # Extract 0, 1, or 2 ratings
        clean_ratings = handle_ratings(rating)

        # Formatted row for appending to master rows
        scraped.append(
            [page_date_str, ticker, company_name, action, brokerage]
            + clean_targets
            + clean_ratings
            + [impact]
        )
    return scraped

In [5]:
def handle_price_targets(price_target):
    '''
    Marketbeat price target column can be empty, have one number, or two numbers.
    Use this function to return all the numbers in separate columns.
    --------------------------------------------------------------------------------------------------
    INPUT:
    price_target - string, targets in the format $\\d\\d.\\d\\d. Can include a '➝' character indicating a price change.

    OUTPUT:
    clean_targets - list of exactly 2 items, each a float or None: the previous price target and the
                    new one. A missing or blank value becomes None. If more than one arrow is present,
                    the first and last values are used so that the list always has 2 items.

    RAISES:
    ValueError - if a non-blank value is not a number after '$', ',', '(' and ')' are removed.
    '''
    # Remove impurities
    for impurity in ('$', ',', '(', ')'):
        price_target = price_target.replace(impurity, '')

    # Strip each side so that padding or a blank side never reaches float()
    parts = [part.strip() for part in price_target.split('➝')]

    if len(parts) == 1:
        # No arrow: the field is blank or holds only the new target
        previous, new = '', parts[0]
    else:
        # The price target has changed and is indicated by an arrow
        previous, new = parts[0], parts[-1]

    return [float(value) if value else None for value in (previous, new)]

In [6]:
def handle_ratings(ratings):
    '''
    Marketbeat upgrades/downgrades show a change of rating (e.g. Neutral ➝ Outperform) or just have a single
    rating present (Neutral)
    Use this function to grab one or both actions.
    --------------------------------------------------------------------------------------------------
    INPUT:
    ratings - string, either has 0, 1, or 2 ratings

    OUTPUT:
    clean_ratings - list of exactly 2 items: the previous rating and the new rating, upper-cased and
                    stripped of surrounding whitespace. A missing or blank rating is None. If more
                    than one arrow is present, the first and last ratings are used.
    '''
    parts = [part.strip().upper() for part in ratings.split('➝')]

    if len(parts) == 1:
        # No arrow: only the new rating (possibly blank) is present
        previous, new = '', parts[0]
    else:
        previous, new = parts[0], parts[-1]

    # Blank strings are normalised to None in both positions
    return [previous or None, new or None]

## Program Begins Here

In [7]:
# Create the output file with its header row if it does not exist yet.
# An existing file is left untouched so that a previous scrape can be resumed.
if not os.path.exists('marketbeat_analyst_data.csv'):
    # utf-8 matches the encoding used when this file is read back and appended to
    with open('marketbeat_analyst_data.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['date', 'ticker', 'company', 'action', 'brokerage', 'pt1', 'pt2', 'rating1', 'rating2', 'impact'])

In [8]:
# Page that lists the analyst ratings
url = 'https://www.marketbeat.com/ratings/'

# Instantiate a Chrome webdriver, using the chromedriver installed by webdriver_manager
wb = webdriver.Chrome(ChromeDriverManager().install())

# Open the ratings page in the browser
wb.get(url)

[WDM] - Current google-chrome version is 86.0.4240
[WDM] - Get LATEST driver version for 86.0.4240
[WDM] - There is no [win32] chromedriver for browser 86.0.4240 in cache
[WDM] - Get LATEST driver version for 86.0.4240
[WDM] - Trying to download new driver from http://chromedriver.storage.googleapis.com/86.0.4240.22/chromedriver_win32.zip


 


[WDM] - Driver has been saved in cache [C:\Users\mui\.wdm\drivers\chromedriver\win32\86.0.4240.22]


In [9]:
# Get rid of the first popup (the 'optinform-modal' opt-in form) so it does not
# cover the page elements used by the cells below
handle_first_popup()

In [10]:
# Open the country dropdown so the results can be limited to US stocks
country_selector = wb.find_element_by_name('ctl00$cphPrimaryContent$ddlCountry')
country_selector.click()

# Click the second option of the dropdown ('United States')
us_option_xpath = '//*[@id="cphPrimaryContent_ddlCountry"]/option[2]'
country_selector.find_element_by_xpath(us_option_xpath).click()

In [15]:
# Find out which dates have been scraped already.
# This lets an interrupted scrape resume without writing duplicate rows.
with open('marketbeat_analyst_data.csv', 'r', newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    # `if row` ignores blank lines, which would otherwise raise IndexError on row[0]
    completed_dates = {row[0] for row in reader if row}

# Arbitrary start time. To be user input.
start_date_dt = dt.datetime(year=2015, month=1, day=1).date()
start_date_str = start_date_dt.strftime('%m/%d/%Y')

# Last date to scrape (inclusive). Fixed once so the loop has a stable end condition.
end_date_dt = dt.datetime.today().date()

# A date that keeps failing is skipped after this many attempts instead of being retried forever
MAX_ATTEMPTS = 5
attempts = 0
failed_dates = []

one_day = dt.timedelta(days=1)
page_date = start_date_dt

while page_date <= end_date_dt:
    # NOTE: scrape_values() reads this module-level name to fill in the date column
    page_date_str = page_date.strftime('%m/%d/%Y')

    # Skip weekends and dates that are already in the CSV.
    # weekday() is 5 for Saturday and 6 for Sunday and, unlike strftime('%A'),
    # does not depend on the locale.
    if page_date.weekday() >= 5 or page_date_str in completed_dates:
        page_date += one_day
        continue

    # Use try/except to deal with selenium issues
    # i.e. page loading too fast and elements not loading
    try:
        change_date(page_date)
        time.sleep(1.5)
        scraped = scrape_values(wb.page_source)

    except Exception as exc:
        # Exception (not a bare except) so that Ctrl-C still stops the scrape

        # Close other popup if exists
        handle_other_popup()

        attempts += 1
        if attempts < MAX_ATTEMPTS:
            # Wait a little and retry the same date
            time.sleep(3)
            continue

        # Give up on this date. Nothing was written for it, so it is not in the CSV
        # and a later run will try it again.
        print('Skipping {} after {} failed attempts: {!r}'.format(page_date_str, attempts, exc))
        failed_dates.append(page_date_str)
        attempts = 0
        page_date += one_day
        continue

    attempts = 0

    # Append the scraped rows to the CSV (dates without any ratings write nothing)
    if scraped:
        with open('marketbeat_analyst_data.csv', 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, delimiter = ',')
            writer.writerows(scraped)

    # Increment by 1 day after completion
    page_date += one_day

if failed_dates:
    print('Dates that could not be scraped: {}'.format(', '.join(failed_dates)))
print('Finished.')

Finished.


In [16]:
# quit() closes every window and ends the chromedriver session;
# close() would only close the current window and leave the driver process running
wb.quit()