In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import selenium.common.exceptions as exceptions
import time

import pickle
from tqdm import tqdm
from datetime import datetime

import sys
# Add the sibling scripts folder to the module search path so that
# `account_auth` can be imported from this notebook.
sys.path.append('../scripts/')
from account_auth import get_login
# Credentials used for the site login further down. Presumably get_login reads
# them from files under this directory — TODO confirm against account_auth.
user, pw = get_login('../authentication/')

# Table of Contents

* [Sample Code for Scraping 1 Ticker](#one-ticker)
* [Sample stock data after scraping](#post-scrape)

<a class="anchor" id="one-ticker"></a>

# Sample Code for Scraping 1 Ticker

In [4]:
# Build the Chrome session that every later cell reuses through `driver`.
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
# Drop the "enable-automation" switch Chrome is normally launched with.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Leave the browser window open once the driver process goes away.
options.add_experimental_option('detach', True)
# Only surface fatal browser log messages in the notebook output.
options.add_argument('--log-level=3')
# options.add_experimental_option('useAutomationExtension', False)
# NOTE: `executable_path` is only accepted by Selenium 3 / early Selenium 4;
# newer releases expect a `Service` object instead.
driver = webdriver.Chrome(options=options, executable_path='../chromedriver.exe')

# Sign in: wait for the user-id field to become clickable, then fill in the
# form. `find_element(By...)` is used because the `find_element_by_*` helpers
# no longer exist in current Selenium releases, while this form works on both.
driver.get('https://www.fidelity.com/')
WebDriverWait(driver, 30).until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, 'input#userId-input'))).send_keys(user)
driver.find_element(By.CSS_SELECTOR, 'input#password').send_keys(pw)
driver.find_element(By.CSS_SELECTOR, 'button#fs-login-button').click()

# Fixed pause after submitting the login form before any further navigation.
time.sleep(7)

In [5]:
ticker = 'ABT'

driver.get(f'https://snapshot.fidelity.com/fidresearch/snapshot/landing.jhtml#/analystsopinions?symbol={ticker}')
dropdown = Select(WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.ID, 'firm'))))

dropdown.select_by_visible_text('Current Firm Opinion')
# XPath attribute predicates identifying the main and the bottom opinion tables.
table_id = '@id="allOpinionsTable"'
tablebot_id = '@id="bottom-table"'

# Table dimensions: the header row gives the column count, each tbody the row count.
col_eles = driver.find_elements(By.XPATH, f'//table[{table_id}]/thead/tr/th')
num_cols = len(col_eles)
row_eles = driver.find_elements(By.XPATH, f'//table[{table_id}]/tbody/tr')
num_rows = len(row_eles)
bottom_rows = driver.find_elements(By.XPATH, f'//table[{tablebot_id}]/tbody/tr')
num_bot_rows = len(bottom_rows)


def _cell_text(table_attr, row_idx, col_idx):
    """Return the text of one body cell; row_idx and col_idx are 0-based."""
    return driver.find_element(
        By.XPATH, f'//table[{table_attr}]/tbody/tr[{row_idx + 1}]/td[{col_idx + 1}]').text


def _read_rows(table_attr, n_rows, n_cols):
    """Return the first n_rows x n_cols body cells of a table as a list of row lists."""
    return [[_cell_text(table_attr, r, c) for c in range(n_cols)] for r in range(n_rows)]


# Main-table rows first, followed by the bottom-table rows.
table = _read_rows(table_id, num_rows, num_cols) + _read_rows(tablebot_id, num_bot_rows, num_cols)

# Change the select option and overwrite the columns that differ in this view.
dropdown.select_by_visible_text('Last Opinion Change')
changed_cols = (5, 6)
# Bottom-table rows start right after the main-table rows. The offset is taken
# from num_rows rather than from a leftover loop index, so it stays correct
# even when the main table is empty.
for table_attr, n_rows, offset in ((table_id, num_rows, 0), (tablebot_id, num_bot_rows, num_rows)):
    for r in range(n_rows):
        for c in changed_cols:
            table[offset + r][c] = _cell_text(table_attr, r, c)

# Clean up strings in the table: strip the ' (i)' marker from the first column
# and drop the third column from every row.
for row in table:
    row[0] = row[0].replace(' (i)', '')
    row.pop(2)

#### Saving column names for the analyst overview landing page

In [6]:
# Flatten every header label onto a single line.
col_names = [header.text.replace('\n', ' ') for header in col_eles]

# Swap selected header labels for explicit names (positions refer to the list
# before the history column is removed below).
renamed_headers = {
    0: 'Firm / Aggregate Opinion',
    1: 'Standardized Opinion',
    4: 'Current Firm Opinion Date',
    5: 'Last Opinion Change Date',
    8: 'Latest Report Date',
}
for position, label in renamed_headers.items():
    col_names[position] = label
col_names.remove('1 Year History Click line for details')

print(col_names)

# Persist the final header list.
with open('../pickles/ticker_analyst_overview_column_names.pickle', 'wb') as f:
    pickle.dump(col_names, f)

['Firm / Aggregate Opinion', 'Standardized Opinion', 'Investars Smartindex', 'Current Firm Opinion Date', 'Last Opinion Change Date', 'Action', 'Opinion', 'Latest Report Date']


### Saving subtables from each individual analyst landing page

In [5]:
def _row_link(table_attr, row_idx):
    """Return the href of the first link in a table body row (0-based) with '&view=3' appended."""
    row_ele = driver.find_element(By.XPATH, f'//table[{table_attr}]/tbody/tr[{row_idx + 1}]')
    return row_ele.find_element(By.TAG_NAME, 'a').get_attribute('href') + '&view=3'


# One URL per scraped row: main-table rows first, then the bottom-table rows.
urls = []
for table_attr, n_rows in ((table_id, num_rows), (tablebot_id, num_bot_rows)):
    urls.extend(_row_link(table_attr, r) for r in range(n_rows))

In [9]:
table_jsid = '@jsid="smartsentiment-performance-data-table-performance"'
# Work on a copy of every row so that re-running this cell does not append a
# second sub-table to the rows of `table`.
full_table = [list(row) for row in table]

# Only rows whose opinion changed within this many days get their page visited.
recent_days = 3 * 366

for i, url in enumerate(urls):
    sub_table = []

    # Index 4 holds the date parsed below, formatted as mm/dd/yy.
    # NOTE(review): strptime raises ValueError if the cell is not a date — confirm
    # the site never shows a placeholder there.
    last_change = datetime.strptime(full_table[i][4], '%m/%d/%y')
    if (datetime.now() - last_change).days < recent_days:
        driver.get(url)
        # Sleep for at least 7 seconds, then wait for the table to be present.
        time.sleep(7)
        WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.XPATH, f'//table[{table_jsid}]')))

        # Separate names keep the overview table's col_eles/num_cols/num_rows
        # intact, so the earlier cells can still be re-run afterwards.
        sub_num_cols = len(driver.find_elements(By.XPATH, f'//table[{table_jsid}]/thead/tr/th'))
        sub_num_rows = len(driver.find_elements(By.XPATH, f'//table[{table_jsid}]/tbody/tr'))

        # Read the sub-table cell by cell (j = row, k = column, both 0-based).
        for j in range(sub_num_rows):
            sub_table.append([
                driver.find_element(
                    By.XPATH, f'//table[{table_jsid}]/tbody/tr[{j + 1}]/td[{k + 1}]').text
                for k in range(sub_num_cols)
            ])

    # The URL is always stored as the last item, even when no rows were scraped.
    sub_table.append(url)
    full_table[i].append(sub_table)

#### Saving column names from the individual landing page of the analysts

In [87]:
# Header cells of the performance table on the page the driver currently shows.
# `find_elements(By.XPATH, ...)` replaces `find_elements_by_xpath`, which no
# longer exists in current Selenium releases.
col_eles = driver.find_elements(By.XPATH, f'//table[{table_jsid}]/thead/tr/th')

# Flatten every header label onto a single line.
col_names = [ele.text.replace('\n', ' ') for ele in col_eles]

print(col_names)

with open('../pickles/ticker_specific_analyst_column_names.pickle', 'wb') as f:
    pickle.dump(col_names, f)

['Period', 'Action', 'From', 'To', 'Period Length', 'Price Performance', 'S&P 500 Performance', 'Russell 3000 Performance']


<a class="anchor" id="post-scrape"></a>

In [None]:
# Save the scraped table under the ticker's name. This is the file the
# "Sample stock data after scraping" section reads back as `full_table`.
with open(f'../pickles/ticker_data/success/{ticker}.pickle', 'wb') as f:
    pickle.dump(full_table, f)

In [None]:
# Optional: uncomment to save the scraped table so that the cell below can reload it.
# with open(f'../pickles/ticker_data/success/{ticker}.pickle', 'wb') as f:
#     pickle.dump(full_table, f)

# Sample stock data after scraping

In [5]:
# Read the stored table for `ticker` back from the success folder.
pickle_path = f'../pickles/ticker_data/success/{ticker}.pickle'
with open(pickle_path, 'rb') as f:
    full_table = pickle.load(f)

In [12]:
# Display the last row of the loaded table; in the output below this is the
# 'Equity Summary Score' entry, whose final item is a nested list of rows.
full_table[-1]

['Equity Summary Score\n8 Firms† | Methodology',
 'N/A',
 'N/A',
 '07/13/21',
 '06/23/21',
 'Upgrade(r)',
 'Bullish',
 'N/A',
 [['06/23/2021 - 07/12/2021',
   'Upgrade',
   'Neutral (5.5)',
   'Bullish (7.1)',
   '19 days',
   '+7.85%',
   '+3.25%',
   '+2.75%'],
  ['06/16/2021 - 06/22/2021',
   'Downgrade',
   'Bullish (8)',
   'Neutral (5.5)',
   '7 days',
   '+3.35%',
   '0.00%',
   '+0.06%'],
  ['05/06/2021 - 06/15/2021',
   'Upgrade',
   'Neutral (4.4)',
   'Bullish (8)',
   '41 days',
   '+1.20%',
   '+1.90%',
   '+2.07%'],
  ['04/28/2021 - 05/05/2021',
   'Downgrade',
   'Bullish (8.2)',
   'Neutral (4.4)',
   '8 days',
   '-4.68%',
   '-0.46%',
   '-1.01%'],
  ['04/21/2021 - 04/27/2021',
   'Upgrade',
   'Neutral (4.7)',
   'Bullish (8.2)',
   '7 days',
   '+0.96%',
   '+1.25%',
   '+1.76%'],
  ['04/14/2021 - 04/20/2021',
   'Downgrade',
   'Bullish (7.9)',
   'Neutral (4.7)',
   '7 days',
   '-0.98%',
   '-0.16%',
   '-0.40%'],
  ['04/01/2021 - 04/13/2021',
   'Downgrade',
   