# Data Wrangling - MarketWatch Analyst Rating Scraper

Note: this workbook is 1 of 4 scraping and extracting processes that ultimately aggregate into the Technical Indicators workbook in the Data Wrangling phase of capstone 3


* Fundamental Scraper - scrapes 5 years worth of fundamental company financial data from MarketWatch using Beautiful Soup from the S&P 500 list
* Fundamental Calcs  - imports scraped data from the scraper tool, converts text data to numeric - i.e. 5.00M to 5000000 - using regular expressions, and calculates additonal financial metrics
* <span style="color:red"> **Analyst Scraper (this workbook)** </span> - scrapes analyst buy, sell, hold ratings for all S&P 500 stocks and downloads to .csv file
* Mass Yahoo Download and Technical Analysis - downloads 5 years of daily stock pricing data from the S&P 500, Runs complex Directional Index, ADX, Bollinger Band, and other financial charting data. Merges data from fundamental and analyst scrapers

In [34]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
from datetime import date
import re

import numpy as np

In [38]:
import os
importfile = 'SandP.csv'
importpath = r'C:\Users\nmur1\Google Drive\Springboard\Capstone2\Stock Import Lists'
exportpath = r'C:\Users\nmur1\Google Drive\Springboard\Capstone2\CleanData'
exportfile = 'Analysts.csv'
os.chdir(importpath)

In [39]:
Company = pd.read_csv(importfile, encoding= 'unicode_escape')
Company.head()

Unnamed: 0,Symbol,Name,Sector
0,MMM,3M Company,Industrials
1,AOS,A.O. Smith Corp,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie Inc.,Health Care
4,ABMD,ABIOMED Inc,Health Care


In [47]:
dfAnalysts = []

CompList = Company.Symbol
i = 0

for ticker in CompList:

    #open website
    try:
        urlfinancials = 'https://www.marketwatch.com/investing/stock/'+ ticker + '/analystestimates'
        res = requests.get(urlfinancials)

        #parse reccomendation
        text_soup_recs = BeautifulSoup(requests.get(urlfinancials).text,"lxml") #read in
        recs = text_soup_recs.find('td', {'class': 'recommendation'})
        recs = [i.strip() for i in recs]


        #parse target price
        price = text_soup_recs.find("td", {'class': 'first column2'}).find_next_sibling("td").text
        try:
            price = int(str(price).replace(',',''))
        except:
            price = price
        #get all ratings

        ratings = []
        rec_list = ['Buy','Overweight','Hold','Underweight', 'Sell']

        for name in text_soup_recs.findAll("td", {'class': 'current'}):
            ratings.append(name.text)

        ratings = np.array(ratings[:-1]).astype('int64')
        dictrec = dict(zip(rec_list,ratings))

        dfRatings = pd.DataFrame(dictrec, index = [i])

        dfRatings['Symbol'] = str(ticker).upper()
        dfRatings['Analyst_Price'] = price
        dfRatings['Analyst_Rec'] = recs
        dfAnalysts.append(dfRatings)
        i = i + 1
        print('downloading ', i, ' of ', len(CompList))
    
    except:
        print(ticker, ' not found')
        

downloading  1  of  505
downloading  2  of  505
downloading  3  of  505
downloading  4  of  505
downloading  5  of  505
downloading  6  of  505
downloading  7  of  505
downloading  8  of  505
downloading  9  of  505
downloading  10  of  505
downloading  11  of  505
downloading  12  of  505
downloading  13  of  505
downloading  14  of  505
downloading  15  of  505
downloading  16  of  505
downloading  17  of  505
downloading  18  of  505
downloading  19  of  505
downloading  20  of  505
downloading  21  of  505
downloading  22  of  505
downloading  23  of  505
downloading  24  of  505
downloading  25  of  505
downloading  26  of  505
downloading  27  of  505
downloading  28  of  505
downloading  29  of  505
downloading  30  of  505
downloading  31  of  505
downloading  32  of  505
downloading  33  of  505
downloading  34  of  505
downloading  35  of  505
downloading  36  of  505
downloading  37  of  505
downloading  38  of  505
downloading  39  of  505
downloading  40  of  505
downloadi

downloading  320  of  505
downloading  321  of  505
downloading  322  of  505
downloading  323  of  505
downloading  324  of  505
downloading  325  of  505
downloading  326  of  505
downloading  327  of  505
downloading  328  of  505
downloading  329  of  505
downloading  330  of  505
downloading  331  of  505
downloading  332  of  505
downloading  333  of  505
downloading  334  of  505
downloading  335  of  505
downloading  336  of  505
downloading  337  of  505
downloading  338  of  505
downloading  339  of  505
downloading  340  of  505
downloading  341  of  505
downloading  342  of  505
downloading  343  of  505
downloading  344  of  505
downloading  345  of  505
downloading  346  of  505
downloading  347  of  505
downloading  348  of  505
downloading  349  of  505
downloading  350  of  505
downloading  351  of  505
downloading  352  of  505
downloading  353  of  505
downloading  354  of  505
downloading  355  of  505
downloading  356  of  505
downloading  357  of  505
downloading 

In [48]:
dfAnalysts = pd.concat(dfAnalysts)


In [49]:
dfAnalysts

Unnamed: 0,Buy,Overweight,Hold,Underweight,Sell,Symbol,Analyst_Price,Analyst_Rec
0,3,0,13,0,3,MMM,168.44,Hold
1,5,0,7,0,1,AOS,48.63,Overweight
2,14,2,3,1,1,ABT,106.38,Overweight
3,11,1,4,0,0,ABBV,110.14,Overweight
4,4,0,4,0,1,ABMD,253.40,Overweight
...,...,...,...,...,...,...,...,...
499,6,3,17,0,1,YUM,98.52,Hold
500,3,2,5,0,0,ZBRA,286.71,Overweight
501,18,2,7,0,1,ZBH,146.04,Overweight
502,6,1,17,0,1,ZION,36.48,Hold


In [50]:
import yfinance as yf
def Pricing(ticker, dstart, dend, Type):

    df = yf.download(ticker, 
                          start=dstart, 
                          end = dend,
                          progress=False, group_by = 'Symbol')

    df = df.reset_index()
    df = df.melt(id_vars = 'Date', var_name = ['Symbol', 'Type'])
    df = df[df.Type == Type]
    
    return df

In [51]:
def RollingPrice(sdate, edate, lookback, Stocks):
    YTDPrice = Pricing(Stocks, sdate, edate, 'Close')
    YTDPrice = YTDPrice.sort_values(by = ['Symbol','Date'])
    
    YTDPrice['SMA'] = YTDPrice.value.rolling(window = lookback).mean()
    YTDPrice['STD'] = YTDPrice.value.rolling(window = lookback).std()
    YTDPrice['UpperB'] = (YTDPrice.STD * 2) + YTDPrice.value
    YTDPrice['LowerB'] = (YTDPrice.STD * -2) + YTDPrice.value
    
    
    Max = YTDPrice[YTDPrice.Date == YTDPrice.Date.max()]
    
    df = Max.loc[:,('Symbol', 'value', 'SMA', 'STD', 'UpperB', 'LowerB')]
    
    
    return df



In [52]:
df = RollingPrice('2020-01-01', '2020-07-18', 20, list(Company.Symbol))
    


2 Failed downloads:
- BF.B: No data found for this date range, symbol may be delisted
- BRK.B: No data found, symbol may be delisted


In [53]:
df

Unnamed: 0,Symbol,value,SMA,STD,UpperB,LowerB
92611,A,94.370003,89.308500,2.108188,98.586378,90.153627
332635,AAL,11.910000,12.801500,1.169725,14.249449,9.570551
218377,AAP,144.860001,141.462500,4.809549,154.479099,135.240902
143575,AAPL,385.309998,371.774498,12.468132,410.246261,360.373735
89323,ABBV,100.830002,98.128000,1.522770,103.875542,97.784461
...,...,...,...,...,...,...
364693,YUM,90.570000,87.500500,1.458256,93.486512,87.653488
225775,ZBH,135.139999,121.773000,5.645180,146.430360,123.849639
297289,ZBRA,273.100006,258.923501,6.894559,286.889125,259.310888
163303,ZION,32.560001,33.111000,1.436904,35.433810,29.686193


In [54]:
dfFinal = pd.merge(df, dfAnalysts, on = 'Symbol')
dfFinal = pd.merge(dfFinal, Company, on = 'Symbol')

In [55]:
os.chdir(exportpath)

dfFinal.Analyst_Price = dfFinal.Analyst_Price.astype('float')
dfFinal['Total_Ratings'] = dfFinal.Buy + dfFinal.Overweight + dfFinal.Hold + dfFinal.Underweight + dfFinal.Sell
dfFinal['Percent_Buy'] = dfFinal.Buy / dfFinal.Total_Ratings
dfFinal.to_csv(exportfile)