In [1]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def get_url(url, timeout=30):
    """Download a page and return it parsed as a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        ``requests`` applies no timeout on its own, so without one a stalled
        connection would block the notebook indefinitely.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the built-in ``html.parser``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error page
        is never parsed and scraped as if it were the real content.
    requests.RequestException
        On connection failures or when ``timeout`` elapses.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the parser.
    response.raise_for_status()
    return bs(response.content, 'html.parser')

In [3]:
# Page listing the largest companies by market capitalisation.
url = "https://www.dogsofthedow.com/largest-companies-by-market-cap.htm"

# Fetch and parse the page once; get_quote() and get_companyname() read this
# module-level `soup`.
soup = get_url(url)

In [4]:
def get_quote(page=None):
    """Return the ticker symbols found in the table body of the parsed page.

    Parameters
    ----------
    page : object with a ``select(css_selector)`` method, optional
        Parsed document to scrape. When omitted, the notebook-level ``soup``
        is used, so existing ``get_quote()`` calls keep working.

    Returns
    -------
    list of str
        Link texts matched by ``'tbody a'``, whitespace-stripped, with every
        ``'.'`` replaced by ``'-'`` (e.g. ``'BRK.A'`` -> ``'BRK-A'``).
        The last matched link is dropped.
    """
    if page is None:
        # Fall back to the globally parsed page for backward compatibility.
        page = soup
    # NOTE(review): the final matched anchor is discarded, presumably because
    # it is not a ticker link -- confirm against the live page markup.
    anchors = page.select('tbody a')[:-1]
    return [anchor.get_text().strip().replace('.', '-') for anchor in anchors]

In [5]:
# Preview the first ten scraped ticker symbols.
get_quote()[:10]

['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'FB', 'TSLA', 'BABA', 'BRK-A', 'TSM', 'V']

In [6]:
def get_companyname(page=None):
    """Return the company names found in the second column of the table body.

    Parameters
    ----------
    page : object with a ``select(css_selector)`` method, optional
        Parsed document to scrape. When omitted, the notebook-level ``soup``
        is used, so existing ``get_companyname()`` calls keep working.

    Returns
    -------
    list of str
        Whitespace-stripped texts of the elements matched by
        ``'tbody .column-2'``, with the last two matches dropped.
    """
    if page is None:
        # Fall back to the globally parsed page for backward compatibility.
        page = soup
    # NOTE(review): the last two matched cells are discarded, presumably
    # because they are not company rows -- confirm against the page markup.
    cells = page.select('tbody .column-2')[:-2]
    return [cell.get_text().strip() for cell in cells]

In [7]:
# Preview the first ten scraped company names.
get_companyname()[:10]

['Apple',
 'Microsoft',
 'Amazon',
 'Alphabet',
 'Facebook',
 'Tesla',
 'Alibaba',
 'Berkshire Hathaway',
 'Taiwan Semiconductor',
 'Visa']

In [8]:
# Map each ticker to its company name. Both lists are scraped exactly once and
# paired positionally; calling the scrapers inside the comprehension re-ran
# them for every single entry. If the two lists differ in length, pairing
# stops at the shorter one.
ticker_company = dict(zip(get_quote(), get_companyname()))

In [12]:
# Display the ticker -> company-name mapping.
ticker_company

{'AAPL': 'Apple',
 'MSFT': 'Microsoft',
 'AMZN': 'Amazon',
 'GOOGL': 'Alphabet',
 'FB': 'Facebook',
 'TSLA': 'Tesla',
 'BABA': 'Alibaba',
 'BRK-A': 'Berkshire Hathaway',
 'TSM': 'Taiwan Semiconductor',
 'V': 'Visa',
 'JPM': 'JPMorgan Chase',
 'JNJ': 'Johnson & Johnson',
 'MA': 'Mastercard',
 'WMT': 'Walmart',
 'DIS': 'Disney',
 'UNH': 'UnitedHealth',
 'BAC': 'Bank of America',
 'PG': 'Procter & Gamble',
 'NVDA': 'NVIDIA',
 'HD': 'Home Depot',
 'PYPL': 'PayPal',
 'EDU': 'New Oriental Education & Technology',
 'CMCSA': 'Comcast',
 'XOM': 'ExxonMobil',
 'INTC': 'Intel',
 'VZ': 'Verizon',
 'NFLX': 'Netflix',
 'KO': 'Coca-Cola',
 'ASML': 'ASML',
 'NKE': 'Nike',
 'CVX': 'Chevron',
 'T': 'AT&T',
 'TM': 'Toyota',
 'ORCL': 'Oracle',
 'ADBE': 'Adobe',
 'ABT': 'Abbott Laboratories',
 'CSCO': 'Cisco Systems',
 'LLY': 'Eli Lilly',
 'PFE': 'Pfizer',
 'CRM': 'Salesforce',
 'ABBV': 'AbbVie',
 'NVS': 'Novartis AG',
 'MRK': 'Merck',
 'PEP': 'Pepsi',
 'PDD': 'Pinduoduo',
 'AVGO': 'Broadcom',
 'TMO': 'The

In [13]:
# Module-level cache of raw CSV lines per ticker; later cells read it directly.
stock_dict = {}
def get_stocks(tickers=None, timeout=30):
    """Download daily price history as raw CSV lines for each ticker.

    Parameters
    ----------
    tickers : iterable of str, optional
        Symbols to download. Defaults to everything returned by
        ``get_quote()``, which is what a bare ``get_stocks()`` call did before.
    timeout : float, optional
        Seconds to wait for each response (default 30) so that one stalled
        request cannot hang the whole download loop.

    Returns
    -------
    dict
        The module-level ``stock_dict``, mapping ticker -> list of text lines
        obtained by splitting the response body on ``'\\n'``. Entries are
        added or overwritten; existing keys for other tickers are kept.

    Notes
    -----
    A ticker whose request does not return a successful HTTP status is
    skipped with a warning instead of having its error body stored as if it
    were price data.
    """
    import warnings

    # Loop-invariant request settings, built once.
    stock_url = 'https://query1.finance.yahoo.com/v7/finance/download/{}?'
    params = {
        'range': '5y',
        'interval': '1d',
        'events': 'history',
        'includeAdjustedClose': 'true',
    }

    if tickers is None:
        tickers = get_quote()

    for ticker in tickers:
        response = requests.get(stock_url.format(ticker), params=params, timeout=timeout)
        if not response.ok:
            warnings.warn(
                'Skipping {}: HTTP {}'.format(ticker, response.status_code)
            )
            continue
        stock_dict[ticker] = response.text.split('\n')
    return stock_dict

In [14]:
# Run the download for every ticker (this populates `stock_dict`), then show
# the first five raw CSV lines for AAPL.
get_stocks()['AAPL'][:5]

['Date,Open,High,Low,Close,Adj Close,Volume',
 '2016-03-14,25.477501,25.727501,25.445000,25.629999,23.838688,100304400',
 '2016-03-15,25.990000,26.295000,25.962500,26.145000,24.317701,160270800',
 '2016-03-16,26.152500,26.577499,26.147499,26.492500,24.640911,153214000',
 '2016-03-17,26.379999,26.617500,26.240000,26.450001,24.601379,137682800']

In [15]:
# Remove the leading non-data line (the CSV header) from every ticker's lines.
# Data rows start with a four-digit year, so a list that is empty or already
# begins with a data row is left alone: re-running this cell no longer eats a
# real price row, and an empty list no longer raises IndexError.
for lines in stock_dict.values():
    if lines and not lines[0][:4].isdigit():
        lines.pop(0)

In [16]:
# Append the ticker to every CSV line as an extra trailing field. The result
# has one list per ticker, and each tagged line sits in its own one-element
# list (the next cell flattens this nesting).
first_clean = [
    [[csv_line + ',' + ticker] for csv_line in csv_lines]
    for ticker, csv_lines in stock_dict.items()
]

In [17]:
# Flatten ticker -> wrapper -> line into one list and split every line on
# commas, giving one list of string fields per row.
data_flatten = [
    tagged_line.split(',')
    for per_ticker in first_clean
    for wrapper in per_ticker
    for tagged_line in wrapper
]

In [18]:
# Show the first five flattened rows.
data_flatten[:5]

[['2016-03-14',
  '25.477501',
  '25.727501',
  '25.445000',
  '25.629999',
  '23.838688',
  '100304400',
  'AAPL'],
 ['2016-03-15',
  '25.990000',
  '26.295000',
  '25.962500',
  '26.145000',
  '24.317701',
  '160270800',
  'AAPL'],
 ['2016-03-16',
  '26.152500',
  '26.577499',
  '26.147499',
  '26.492500',
  '24.640911',
  '153214000',
  'AAPL'],
 ['2016-03-17',
  '26.379999',
  '26.617500',
  '26.240000',
  '26.450001',
  '24.601379',
  '137682800',
  'AAPL'],
 ['2016-03-18',
  '26.584999',
  '26.625000',
  '26.297501',
  '26.480000',
  '24.629284',
  '176820800',
  'AAPL']]

In [19]:
# Total number of rows collected across all tickers.
len(data_flatten)

62303

In [20]:
# Column labels: the CSV header fields followed by the appended ticker field.
colum = [*'Date,Open,High,Low,Close,Adj Close,Volume'.split(','), 'Company']

In [27]:
# Build the combined frame: one row per (date, ticker); every value is still
# a string at this point.
df = pd.DataFrame(data_flatten, columns=colum)

In [28]:
# Parse the ISO 'YYYY-MM-DD' strings directly (no per-row dash stripping) and
# make the dates the index. The guard makes the cell safe to re-run: once
# 'Date' has become the index it is no longer a column, and the unguarded
# version raised KeyError on a second run.
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
    df = df.set_index('Date')

In [29]:
# Preview the first rows of the date-indexed frame.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Company
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-03-07,25.5975,25.7075,25.24,25.467501,23.687551,143315600,AAPL
2016-03-08,25.195,25.440001,25.1,25.2575,23.492224,126247600,AAPL
2016-03-09,25.327499,25.395,25.067499,25.280001,23.513155,108806800,AAPL
2016-03-10,25.352501,25.559999,25.0375,25.2925,23.524775,134054400,AAPL
2016-03-11,25.559999,25.57,25.375,25.565001,23.778234,109632800,AAPL


In [30]:
# Write the combined frame to a CSV file in the current working directory.
# In file order this runs before the float conversion in the following cell.
df.to_csv('stockPriceXL-NEW.csv')

In [31]:
# Cast every column except the last one (the 'Company' label) from string to
# float so the values can be used numerically.
value_columns = df.columns[:-1]
for column_name in value_columns:
    df[column_name] = df[column_name].astype(float)

In [32]:
!pip install fbprophet

Collecting fbprophet
  Using cached fbprophet-0.7.1.tar.gz (64 kB)
Using legacy 'setup.py install' for fbprophet, since package 'wheel' is not installed.
Installing collected packages: fbprophet
    Running setup.py install for fbprophet: started
    Running setup.py install for fbprophet: finished with status 'error'


    ERROR: Command errored out with exit status 1:
     command: 'c:\python\python386\python.exe' -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'C:\\Users\\Jahaan Nadkarni\\AppData\\Local\\Temp\\pip-install-ylm3p26i\\fbprophet_90d3e11c5bd942dfa39e4b2c7e14bef5\\setup.py'"'"'; __file__='"'"'C:\\Users\\Jahaan Nadkarni\\AppData\\Local\\Temp\\pip-install-ylm3p26i\\fbprophet_90d3e11c5bd942dfa39e4b2c7e14bef5\\setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' install --record 'C:\Users\Jahaan Nadkarni\AppData\Local\Temp\pip-record-ferz60ji\install-record.txt' --single-version-externally-managed --compile --install-headers 'c:\python\python386\Include\fbprophet'
         cwd: C:\Users\Jahaan Nadkarni\AppData\Local\Temp\pip-install-ylm3p26i\fbprophet_90d3e11c5bd942dfa39e4b2c7e14bef5\
    Complete output (45 lines):
    running install
    running build
    run