In [143]:
import datetime as dt
import re
from time import sleep

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Multipliers for the magnitude suffixes that can follow a scraped number
# (e.g. "1.16T"). Any other letter is treated as "not a number".
_SUFFIX_MULTIPLIERS = {'T': 1e12, 'B': 1e9, 'M': 1e6, 'K': 1e3, 'k': 1e3}

# One complete numeric cell: optional sign, digits (optionally grouped with
# thousands separators), optional decimals, and at most one suffix character.
# It is applied with fullmatch(), so text such as "0.00 x 900" or
# "12.5% (ttm)" is never partially matched.
_NUMERIC_CELL = re.compile(
    r'([+-]?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)([%A-Za-z]?)'
)


def converter(x):
    """Convert a scraped cell value to a float when it is a single number.

    Recognised string forms (surrounding whitespace is ignored):

    * plain numbers, with or without decimals, sign or thousands
      separators: ``"1739.37"``, ``"938346"``, ``"-1.2"``, ``"1,234.5"``
    * percentages, returned as a fraction: ``"12.5%"`` -> ``0.125``
    * magnitude suffixes ``T`` (1e12), ``B`` (1e9), ``M`` (1e6) and
      ``K``/``k`` (1e3): ``"1.16T"`` -> ``1.16e12``

    Args:
        x: The value to convert. Only ``str`` instances are inspected.

    Returns:
        A ``float`` for the recognised forms above; the original string for
        any other text (ranges, dates, ratios, unknown suffixes); and ``x``
        itself, untouched, when it is not a string (e.g. NaN or a number
        already parsed by pandas).
    """
    if not isinstance(x, str):
        return x

    parsed = _NUMERIC_CELL.fullmatch(x.strip())
    if parsed is None:
        return str(x)

    number = float(parsed.group(1).replace(',', ''))
    suffix = parsed.group(2)

    if suffix == '':
        return number
    if suffix == '%':
        return number / 100

    multiplier = _SUFFIX_MULTIPLIERS.get(suffix)
    if multiplier is None:
        # A number followed by an unknown letter is left as text rather
        # than guessed at.
        return str(x)
    return number * multiplier
                

class Ticker:
    """Scrapes ca.finance.yahoo.com pages for a single ticker symbol.

    Attributes:
        index: The ticker symbol, stored upper-cased.
    """

    def __init__(self, index):
        """Store the ticker symbol ``index`` in upper case."""
        self.index = index.upper()

    @staticmethod
    def _kpi_frame(tables):
        """Stack two-column (name, value) tables into one frame indexed by 'KPI'."""
        data = pd.concat(tables, ignore_index=True)
        data.columns = ['KPI', 'value']
        data['value'] = data['value'].apply(converter)
        return data.set_index('KPI')

    @staticmethod
    def _to_unix_midnight(date_str):
        """Return the Unix timestamp of 00:00 UTC on a 'YYYY-MM-DD' date string."""
        parsed = dt.datetime.strptime(date_str, '%Y-%m-%d')
        return int(parsed.replace(tzinfo=dt.timezone.utc).timestamp())

    def profile(self):
        """Scrape the quote summary page of the ticker.

        Returns:
            A DataFrame indexed by 'KPI' with a single 'value' column, built
            from the first two HTML tables on the page. Values are passed
            through ``converter``.
        """
        url = f'https://ca.finance.yahoo.com/quote/{self.index}?p={self.index}&.tsrc=fin-srch'
        dfs = pd.read_html(url, match = '.+')
        return self._kpi_frame([dfs[0], dfs[1]])

    def key_stats(self):
        """Scrape the key-statistics page of the ticker.

        Returns:
            A DataFrame indexed by 'KPI' with a single 'value' column, built
            from every HTML table on the page. Values are passed through
            ``converter``.
        """
        url = f'https://ca.finance.yahoo.com/quote/{self.index}/key-statistics?p={self.index}'
        dfs = pd.read_html(url, match = '.+')
        return self._kpi_frame(dfs)

    def get_history(self, date_from=None, date_to=None, set_sleep = 1):
        """Scrape the historical daily price table of the ticker.

        The history page is opened in headless Chrome and scrolled to the
        bottom repeatedly until the number of table rows stops growing, then
        the loaded table is parsed.

        Args:
            date_from: Start date as 'YYYY-MM-DD'. Defaults to seven days
                before today, evaluated on each call.
            date_to: End date as 'YYYY-MM-DD'. Defaults to today, evaluated
                on each call.
            set_sleep: Seconds to wait after each scroll so the page can
                load more rows; too small a value can end scrolling early.

        Returns:
            A DataFrame with the table's header cells as columns. The first
            column is parsed to datetimes; every other column is parsed to
            float, with thousands separators removed and non-numeric cells
            set to NaN. The last table row is dropped (presumably the
            table's footnote row - confirm against the live page).

        Raises:
            ValueError: If a date string does not match 'YYYY-MM-DD', or if
                the history table cannot be found in the loaded page.
        """
        # Resolve the defaults at call time; a default computed in the
        # signature would be frozen at the moment the class was defined.
        today = dt.date.today()
        if date_from is None:
            date_from = str(today - dt.timedelta(days=7))
        if date_to is None:
            date_to = str(today)

        unix_from = self._to_unix_midnight(date_from)
        unix_to = self._to_unix_midnight(date_to)
        url = f'https://ca.finance.yahoo.com/quote/{self.index}/history?period1={unix_from}&period2={unix_to}&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true'

        options = Options()
        options.add_argument("--headless")
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        browser = webdriver.Chrome(options=options)
        try:
            browser.implicitly_wait(2)
            browser.get(url)
            html = browser.find_element(By.TAG_NAME, 'html')
            count_rows_js = "return document.getElementsByTagName('tr').length"
            old_rows = browser.execute_script(count_rows_js)
            # Keep jumping to the end of the page until no new rows appear.
            while True:
                html.send_keys(Keys.END)
                sleep(set_sleep)
                new_rows = browser.execute_script(count_rows_js)
                if old_rows == new_rows:
                    break
                old_rows = new_rows
            page_source = browser.page_source
        finally:
            # Always end the driver session, even when scraping fails.
            browser.quit()

        soup = BeautifulSoup(page_source, features = 'lxml')
        table = soup.find('table', {"class":"W(100%)"})
        rows = table.find_all('tr') if table is not None else []
        if not rows:
            raise ValueError(f'No price history table found for {self.index}')

        headers = [cell.text for cell in rows[0].find_all('th')]
        row_list = [[cell.text for cell in row.find_all('td')] for row in rows[1:]]
        # The final <tr> is left out of the frame; see "Returns" above.
        data = pd.DataFrame(data = row_list[:-1], columns = headers)

        # Assign by column name so the converted dtype replaces the column
        # instead of being written into the existing object-dtype column.
        for column in data.columns[1:]:
            cleaned = data[column].str.replace(',', '', regex=False)
            data[column] = pd.to_numeric(cleaned, errors='coerce')
        date_column = data.columns[0]
        data[date_column] = pd.to_datetime(data[date_column])
        return data

In [144]:
# Create a Ticker for the symbol 'goog' (stored upper-cased by Ticker) and
# scrape its key-statistics and quote-summary tables. Both calls fetch live
# pages, so results depend on when the cell is run.
goog = Ticker('goog')
stat = goog.key_stats()
profile = goog.profile()

In [150]:
# Tabulate every profile KPI next to the Python type of its converted value,
# so entries that are still raw strings are easy to spot.
profile_values = profile['value']

index = list(profile.index)
# .iloc makes the positional lookup explicit; plain `series[pos]` on a
# label-indexed Series relies on a deprecated integer-position fallback.
value = [profile_values.iloc[pos] for pos in range(len(profile_values))]
types = [type(item) for item in value]

# dtype=object keeps each value as the object that was stored, so float and
# str entries are not coerced to a common dtype.
prof_df = pd.DataFrame({'index': index, 'value': value, 'types': types}, dtype=object)
prof_df

Unnamed: 0,index,value,types
0,Previous Close,1739.37,<class 'float'>
1,Open,1734.43,<class 'float'>
2,Bid,0.00 x 900,<class 'str'>
3,Ask,0.00 x 1100,<class 'str'>
4,Day's Range,"1,712.57 - 1,737.41",<class 'str'>
5,52 Week Range,"1,013.54 - 1,847.20",<class 'str'>
6,Volume,938346,<class 'float'>
7,Avg. Volume,1.70519e+06,<class 'float'>
8,Market Cap,1.164e+12,<class 'float'>
9,Beta (5Y Monthly),1,<class 'float'>


In [149]:
# Tabulate every key statistic next to the Python type of its converted
# value, so entries that are still raw strings are easy to spot.
stat_values = stat['value']

index = list(stat.index)
# .iloc makes the positional lookup explicit; plain `series[pos]` on a
# label-indexed Series relies on a deprecated integer-position fallback.
value = [stat_values.iloc[pos] for pos in range(len(stat_values))]
types = [type(item) for item in value]

# dtype=object keeps each value as the object that was stored, so float and
# str entries are not coerced to a common dtype.
stat_df = pd.DataFrame({'index': index, 'value': value, 'types': types}, dtype=object)
stat_df

Unnamed: 0,index,value,types
0,Market Cap (intraday) 5,1.16e+12,<class 'float'>
1,Enterprise Value 3,1.07e+12,<class 'float'>
2,Trailing P/E,33.3,<class 'float'>
3,Forward P/E 1,28.14,<class 'float'>
4,PEG Ratio (5 yr expected) 1,2.02,<class 'float'>
5,Price/Sales (ttm),6.78,<class 'float'>
6,Price/Book (mrq),5.49,<class 'float'>
7,Enterprise Value/Revenue 3,6.24,<class 'float'>
8,Enterprise Value/EBITDA 6,22.29,<class 'float'>
9,Beta (5Y Monthly),1,<class 'float'>


In [152]:
# Show only the profile KPIs whose value is still a str, i.e. the cells
# that converter() did not turn into a float.
prof_df[prof_df['types'] == str]

Unnamed: 0,index,value,types
2,Bid,0.00 x 900,<class 'str'>
3,Ask,0.00 x 1100,<class 'str'>
4,Day's Range,"1,712.57 - 1,737.41",<class 'str'>
5,52 Week Range,"1,013.54 - 1,847.20",<class 'str'>
13,Forward Dividend & Yield,N/A (N/A),<class 'str'>


In [153]:
# Show only the key statistics whose value is still a str, i.e. the cells
# that converter() did not turn into a float.
stat_df[stat_df['types'] == str]

Unnamed: 0,index,value,types
35,Last Split Factor 2,10000000:10000000,<class 'str'>
36,Last Split Date 3,"Apr. 26, 2015",<class 'str'>
37,Fiscal Year Ends,"Dec. 30, 2019",<class 'str'>
38,Most Recent Quarter (mrq),"Sep. 29, 2020",<class 'str'>


In [154]:
# Scrape daily price history using get_history's default date range
# (the seven days up to today). This launches a headless Chrome session.
history = goog.get_history()
history

Unnamed: 0,Date,Open,High,Low,Close*,Adj Close**,Volume
0,2020-12-22,1734.43,1737.41,1712.57,1723.5,1723.5,936700.0
1,2020-12-21,1713.51,1740.85,1699.0,1739.37,1739.37,1828400.0
2,2020-12-18,1754.18,1755.11,1720.22,1731.01,1731.01,4016400.0
3,2020-12-17,1768.51,1771.78,1738.66,1747.9,1747.9,1624700.0
4,2020-12-16,1772.88,1773.0,1756.08,1763.0,1763.0,1513500.0


In [155]:
# Scrape daily price history from 2020-10-05 up to the default end date
# (today); the page is scrolled until all rows for the range are loaded.
historyOct5 = goog.get_history(date_from = '2020-10-5')
historyOct5

Unnamed: 0,Date,Open,High,Low,Close*,Adj Close**,Volume
0,2020-12-22,1734.43,1737.41,1712.57,1723.5,1723.5,936700.0
1,2020-12-21,1713.51,1740.85,1699.0,1739.37,1739.37,1828400.0
2,2020-12-18,1754.18,1755.11,1720.22,1731.01,1731.01,4016400.0
3,2020-12-17,1768.51,1771.78,1738.66,1747.9,1747.9,1624700.0
4,2020-12-16,1772.88,1773.0,1756.08,1763.0,1763.0,1513500.0
5,2020-12-15,1764.42,1771.42,1749.95,1767.77,1767.77,1482300.0
6,2020-12-14,1775.0,1797.39,1757.21,1760.06,1760.06,1600200.0
7,2020-12-11,1763.06,1784.45,1760.0,1781.77,1781.77,1220700.0
8,2020-12-10,1769.8,1781.31,1740.32,1775.33,1775.33,1362800.0
9,2020-12-09,1812.01,1834.27,1767.81,1784.13,1784.13,1507600.0


In [157]:
# Check the column dtypes of the scraped history frame (a datetime first
# column followed by float columns is the expected result).
history.dtypes

Date           datetime64[ns]
Open                  float64
High                  float64
Low                   float64
Close*                float64
Adj Close**           float64
Volume                float64
dtype: object