# the objective of this file is scrape yahoo finance web page: get financial info about companies and add that info to a google sheet 

## this section creates a list of tickers from a google sheet

In [1]:
from oauth2client.service_account import ServiceAccountCredentials
from googleapiclient import discovery
import re

In [2]:
## this function reads a sheet and gets tickers
def get_tickers(spreadsheet_id, range_, header_rows=5, max_tickers=6):
    """Read a Google Sheet and return the tickers found in its first column.

    Each data row is expected to carry the ticker between parentheses in its
    first cell, e.g. ``'Some Company (TICK)'``; the text between the
    parentheses is lower-cased and stripped.

    Args:
        spreadsheet_id: ID of the spreadsheet to read.
        range_: range (A1 notation or sheet name) to request.
        header_rows: number of leading rows to skip before looking for
            tickers. Defaults to 5.
        max_tickers: maximum number of tickers to collect. Defaults to 6.

    Returns:
        A list with at most ``max_tickers`` ticker strings, in sheet order.
        Empty rows and rows whose first cell has no parenthesised text are
        skipped.
    """
    ## here we make sure we get authorized (read-only access is enough)
    scope = 'https://www.googleapis.com/auth/spreadsheets.readonly'
    creds = ServiceAccountCredentials.from_json_keyfile_name('creds.json', scope)
    service = discovery.build('sheets', 'v4', credentials=creds)

    ## here we read the sheet
    sheet = service.spreadsheets()
    data = sheet.values().get(spreadsheetId=spreadsheet_id, range=range_).execute()
    ## the response has no 'values' key when the requested range holds no data
    values = data.get('values', [])

    ## here we extract the tickers from the rows below the header
    tickers = []
    for item in values[header_rows:]:
        if len(tickers) >= max_tickers:
            break
        if not item:
            continue
        ## raw string: '\(' is not a valid escape in a normal string literal
        match = re.search(r'\((.*)\)', item[0])
        if match is None:
            ## no "(TICKER)" in this cell, nothing to extract
            continue
        tickers.append(match.group(1).lower().strip())

    return tickers

## this section receives the list of tickers and get the financial info from yahoo for each ticker in the list

In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC 
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import requests
from bs4 import BeautifulSoup
import time

In [None]:
## fetching the ticker list from the 'hoja 1' range of the source spreadsheet
tickers = get_tickers(
    spreadsheet_id='1R0h3A6cmfqbCV7788cLK3DdyRirREs8KbHxppulbf98',
    range_='hoja 1',
)

## making a class Scraper

In [None]:
class Scraper():
    """Find a ticker's quote page on Yahoo Finance and keep its parsed HTML.

    On construction the ticker is searched through a Chrome browser driven by
    Selenium; the resulting page URL is then downloaded with ``requests`` and
    parsed with BeautifulSoup. The parsed document is stored in ``_html``,
    which is ``None`` when the search timed out.
    """

    def __init__(self, ticker):
        """Search ``ticker`` and load the HTML of the page the search lands on."""
        self._ticker = ticker
        url = self._search_ticker(self._ticker)
        if url is None:
            print('the page took too long to load for ticker', self._ticker)
            self._html = None
        else:
            ticker_page = requests.get(url)
            self._html = BeautifulSoup(ticker_page.text, 'html.parser')

    def _search_ticker(self, ticker):
        """Type ``ticker`` into the Yahoo Finance search box and return the result URL.

        Returns:
            The browser's current URL once the quote header is present, or
            ``None`` if any wait exceeds the timeout.
        """
        ## maximum time, in seconds, for each explicit wait
        delay = 15

        driver = webdriver.Chrome(executable_path='./chromedriver')
        try:
            driver.get('https://finance.yahoo.com/')

            ## making sure the page loaded; the wait hands back the search box itself
            search_box = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.XPATH, '//form[@id="header-search-form"]/input[@id="yfin-usr-qry"]')))

            ## searching ticker
            search_box.send_keys(ticker)
            ## find_element(By.XPATH, ...) is used because the find_element_by_*
            ## helpers were deprecated and later removed in Selenium 4
            search_button = driver.find_element(By.XPATH, '//form[@id="header-search-form"]//button[@id="header-desktop-search-button"]')
            search_button.click()

            ## making sure the quote page loaded
            WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.XPATH, '//div[@id="quote-header-info"]')))

            return driver.current_url

        except TimeoutException:
            print('the page took too long to load')

            return None

        finally:
            ## always shut the browser down, otherwise every Scraper leaks one
            driver.quit()


## using only functions

In [3]:
## this function gets the financial info from yahoo finance
def get_financials(ticker):
    """Search ``ticker`` on Yahoo Finance and return its financial info.

    The ticker is typed into the site's search box through a Chrome browser
    driven by Selenium; the page the search lands on is then downloaded with
    ``requests`` and parsed with BeautifulSoup.

    Args:
        ticker: text to type into the Yahoo Finance search box.

    Returns:
        A dict meant to hold company, ticker, exchange, price and
        price_currency. The extraction is not implemented yet, so the dict is
        currently empty. ``None`` is returned when a wait times out.
    """
    ## maximum time, in seconds, for each explicit wait
    delay = 15
    financial_info = None

    driver = webdriver.Chrome(executable_path='./chromedriver')
    try:
        url = 'https://finance.yahoo.com/'
        driver.get(url)

        ## waiting for the page; the wait hands back the search box itself
        search_box = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.XPATH, '//form[@id="header-search-form"]/input[@id="yfin-usr-qry"]')))

        ## searching ticker
        search_box.send_keys(ticker)
        ## find_element(By.XPATH, ...) is used because the find_element_by_*
        ## helpers were deprecated and later removed in Selenium 4
        search_button = driver.find_element(By.XPATH, '//form[@id="header-search-form"]//button[@id="header-desktop-search-button"]')
        search_button.click()

        ## waiting for the quote page
        WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.XPATH, '//div[@id="quote-header-info"]')))

        ## getting url, making the request and the soup
        url = driver.current_url
        ticker_page = requests.get(url)
        s = BeautifulSoup(ticker_page.text, 'html.parser')

        ## creating financial_info dict
        ## TODO: fill it with company, ticker, exchange, price, price_currency taken from `s`
        financial_info = {}

    except TimeoutException:
        print('the page took too long to load\n or the ticker was not found')

    finally:
        ## close the browser even if an unexpected error is raised above
        driver.quit()

    return financial_info

## this section sends the financial info to a google sheet