In [1]:
import yfinance as yf
import re
import json
import csv
import io
import pandas as pd
from io import StringIO
from bs4 import BeautifulSoup
import requests
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import xlsxwriter
from google_trans_new import google_translator

In [2]:
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'
url_profile ='https://finance.yahoo.com/quote/{}/profile?p={}'
url_financials ='https://finance.yahoo.com/quote/{}/financials?p={}'

In [3]:
stock = 'taee11.SA'

# Financial (IS, CF and BS)

In [4]:
response = requests.get(url_financials.format(stock, stock))
soup = BeautifulSoup(response.text,'html.parser')
pattern = re.compile(r'\s--\sData\s--\s')
script_data = soup.find('script', text=pattern).contents[0]
start = script_data.find("context")-2
json_data = json.loads(script_data[start:-12])
json_data['context'].keys()

dict_keys(['dispatcher', 'options', 'plugins'])

In [5]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore'].keys()

dict_keys(['financialsTemplate', 'cashflowStatementHistory', 'balanceSheetHistoryQuarterly', 'earnings', 'price', 'incomeStatementHistoryQuarterly', 'incomeStatementHistory', 'balanceSheetHistory', 'cashflowStatementHistoryQuarterly', 'quoteType', 'summaryDetail', 'symbol', 'pageViews'])

In [6]:
annual_is = json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['incomeStatementHistory']['incomeStatementHistory']
quarterly_is = json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['incomeStatementHistoryQuarterly']['incomeStatementHistory']

annual_cf = json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['cashflowStatementHistory']['cashflowStatements']
quarterly_cf = json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['cashflowStatementHistoryQuarterly']['cashflowStatements']

annual_bs = json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['balanceSheetHistory']['balanceSheetStatements']
quarterly_bs = json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['balanceSheetHistoryQuarterly']['balanceSheetStatements']

In [7]:
annual_is[0]['operatingIncome']

{'raw': 757682000, 'fmt': '757.68M', 'longFmt': '757,682,000'}

In [8]:
annual_is_stmts = []
quarterly_is_stmts = []

# consolidate annual
for s in annual_is:
    statement = {}
    for key, val in s.items():
        try:
            statement[key] = val['raw']
        except TypeError:
            continue
        except KeyError:
            continue
    annual_is_stmts.append(statement)
    
# consolidate Quarterly
for s in quarterly_is:
    statement = {}
    for key, val in s.items():
        try:
            statement[key] = val['raw']
        except TypeError:
            continue
        except KeyError:
            continue
    quarterly_is_stmts.append(statement)
    
    
annual_cf_stmts = []
quarterly_cf_stmts = []

# consolidate annual Cash Flow
for s in annual_cf:
    statement = {}
    for key, val in s.items():
        try:
            statement[key] = val['raw']
        except TypeError:
            continue
        except KeyError:
            continue
    annual_cf_stmts.append(statement)
    
# consolidate Quarterly Cash Flow
for s in quarterly_cf:
    statement = {}
    for key, val in s.items():
        try:
            statement[key] = val['raw']
        except TypeError:
            continue
        except KeyError:
            continue
    quarterly_cf_stmts.append(statement)
    
    
annual_bs_stmts = []
quarterly_bs_stmts = []

# consolidate annual Balance Sheet
for s in annual_bs:
    statement = {}
    for key, val in s.items():
        try:
            statement[key] = val['raw']
        except TypeError:
            continue
        except KeyError:
            continue
    annual_bs_stmts.append(statement)
    
# consolidate Quarterly Balance Sheet
for s in quarterly_bs:
    statement = {}
    for key, val in s.items():
        try:
            statement[key] = val['raw']
        except TypeError:
            continue
        except KeyError:
            continue
    quarterly_bs_stmts.append(statement)

In [9]:
annual_is_stmts[0]

{'incomeBeforeTax': 703664000,
 'netIncome': 648285000,
 'sellingGeneralAdministrative': 118382000,
 'grossProfit': 879990000,
 'ebit': 757682000,
 'endDate': 1514678400,
 'operatingIncome': 757682000,
 'interestExpense': -233003000,
 'incomeTaxExpense': 55379000,
 'totalRevenue': 1077059000,
 'totalOperatingExpenses': 319377000,
 'costOfRevenue': 197069000,
 'totalOtherIncomeExpenseNet': -54018000,
 'netIncomeFromContinuingOps': 648285000,
 'netIncomeApplicableToCommonShares': 648285000}

# Profile Data

In [10]:
response = requests.get(url_profile.format(stock, stock))
soup = BeautifulSoup(response.text,'html.parser')
pattern = re.compile(r'\s--\sData\s--\s')
script_data = soup.find('script', text=pattern).contents[0]
start = script_data.find("context")-2
json_data = json.loads(script_data[start:-12])

In [11]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore'].keys()

dict_keys(['financialsTemplate', 'price', 'secFilings', 'quoteType', 'calendarEvents', 'summaryDetail', 'symbol', 'assetProfile', 'pageViews'])

In [12]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile'].keys()

dict_keys(['zip', 'sector', 'fullTimeEmployees', 'compensationRisk', 'auditRisk', 'longBusinessSummary', 'city', 'phone', 'state', 'shareHolderRightsRisk', 'governanceEpochDate', 'boardRisk', 'country', 'companyOfficers', 'website', 'maxAge', 'overallRisk', 'address1', 'industry', 'address2'])

In [13]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['companyOfficers']

[{'maxAge': 1,
  'name': 'Andre Augusto Telles Moreira',
  'title': 'CEO & Member of Exec. Board',
  'exercisedValue': {'raw': 0, 'fmt': None, 'longFmt': '0'},
  'unexercisedValue': {'raw': 0, 'fmt': None, 'longFmt': '0'}},
 {'maxAge': 1,
  'name': 'Mr. Erik  da Costa Breyer',
  'age': 49,
  'title': 'CFO, Investor Relations Officer & Member of Exec. Board',
  'yearBorn': 1971,
  'exercisedValue': {'raw': 0, 'fmt': None, 'longFmt': '0'},
  'unexercisedValue': {'raw': 0, 'fmt': None, 'longFmt': '0'}},
 {'maxAge': 1,
  'name': 'Mr. Marco Antonio Resende Faria',
  'title': 'Technical Director & Member of Exec. Board',
  'exercisedValue': {'raw': 0, 'fmt': None, 'longFmt': '0'},
  'unexercisedValue': {'raw': 0, 'fmt': None, 'longFmt': '0'}},
 {'maxAge': 1,
  'name': 'Mr. Fabio Antunes Fernandes',
  'title': 'Bus. & Ownership Interest Management Officer and Member of Exec. Board',
  'exercisedValue': {'raw': 0, 'fmt': None, 'longFmt': '0'},
  'unexercisedValue': {'raw': 0, 'fmt': None, 'lon

In [14]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['longBusinessSummary']

'Transmissora AlianÃ§a de Energia ElÃ©trica S.A. implements, operates, and maintains electric power transmission assets in Brazil. It operates 13,576 kilometers (km) of transmission lines, which include 11,062 km of transmission lines in operation and 2,514 km of lines under construction; and 97 substations with voltage ranging from 230 to 525kV. The company was founded in 2000 is headquartered in Rio de Janeiro, Brazil.'

In [15]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['secFilings']['filings']

KeyError: 'filings'

In [16]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['summaryDetail']

{'previousClose': {'raw': 39.47, 'fmt': '39.47'},
 'regularMarketOpen': {'raw': 39.28, 'fmt': '39.28'},
 'twoHundredDayAverage': {'raw': 35.94848, 'fmt': '35.95'},
 'trailingAnnualDividendYield': {'raw': 0.13460857, 'fmt': '13.46%'},
 'payoutRatio': {},
 'volume24Hr': {},
 'regularMarketDayHigh': {'raw': 39.54, 'fmt': '39.54'},
 'navPrice': {},
 'averageDailyVolume10Day': {'raw': 1467200,
  'fmt': '1.47M',
  'longFmt': '1,467,200'},
 'totalAssets': {},
 'regularMarketPreviousClose': {'raw': 39.47, 'fmt': '39.47'},
 'fiftyDayAverage': {'raw': 39.749714, 'fmt': '39.75'},
 'trailingAnnualDividendRate': {'raw': 5.313, 'fmt': '5.31'},
 'open': {'raw': 39.28, 'fmt': '39.28'},
 'toCurrency': None,
 'averageVolume10days': {'raw': 1467200,
  'fmt': '1.47M',
  'longFmt': '1,467,200'},
 'expireDate': {},
 'yield': {},
 'algorithm': None,
 'dividendRate': {'raw': 4.5, 'fmt': '4.50'},
 'exDividendDate': {'raw': 1621382400, 'fmt': '2021-05-19'},
 'beta': {'raw': 0.459434, 'fmt': '0.46'},
 'circulati

# Statistics

In [17]:
response = requests.get(url_stats.format(stock, stock))
soup = BeautifulSoup(response.text,'html.parser')
pattern = re.compile(r'\s--\sData\s--\s')
script_data = soup.find('script', text=pattern).contents[0]
start = script_data.find("context")-2
json_data = json.loads(script_data[start:-12])

In [18]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']

{'annualHoldingsTurnover': {},
 'enterpriseToRevenue': {'raw': 5.039, 'fmt': '5.04'},
 'beta3Year': {},
 'profitMargins': {'raw': 0.64198, 'fmt': '64.20%'},
 'enterpriseToEbitda': {'raw': 7.31, 'fmt': '7.31'},
 '52WeekChange': {'raw': 0.3829713, 'fmt': '38.30%'},
 'morningStarRiskRating': {},
 'forwardEps': {'raw': 2.19, 'fmt': '2.19'},
 'revenueQuarterlyGrowth': {},
 'sharesOutstanding': {'raw': 0, 'fmt': None, 'longFmt': '0'},
 'fundInceptionDate': {},
 'annualReportExpenseRatio': {},
 'totalAssets': {},
 'bookValue': {'raw': 19.153, 'fmt': '19.15'},
 'sharesShort': {},
 'sharesPercentSharesOut': {},
 'fundFamily': None,
 'lastFiscalYearEnd': {'raw': 1609372800, 'fmt': '2020-12-31'},
 'heldPercentInstitutions': {'raw': 0.25951, 'fmt': '25.95%'},
 'netIncomeToCommon': {'raw': 2427537920,
  'fmt': '2.43B',
  'longFmt': '2,427,537,920'},
 'trailingEps': {'raw': 7.047, 'fmt': '7.05'},
 'lastDividendValue': {'raw': 1.35436, 'fmt': '1.35'},
 'SandP52WeekChange': {'raw': 0.34921706, 'fmt': 

In [19]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore'].keys()

dict_keys(['defaultKeyStatistics', 'financialsTemplate', 'price', 'financialData', 'quoteType', 'calendarEvents', 'summaryDetail', 'symbol', 'pageViews'])

In [20]:
json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['financialData']

{'ebitdaMargins': {'raw': 0.6893, 'fmt': '68.93%'},
 'profitMargins': {'raw': 0.64198, 'fmt': '64.20%'},
 'grossMargins': {'raw': 0.73085, 'fmt': '73.08%'},
 'operatingCashflow': {'raw': 597715008,
  'fmt': '597.72M',
  'longFmt': '597,715,008'},
 'revenueGrowth': {'raw': 0.32, 'fmt': '32.00%'},
 'operatingMargins': {'raw': 0.68683, 'fmt': '68.68%'},
 'ebitda': {'raw': 2606467072, 'fmt': '2.61B', 'longFmt': '2,606,467,072'},
 'targetLowPrice': {'raw': 22.24, 'fmt': '22.24'},
 'recommendationKey': 'none',
 'grossProfits': {'raw': 879990000,
  'fmt': '879.99M',
  'longFmt': '879,990,000'},
 'freeCashflow': {'raw': 1463209088,
  'fmt': '1.46B',
  'longFmt': '1,463,209,088'},
 'targetMedianPrice': {'raw': 24, 'fmt': '24.00'},
 'currentPrice': {'raw': 39.09, 'fmt': '39.09'},
 'earningsGrowth': {'raw': 0.421, 'fmt': '42.10%'},
 'currentRatio': {'raw': 2.925, 'fmt': '2.92'},
 'returnOnAssets': {'raw': 0.118599996, 'fmt': '11.86%'},
 'numberOfAnalystOpinions': {'raw': 11, 'fmt': '11', 'longFmt

In [21]:
df = pd.DataFrame(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['calendarEvents'], columns= ['exDividendDate', 'earningsDate'])

df.to_csv (r'C:\Users\PALETTA\Documents\Análise\Web_Scraper_Yahoo_Finance\stock.csv', index = False, header=True)

print (df)

    exDividendDate earningsDate
fmt     2021-05-19          NaN
raw     1621382400          NaN


# Historical Stock Data

In [22]:
stock_url = 'https://query1.finance.yahoo.com/v7/finance/download/KO?period1=1590931955&period2=1622467955&interval=1d&events=history&includeAdjustedClose=true'

In [23]:
response = requests.get(stock_url)

In [None]:
stock_url = 'https://query1.finance.yahoo.com/v7/finance/download/KO?'

params = {
    'period1':'1590931955',
    'period2':'1622467955',
    'interval':'1d',
    'events':'history',
    'includeAdjustedClose':'true'
}

In [None]:
params = {
    'range': '5y',
    'interval':'1d',
    'events':'history',
    'includeAdjustedClose':'true'
}

In [None]:
response = requests.get(stock_url.format(stock),params=params)

In [None]:
file = StringIO(response.text)
reader = csv.reader(file)
data = list(reader)
for row in data[:5]:
    print(row)

In [None]:
def TT_preco(codigo, data, hora):
    inicio = datetime.strftime(datetime.strptime(data+' '+hora, '%Y-%m-%d %H:%M')+timedelta(hours=3), '%Y-%m-%d %H:%M')
    fim = datetime.strftime(datetime.strptime(data+' '+hora, '%Y-%m-%d %H:%M')+timedelta(hours=4), '%Y-%m-%d %H:%M')

    data_inicial = datetime.strftime(datetime.strptime(data, '%Y-%m-%d') + timedelta(-1), '%Y-%m-%d')
    data_final = datetime.strftime(datetime.strptime(data, '%Y-%m-%d') + timedelta(1), '%Y-%m-%d')

    df = yf.download(codigo,
                     start=data_inicial,
                     end=data_final,
                     progress=False,
                     interval='1m')
    df['DATA'] = df.index.values
    df2 = df[(df['DATA'] >= datetime.strptime(inicio, '%Y-%m-%d %H:%M')) & (df['DATA'] <= datetime.strptime(fim, '%Y-%m-%d %H:%M'))]
    
    soma = 0
    mult = 0

    df2.to_csv('dadosTT.csv')

    for i in range(0, len(df2['DATA'])):
        mult += (df2['Open'][i] * df2['Volume'][i])
        soma += df2['Volume'][i]

    preco = mult / soma

    print(codigo + ' - ' + str(preco.round(3)))
    return(df2)

In [None]:
for codigo in ['PETR4.SA']:
    data = '2021-05-28'
    hora_inicial = '10:03'
    tabela = TT_preco(codigo, data, hora_inicial)
    tabela

    plt.plot(tabela['DATA'],tabela['Open'])
    plt.title(codigo)
    plt.xlabel('HORÁRIO')
    plt.ylabel('PREÇO')
    plt.grid(True)
    plt.show()

In [None]:
df2.to_csv (r'C:\Users\PALETTA\Documents\Análise\Web_Scraper_Yahoo_Finance\export_dataframe.csv', index = False, header=True)

print (df2)

In [24]:
translator = google_translator()  
translate_text = translator.translate(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['longBusinessSummary'],lang_tgt='pt')  
print(translate_text)

KeyError: 'assetProfile'