# Daily Scraping

This notebook is a daily routine that scrapes data from Yahoo.

In [26]:
# Turn on IPython's autoreload extension (mode 2) so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [155]:
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd

from dashboards.yahoo_parser import YahooParser


## Get the Portfolio

To get the tickers in the portfolio, export the "Posição na Carteira" table from `statusinvest` and save it as `CarteiraStatusInvest.csv` in the notebook's working directory.<br>
https://statusinvest.com.br/carteira/patrimonio

In [153]:
# Translate the Portuguese headers of the StatusInvest export into the short
# English names used throughout the rest of the notebook.
columns = {
    'ATIVO': 'Ticker',
    'PREÇO MÉDIO': 'AvgPrice',
    'PREÇO ATUAL': 'Price',
    'QUANTIDADE': 'Qty',
}

# The export is ';'-separated and uses ',' as the decimal mark. Only the mapped
# columns are kept. Qty is cast to int before Volume (Qty * Price) is derived,
# and AvgPrice is rounded to two decimals.
portfolio = (
    pd.read_csv('CarteiraStatusInvest.csv', encoding='utf-8', sep=';', decimal=',')
    .loc[:, list(columns)]
    .rename(columns=columns)
    .assign(
        Qty=lambda frame: frame['Qty'].astype('int'),
        Volume=lambda frame: frame['Qty'] * frame['Price'],
        AvgPrice=lambda frame: frame['AvgPrice'].round(2),
    )
)
portfolio

Unnamed: 0,Ticker,AvgPrice,Price,Qty,Volume
0,ABCB4,14.61,23.21,3128,72600.88
1,AGRO3,26.17,26.72,500,13360.0
2,AURE3,11.79,11.11,3200,35552.0
3,BBAS3,21.28,28.35,4950,140332.5
4,BBSE3,28.81,36.42,3700,134754.0
5,BLAU3,26.45,13.95,500,6975.0
6,BRBI11,16.15,15.8,700,11060.0
7,BRKM5,29.85,18.19,900,16371.0
8,CSAN3,13.63,13.06,1100,14366.0
9,EGIE3,33.9,44.13,1300,57369.0


In [73]:
# Per-ticker overrides handed to the parser.
# NOTE(review): presumably the alternate symbol is looked up in place of the
# portfolio ticker and 'factor' rescales its values — confirm against
# YahooParser.get_tickers.
alternate_tickers = {
    'ITSA3': {'ticker': 'ITSA4'},
    'SANB3': {'ticker': 'SANB11', 'factor': 0.5},
}

# Build the parser (headless=True is passed straight to its constructor) and
# fetch one result row per portfolio ticker.
yahoo = YahooParser(headless=True)
results = yahoo.get_tickers(
    portfolio['Ticker'],
    alternate_tickers=alternate_tickers,
)

In [97]:
# Remove the 'quote' column from the scraped results before they are merged
# into the portfolio. errors='ignore' keeps this cell re-runnable: without it,
# a second execution raises KeyError because the column is already gone.
results = results.drop(columns='quote', errors='ignore')


In [154]:
# Join the scraped figures onto the positions by concatenating column-wise on
# the index (the positions are re-indexed by 'Ticker' first).
# NOTE: this rebinds `portfolio` with 'Ticker' as its index, so the CSV-loading
# cell must be run again before this cell can be re-executed.
portfolio = pd.concat([portfolio.set_index('Ticker'), results], axis=1)

# The analyst count may be a blank string, or NaN when a ticker has no row in
# `results` (the concat is an outer join). Both become 0 so that the integer
# cast cannot fail on a missing value.
portfolio['analists'] = (
    pd.to_numeric(portfolio['analists'], errors='coerce')
    .fillna(0)
    .astype('int')
)

portfolio

Unnamed: 0,AvgPrice,Price,Qty,Volume,recomendation,low,mean,high,analists
ABCB4,14.61,23.21,3128,72600.88,2.5,21.83,25.02,28.0,8
AGRO3,26.17,26.72,500,13360.0,2.5,29.7,36.35,43.0,2
AURE3,11.79,11.11,3200,35552.0,2.6,12.0,14.72,17.0,13
BBAS3,21.28,28.35,4950,140332.5,1.8,17.5,34.0,45.0,14
BBSE3,28.81,36.42,3700,134754.0,2.7,30.0,37.15,42.0,12
BLAU3,26.45,13.95,500,6975.0,3.3,12.0,16.55,26.0,6
BRBI11,16.15,15.8,700,11060.0,1.8,18.0,19.14,20.51,6
BRKM5,29.85,18.19,900,16371.0,2.7,20.0,26.39,35.0,9
CSAN3,13.63,13.06,1100,14366.0,2.2,16.5,23.69,31.8,9
EGIE3,33.9,44.13,1300,57369.0,3.3,38.0,44.15,50.0,13


In [163]:
# Blank strings stand for "no value": turn them into NaN. The explicit
# infer_objects() gives numeric dtypes back to columns that were object-typed
# only because of those blanks, instead of relying on replace() downcasting
# implicitly (that implicit behaviour is deprecated in recent pandas).
portfolio = portfolio.replace('', np.nan).infer_objects()

# Snapshot date as an ISO-formatted string, identical on every row.
portfolio['Date'] = datetime.now().strftime('%Y-%m-%d')

# Fractional distance between the 'mean' column (presumably the analysts' mean
# price target — confirm against YahooParser) and the current price.
portfolio['Upside'] = (portfolio['mean'] / portfolio['Price']) - 1


In [168]:

# Render the upside as a percentage string with one decimal (e.g. '7.8%').
# The ratio is recomputed from 'mean' and 'Price' rather than read back from
# 'Upside', so re-running this cell does not try to multiply the already
# formatted strings by 100.
# NOTE: rows whose ratio is NaN are rendered as the string 'nan%'.
upside_ratio = (portfolio['mean'] / portfolio['Price']) - 1
portfolio['Upside'] = (100 * upside_ratio).round(1).astype('str') + '%'

In [170]:
# Persist today's snapshot as a JSON list of records, one file per calendar day
# (a re-run on the same day overwrites the file).
snapshot_path = Path(f'./data/portfolio_{datetime.now().strftime("%Y%m%d")}.json')

# Create the output folder on first use so the export does not fail on a fresh
# checkout where ./data does not exist yet.
snapshot_path.parent.mkdir(parents=True, exist_ok=True)

# The ticker index is unnamed, so reset_index() exposes it as 'index'; it is
# published under the name 'Stock'.
(
    portfolio
    .reset_index()
    .rename(columns={'index': 'Stock'})
    .to_json(snapshot_path, orient='records')
)

In [171]:
# Read today's snapshot back from disk and display it, as a check that the
# export round-trips.
pd.read_json(
    f'./data/portfolio_{datetime.now().strftime("%Y%m%d")}.json',
    orient='records',
)

Unnamed: 0,Stock,AvgPrice,Price,Qty,Volume,recomendation,low,mean,high,analists,Date,Upside
0,ABCB4,14.61,23.21,3128,72600.88,2.5,21.83,25.02,28.0,8,2024-09-13,7.8%
1,AGRO3,26.17,26.72,500,13360.0,2.5,29.7,36.35,43.0,2,2024-09-13,36.0%
2,AURE3,11.79,11.11,3200,35552.0,2.6,12.0,14.72,17.0,13,2024-09-13,32.5%
3,BBAS3,21.28,28.35,4950,140332.5,1.8,17.5,34.0,45.0,14,2024-09-13,19.9%
4,BBSE3,28.81,36.42,3700,134754.0,2.7,30.0,37.15,42.0,12,2024-09-13,2.0%
5,BLAU3,26.45,13.95,500,6975.0,3.3,12.0,16.55,26.0,6,2024-09-13,18.6%
6,BRBI11,16.15,15.8,700,11060.0,1.8,18.0,19.14,20.51,6,2024-09-13,21.1%
7,BRKM5,29.85,18.19,900,16371.0,2.7,20.0,26.39,35.0,9,2024-09-13,45.1%
8,CSAN3,13.63,13.06,1100,14366.0,2.2,16.5,23.69,31.8,9,2024-09-13,81.4%
9,EGIE3,33.9,44.13,1300,57369.0,3.3,38.0,44.15,50.0,13,2024-09-13,0.0%
