In [77]:
import yfinance as yf
import pandas as pd
import requests
import matplotlib.pyplot as plt
import pickle
import os

# Select the "bmh" matplotlib style sheet for all plots drawn after this point.
plt.style.use("bmh")

In [39]:
# Download the fund ranking page and parse its first HTML table into a DataFrame.
url = 'https://www.fundsexplorer.com.br/ranking'
# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36'
        ' (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
}

# The timeout stops the cell from hanging forever if the server never answers.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of silently leaving `fiis_summary`
# undefined (or stale from a previous run), which would only surface later
# as a confusing NameError or as wrong data.
response.raise_for_status()

# read_html returns one DataFrame per <table>; the first one is used.
fiis_summary = pd.read_html(response.content, encoding='utf-8')[0]

In [66]:
# --- Download configuration ---
bar_name = "Close"        # price column kept from each history frame
resample_freq = "D"       # resampling frequency; gaps are forward-filled
liquidity_multiple = 10   # keep funds with daily liquidity >= this many times the median

# Select the fund codes whose "Liquidez Diária" (daily liquidity) is at least
# `liquidity_multiple` times the median across all funds in the summary table.
daily_liquidity = fiis_summary["Liquidez Diária"]
liquidity_floor = daily_liquidity.median() * liquidity_multiple
instruments = list(
    fiis_summary.loc[daily_liquidity >= liquidity_floor, "Código do fundo"].unique()
)

error_tickers = []   # fund codes whose download or processing failed
error_details = {}   # fund code -> repr of the exception, so failures can be diagnosed
bars = []            # one single-column price frame per instrument
volumes = []         # one single-column volume frame per instrument
for inst in instruments:
    try:
        # The ticker symbol is the fund code with a ".SA" suffix appended.
        instrument_info = yf.Ticker("{}.SA".format(inst))
        # NOTE(review): `debug` may be deprecated/removed in newer yfinance
        # releases -- confirm against the installed version.
        hist = (
            instrument_info.history(period="max", debug=False)[[bar_name, "Volume"]]
            .resample(resample_freq)
            .ffill()
            .dropna()
        )
        # Drop timezone and time-of-day so every instrument is indexed by plain
        # calendar dates and the frames align when concatenated below.
        hist.index = hist.index.tz_localize(None).normalize()

        bars.append(
            hist[[bar_name]].rename(
                columns={bar_name: "{instrument} {bar_name}".format(instrument=inst, bar_name=bar_name)}
            )
        )
        volumes.append(
            hist[["Volume"]].rename(columns={"Volume": "{instrument} Volume".format(instrument=inst)})
        )
    except Exception as exc:
        # Best-effort download: remember the failing ticker and why it failed,
        # but let KeyboardInterrupt / SystemExit propagate (a bare `except:`
        # would swallow them too).
        error_tickers.append(inst)
        error_details[inst] = repr(exc)

if not bars:
    # pd.concat([]) would raise an opaque "No objects to concatenate";
    # raise the same exception type with an actionable message instead.
    raise ValueError(
        "No price history could be downloaded for any of the {} selected instruments; "
        "inspect `error_details` for the underlying errors.".format(len(instruments))
    )

# Combine the per-instrument frames column-wise, aligned on the date index.
bars_df = pd.concat(bars, axis=1)
volumes_df = pd.concat(volumes, axis=1)

In [79]:
def save_pickle(path: str,
                obj: dict) -> None:
    """Serialize ``obj`` to ``path`` with pickle's highest protocol.

    Parameters
    ----------
    path : str
        Destination file. Missing parent directories are created.
    obj : dict
        Object to serialize.

    Raises
    ------
    OSError
        If the directory cannot be created or the file cannot be written.
    pickle.PicklingError
        If ``obj`` cannot be pickled.
    """
    # A bare filename has an empty dirname; only create a directory when there is one.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [84]:
# Bundle the ranking table together with the downloaded price and volume
# frames and persist the bundle under ./data/inputs.
#
# This cell rebinds `fiis_summary` from the ranking table to the bundle dict.
# If the cell is re-run, `fiis_summary` is already the bundle, so unwrap it
# first; otherwise the previous bundle would be nested under "summary".
summary_table = fiis_summary["summary"] if isinstance(fiis_summary, dict) else fiis_summary
fiis_summary = {"summary": summary_table,
                "bars": bars_df,
                "volumes": volumes_df}

# Make sure the output directory exists before writing into it.
output_dir = os.path.join(os.getcwd(), "data", "inputs")
os.makedirs(output_dir, exist_ok=True)

save_pickle(path=os.path.join(output_dir, "fiis_summary.pickle"), obj=fiis_summary)
