In [1]:
import pandas as pd
import zipfile, urllib.request, shutil

In [2]:
def pad(x):
    """Return ``x`` as a string, prefixed with "0" when ``x`` is below 10."""
    prefix = "0" if x < 10 else ""
    return prefix + str(x)

In [3]:
def download_fund_files(start_date, end_date):
    """Download and unpack the monthly CVM fund-portfolio (CDA) archives.

    For every month from ``start_date`` to ``end_date`` (both inclusive) the
    archive ``cda_fi_<YYYY><MM>.zip`` is fetched, saved in the current working
    directory and extracted there.

    Parameters
    ----------
    start_date, end_date : str
        First and last month to fetch, written as ``'YYYY-MM'``. Anything
        after the month (e.g. a day component) is ignored.

    Raises
    ------
    urllib.error.URLError
        If an archive cannot be downloaded.
    zipfile.BadZipFile
        If a downloaded file is not a valid zip archive.
    """
    start_year, start_month = (int(part) for part in start_date.split('-')[:2])
    end_year, end_month = (int(part) for part in end_date.split('-')[:2])
    for year in range(start_year, end_year + 1):
        # Clamp the first and the last year independently so that a range that
        # lies inside a single year honours both its start and its end month.
        first_month = start_month if year == start_year else 1
        last_month = end_month if year == end_year else 12
        for month in range(first_month, last_month + 1):
            file_name = 'cda_fi_{}{:02d}.zip'.format(year, month)
            url = 'http://dados.cvm.gov.br/dados/FI/DOC/CDA/DADOS/' + file_name
            with urllib.request.urlopen(url) as response, open(file_name, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
            # Extract only after the archive has been closed (and therefore
            # flushed to disk); reading it while it is still open for writing
            # can see a truncated file.
            with zipfile.ZipFile(file_name) as zf:
                zf.extractall()

In [4]:
def download_quota_files(start_date, end_date):
    """Download the monthly CVM daily-report (``inf_diario_fi``) CSV files.

    For every month from ``start_date`` to ``end_date`` (both inclusive) the
    file ``inf_diario_fi_<YYYY><MM>.csv`` is fetched and saved in the current
    working directory.

    Parameters
    ----------
    start_date, end_date : str
        First and last month to fetch, written as ``'YYYY-MM'``. Anything
        after the month (e.g. a day component) is ignored.

    Raises
    ------
    urllib.error.URLError
        If a file cannot be downloaded.
    """
    start_year, start_month = (int(part) for part in start_date.split('-')[:2])
    end_year, end_month = (int(part) for part in end_date.split('-')[:2])
    for year in range(start_year, end_year + 1):
        # Clamp the first and the last year independently so that a range that
        # lies inside a single year honours both its start and its end month.
        first_month = start_month if year == start_year else 1
        last_month = end_month if year == end_year else 12
        for month in range(first_month, last_month + 1):
            file_name = 'inf_diario_fi_{}{:02d}.csv'.format(year, month)
            url = 'http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/' + file_name
            with urllib.request.urlopen(url) as response, open(file_name, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)

In [5]:
# Fetch both data sets (daily quota reports and portfolio archives) for the
# same period: January through December 2019.
period = ("2019-01", "2019-12")
download_quota_files(*period)
download_fund_files(*period)

In [6]:
def get_fund_dataframe(year, month, cnpj):
    """Load one fund's rows from a monthly ``cda_fi_BLC_4`` CSV file.

    Reads ``cda_fi_BLC_4_<year><month>.csv`` (semicolon separated) from the
    current working directory, keeps the rows whose ``CNPJ_FUNDO`` equals
    ``cnpj`` and returns only the identification, date, asset-code and
    final-market-value columns, re-indexed from zero.

    Parameters
    ----------
    year, month : str
        Pieces of the file name, e.g. ``'2019'`` and ``'01'``.
    cnpj : str
        Value to match against the ``CNPJ_FUNDO`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``CNPJ_FUNDO``, ``DT_COMPTC``, ``CD_ATIVO`` and
        ``VL_MERC_POS_FINAL`` for the matching rows.
    """
    wanted_columns = ['CNPJ_FUNDO', 'DT_COMPTC', 'CD_ATIVO', 'VL_MERC_POS_FINAL']
    holdings = pd.read_csv('cda_fi_BLC_4_' + year + month + '.csv', sep=';')
    belongs_to_fund = holdings['CNPJ_FUNDO'] == cnpj
    return holdings.loc[belongs_to_fund, wanted_columns].reset_index(drop=True)

In [7]:
# Portfolio positions of the target fund for every month of 2019.
fund_cnpj = '32.812.291/0001-09'
# Months are loaded newest first, so the combined frame runs from December
# down to January; each monthly frame keeps its own zero-based index.
monthly_frames = [
    get_fund_dataframe('2019', pad(month), fund_cnpj)
    for month in range(12, 0, -1)
]
# A single concat over the list avoids re-copying a growing frame on every
# loop iteration.
df = pd.concat(monthly_frames, axis=0)

In [8]:
# Derive the calendar month and year of each reporting date (DT_COMPTC);
# the date column is parsed once and both parts are added as new columns.
report_dates = pd.DatetimeIndex(df['DT_COMPTC'])
df = df.assign(month=report_dates.month, year=report_dates.year)

In [9]:
# Total final market value per (year, month), stored as SUM_VL_MERC.
# Only the value column is aggregated: summing the whole frame would also try
# to aggregate the non-numeric columns (e.g. CNPJ_FUNDO, DT_COMPTC), which
# recent pandas versions no longer drop silently.
groups = (
    df.groupby(['year', 'month'])['VL_MERC_POS_FINAL']
    .sum()
    .rename('SUM_VL_MERC')
    .reset_index()
)

In [10]:
# Attach each month's total to its rows, express every position as a
# percentage of that total, then discard the helper year/month keys.
df = (
    df.merge(groups, on=['year', 'month'])
    .assign(
        MERC_POS_PERCENTAGE=lambda merged: 100 * (merged['VL_MERC_POS_FINAL'] / merged['SUM_VL_MERC'])
    )
    .drop(columns=['year', 'month'])
)

In [11]:
# Persist the portfolio table in both spreadsheet and plain-text form
# (written to the current working directory).
df.to_excel(excel_writer="fundo_navi.xlsx")
df.to_csv(path_or_buf="fundo_navi.csv")

In [12]:
def get_quotas_dataframe(year, month, cnpj):
    """Load one fund's rows from a monthly ``inf_diario_fi`` CSV file.

    Reads ``inf_diario_fi_<year><month>.csv`` (semicolon separated) from the
    current working directory, keeps the rows whose ``CNPJ_FUNDO`` equals
    ``cnpj`` and returns a fixed subset of columns, re-indexed from zero.

    Parameters
    ----------
    year, month : str
        Pieces of the file name, e.g. ``'2019'`` and ``'01'``.
    cnpj : str
        Value to match against the ``CNPJ_FUNDO`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``CNPJ_FUNDO``, ``DT_COMPTC``, ``VL_TOTAL``, ``VL_QUOTA`` and
        ``VL_PATRIM_LIQ`` for the matching rows.
    """
    kept_columns = ['CNPJ_FUNDO', 'DT_COMPTC', 'VL_TOTAL', 'VL_QUOTA', 'VL_PATRIM_LIQ']
    daily_report = pd.read_csv('inf_diario_fi_' + year + month + '.csv', sep=';')
    fund_rows = daily_report[daily_report['CNPJ_FUNDO'] == cnpj]
    return fund_rows[kept_columns].reset_index(drop=True)

In [13]:
# Daily quota records of the target fund for every month of 2019.
fund_cnpj = '32.812.291/0001-09'
# Months are loaded newest first, so the combined frame runs from December
# down to January; each monthly frame keeps its own zero-based index.
monthly_frames = [
    get_quotas_dataframe('2019', pad(month), fund_cnpj)
    for month in range(12, 0, -1)
]
# A single concat over the list avoids re-copying a growing frame on every
# loop iteration.
df = pd.concat(monthly_frames, axis=0)

In [14]:
# Persist the quota table in both spreadsheet and plain-text form
# (written to the current working directory).
df.to_excel(excel_writer="fundo_navi_cotas.xlsx")
df.to_csv(path_or_buf="fundo_navi_cotas.csv")