In [1]:
import warnings, requests
import pandas as pd
import numpy as np
import empyrical as ep
import plotly.express as plt

from datetime import datetime

warnings.filterwarnings('ignore')

In [2]:
class Tools:
    """Helpers for loading market data and computing per-fund performance metrics.

    Attributes
    ----------
    path : str
        Directory that holds the parquet files read and written by the notebook.
    """

    def __init__(self, path='D:/Disco/Data/CVM'):
        """Create the helper.

        Parameters
        ----------
        path : str, optional
            Data directory. Defaults to the location the notebook was written
            against; pass another value to run it on a different machine.
        """
        self.path = path

    def compute_metrics(self, fund_cnpj, funds_series, length):
        """Compute summary performance metrics for one fund.

        Parameters
        ----------
        fund_cnpj : str
            Value matched against the ``CNPJ_FUNDO`` column.
        funds_series : pandas.DataFrame
            Frame with at least ``CNPJ_FUNDO``, ``DT_COMPTC`` and ``VL_QUOTA``.
        length : int
            Number of trailing rows of the fund's history to use.

        Returns
        -------
        pandas.DataFrame
            One row with the columns ``cnpj``, ``return`` (cumulative),
            ``avg_return`` (``ep.annual_return``), ``vol`` (standard deviation
            of the returns scaled by sqrt(252)) and ``max_dd``
            (``ep.max_drawdown``). The metrics are NaN when no usable return
            remains after cleaning.
        """
        is_fund = funds_series['CNPJ_FUNDO'] == fund_cnpj
        fund_data = (
            funds_series.loc[is_fund, ['DT_COMPTC', 'VL_QUOTA']]
            .tail(length)
            .rename(columns={'DT_COMPTC': 'date', 'VL_QUOTA': 'value'})
            .drop_duplicates(keep='last')
            .set_index('date')
        )

        fund_data['returns'] = ep.simple_returns(fund_data['value'])
        # A zero quota produces +/-inf returns; treat them as missing.
        fund_data['returns'] = fund_data['returns'].replace([np.inf, -np.inf], np.nan)
        fund_data = fund_data.dropna()

        returns = fund_data['returns']
        if returns.empty:
            # Nothing left to measure: report NaN instead of failing on the
            # "last element" lookup below.
            return pd.DataFrame([{
                'cnpj': fund_cnpj,
                'return': np.nan,
                'avg_return': np.nan,
                'vol': np.nan,
                'max_dd': np.nan,
            }])

        # TODO: add a Sharpe ratio once a risk-free series is passed in.
        data = {
            'cnpj': fund_cnpj,
            # .iloc[-1]: the index holds dates, so "last" must be positional.
            'return': (1 + returns).cumprod().iloc[-1] - 1,
            'avg_return': ep.annual_return(returns),
            'vol': np.std(returns) * np.sqrt(252),
            'max_dd': ep.max_drawdown(returns),
        }

        return pd.DataFrame([data])

    def getBCBSerie(self, serie_id, serie_name):
        """Download a time series from the Banco Central do Brasil SGS API.

        Parameters
        ----------
        serie_id : int or str
            SGS series code inserted into the request URL.
        serie_name : str
            Label stored in the ``index_name`` column.

        Returns
        -------
        pandas.DataFrame
            Indexed by ``date`` (datetime) with the columns ``value`` (float),
            ``index_id`` and ``index_name``.

        Raises
        ------
        requests.HTTPError
            If the API answers with an error status.
        """
        # Build the query through `params`: continuing an f-string with
        # backslashes would embed the indentation spaces in the URL.
        url = f"https://api.bcb.gov.br/dados/serie/bcdata.sgs.{serie_id}/dados"
        # NOTE(review): dates are sent as dd-mm-yyyy, as before; confirm the
        # API accepts dashes rather than slashes.
        params = {
            'dataInicial': '01-01-1970',
            'dataFinal': datetime.today().strftime('%d-%m-%Y'),
        }
        response = requests.get(url, params=params, timeout=60)
        response.raise_for_status()

        data = pd.DataFrame(response.json(), columns=['data', 'valor'])
        data = data.rename(columns={'data': 'date', 'valor': 'value'})
        # Convert only the numeric column; the date column holds strings.
        data['value'] = data['value'].astype(float)
        data['date'] = pd.to_datetime(data['date'], format="%d/%m/%Y")
        data = data.set_index('date')
        data['index_id'] = serie_id
        data['index_name'] = serie_name

        return data

    def computeReturn(self, data, start_date, end_date):
        """Accumulate percentage rates into a cumulative return over a window.

        Parameters
        ----------
        data : pandas.DataFrame
            Date-indexed frame whose ``value`` column holds per-period rates
            in percent. It is not modified.
        start_date, end_date
            Inclusive bounds compared against ``data.index``.

        Returns
        -------
        pandas.DataFrame
            The rows inside the window with ``value`` replaced by the
            cumulative return (0 on the first date). When no row falls inside
            the window, a daily ``date``-indexed frame with ``value`` 0.
        """
        in_window = (data.index >= start_date) & (data.index <= end_date)
        returns = data.loc[in_window].copy()
        if len(returns) > 0:
            returns['value'] = 1 + returns['value'] / 100
            # Rebase on the first date. A single .loc call is required:
            # chained indexing would assign into a temporary copy of the row.
            returns.loc[returns.index.min(), 'value'] = 1
            returns['value'] = returns['value'].cumprod() - 1
        else:
            returns = pd.DataFrame(
                {'value': 0},
                index=pd.date_range(start_date, end_date, name='date'),
            )

        return returns

    @staticmethod
    def loadCVMData():
        """Placeholder for the CVM loader; not implemented yet."""
        pass

In [3]:
# Shared helper instance; the cells below read the data directory from `tools.path`.
tools = Tools()

In [None]:
# Download BCB series 11 and label it 'SELIC'.
selic = tools.getBCBSerie(11, 'SELIC')

In [None]:
# Inspect the last five rows of the downloaded series.
selic.tail(5)

In [None]:
# Fund registry: keep only non-exclusive funds that are in normal operation.
inf_cadastral = pd.read_parquet(f'{tools.path}/inf_cadastral.parquet')
inf_cadastral = inf_cadastral[(inf_cadastral['FUNDO_EXCLUSIVO'] == "N") & (inf_cadastral['SIT'] == "EM FUNCIONAMENTO NORMAL")]

# Strip the CNPJ punctuation from the index. regex=False is explicit because
# '.' as a regular expression matches any character, and the default of
# `regex` differs between pandas versions.
inf_cadastral.index = (
    inf_cadastral.index
    .str.replace('.', '', regex=False)
    .str.replace('/', '', regex=False)
    .str.replace('-', '', regex=False)
)
print(len(inf_cadastral))

inf_cadastral.head(3)

In [None]:
# Daily fund reports; the stored index is moved back into regular columns.
inf_diario = pd.read_parquet(f'{tools.path}/inf_diario_all.parquet')
inf_diario = inf_diario.reset_index()
print(len(inf_diario))

# Keep only the series whose CNPJ is present in the filtered registry,
# then parse the report date.
inf_diario = (
    inf_diario
    .loc[inf_diario['CNPJ_FUNDO'].isin(inf_cadastral.index.values)]
    .assign(DT_COMPTC=lambda frame: pd.to_datetime(frame['DT_COMPTC'], format='%Y-%m-%d'))
)
print(len(inf_diario))

inf_diario.head(5)

In [None]:
# Attach the registry attributes to every daily record of the same fund.
# NOTE(review): 'CNPJ_FUNDO' is not among the selected left-hand columns, so
# the left key presumably resolves to the index level name; confirm that the
# registry index is named 'CNPJ_FUNDO'.
funds_series = inf_cadastral[
    ['TP_FUNDO', 'DENOM_SOCIAL', 'CLASSE_ANBIMA', 'CLASSE', 'RENTAB_FUNDO', 'PUBLICO_ALVO']
].merge(
    inf_diario[['DT_COMPTC', 'CNPJ_FUNDO', 'VL_TOTAL', 'VL_QUOTA', 'VL_PATRIM_LIQ']],
    how='inner',
    left_on='CNPJ_FUNDO',
    right_on='CNPJ_FUNDO',
)

# The source frames are no longer needed once merged.
del inf_cadastral, inf_diario

# Persist the merged data to disk.
funds_series.to_parquet(f'{tools.path}/funds_series.parquet')


In [4]:
# Reload the merged registry + daily data from the parquet file under `tools.path`.
funds_series = pd.read_parquet(f'{tools.path}/funds_series.parquet')

In [None]:
# Preview the first four rows of the merged frame.
funds_series.head(4)

In [None]:
# Quota history of a single fund, indexed by date.
# (another CNPJ noted here: 00068305000135)
fund_data = (
    funds_series
    .loc[funds_series['CNPJ_FUNDO'] == '30271177000193', ['DT_COMPTC', 'VL_QUOTA']]
    .rename(columns={'DT_COMPTC': 'date', 'VL_QUOTA': 'value'})
    .set_index('date')
)

# Restrict the SELIC series to the dates on which the fund has a quota.
fund_dates = fund_data.index.values
selic_temp = selic[selic.index.isin(fund_dates)]


In [None]:
# Line chart of the fund frame (`plt` is the plotly.express alias imported at the top, not matplotlib).
plt.line(fund_data)

In [None]:
# Period-over-period returns of the quota value.
fund_data['returns'] = ep.simple_returns(fund_data['value'])
# Assign the result back: `replace` returns a new Series, so without the
# assignment the +/-inf values would survive the dropna below.
fund_data['returns'] = fund_data['returns'].replace([np.inf, -np.inf], np.nan)
fund_data = fund_data.dropna(subset=['returns'])

# NOTE(review): Tools.computeReturn treats `value` as a percentage rate
# (1 + value / 100). If this series is a rate rather than a price level, the
# returns should probably be selic['value'] / 100; confirm.
selic['returns'] = ep.simple_returns(selic['value'])

# Align the SELIC series with the dates that remain for the fund.
selic_temp = selic[selic.index.isin(fund_data.index.values)]


In [None]:
# Line chart of the fund's return series.
plt.line(fund_data['returns'])

In [None]:
# Line chart of the SELIC return series on the fund's dates.
plt.line(selic_temp['returns'])

In [5]:
# Minimum history (in rows) a fund needs, and the window passed to compute_metrics.
serie_size = 1260
metrics_frames = []
funds_metrics = pd.DataFrame()

count = 0
# One groupby pass instead of re-filtering the whole frame for every CNPJ;
# sort=False keeps the first-appearance order that `unique()` produced.
for cnpj, serie in funds_series.groupby('CNPJ_FUNDO', sort=False):
    if len(serie) > serie_size:
        print(f'computing metrics for: {cnpj}')
        # Collect the one-row frames and concatenate only when needed:
        # concatenating on every iteration is quadratic.
        metrics_frames.append(tools.compute_metrics(cnpj, serie, serie_size))
        count += 1
        if count > 20:
            # Checkpoint so an interrupted run keeps the work done so far.
            print('saving file...')
            count = 0
            funds_metrics = pd.concat(metrics_frames)
            funds_metrics.to_parquet('./funds_metrics.parquet')

if metrics_frames:
    funds_metrics = pd.concat(metrics_frames)
funds_metrics = funds_metrics.reset_index()
funds_metrics.to_parquet('./funds_metrics.parquet')

computing metrics for: 00068305000135
computing metrics for: 00071477000168
computing metrics for: 00073041000108
computing metrics for: 00083181000167
computing metrics for: 00089915000115
computing metrics for: 00102322000141
computing metrics for: 00180995000110
computing metrics for: 00185259000154
computing metrics for: 00194256000187
computing metrics for: 00211294000109
computing metrics for: 00222725000124
computing metrics for: 00222816000160
computing metrics for: 00280302000160
computing metrics for: 00306278000191
computing metrics for: 00322699000106
computing metrics for: 00346750000110
computing metrics for: 00360293000118
computing metrics for: 00398561000190
computing metrics for: 00400490000113
computing metrics for: 00463569000193
computing metrics for: 00524617000106
saving file...
computing metrics for: 00539553000117
computing metrics for: 00575922000127
computing metrics for: 00598452000117
computing metrics for: 00601692000123
computing metrics for: 007430260001

KeyboardInterrupt: 

In [None]:
# Spot-check the metrics of one fund over the same window used in the batch run.
fund_cnpj = '02838578000147'
length = 1260
serie = funds_series.loc[funds_series['CNPJ_FUNDO'] == fund_cnpj]
metrics = tools.compute_metrics(fund_cnpj=fund_cnpj, funds_series=serie, length=length)

metrics

In [None]:

# Period-over-period returns of the quota value.
fund_data['returns'] = ep.simple_returns(fund_data['value'])
# Assign the result back: `replace` returns a new Series, so without the
# assignment the +/-inf values would survive the dropna below.
fund_data['returns'] = fund_data['returns'].replace([np.inf, -np.inf], np.nan)
fund_data = fund_data.dropna()

selic['returns'] = ep.simple_returns(selic['value'])

# Align the SELIC series with the dates that remain for the fund.
selic_temp = selic[selic.index.isin(fund_data.index.values)]


In [None]:

# Same metrics as Tools.compute_metrics, computed inline for the fund loaded above.
data = {
    # .iloc[-1]: the Series is indexed by date, so "last" must be positional.
    'return': (1 + fund_data['returns']).cumprod().iloc[-1] - 1,
    'avg_return': ep.annual_return(fund_data['returns']),
    'vol': np.std(fund_data['returns']) * np.sqrt(252),
    'max_dd': ep.max_drawdown(fund_data['returns'])
}

In [None]:
# Display the fund frame.
fund_data

In [None]:
def sharpe_ratio(return_series, return_riskfree):
    """Annualised Sharpe ratio from the means of two return series.

    The difference between the mean asset return and the mean risk-free
    return is scaled by sqrt(252) and divided by the sample standard
    deviation (``ddof=1``) of the asset returns.

    Parameters
    ----------
    return_series : pandas.Series or pandas.DataFrame
        Return series.
    return_riskfree : pandas.Series or pandas.DataFrame
        Risk free asset return series.

    Returns
    -------
    float
        The annualised ratio.
    """
    excess_mean = np.mean(return_series) - np.mean(return_riskfree)
    annualised_excess = excess_mean * np.sqrt(252)

    return float(annualised_excess / np.std(return_series, ddof=1))

In [None]:
# sharpe_ratio expects the risk-free *return* series; passing `returns + 1`
# (growth factors) would shift the risk-free mean by 1.
sharpe_ratio(fund_data['returns'], selic_temp['returns'])

In [None]:
# empyrical's Sharpe ratio for comparison; no risk-free argument is passed, so the library default applies.
ep.sharpe_ratio(fund_data['returns'] )

In [None]:
# Cumulative SELIC return over the window, scaled by sqrt(252).
# .iloc[-1]: the Series is indexed by date, so "last" must be positional.
# NOTE(review): sqrt(252) normally annualises a volatility, not a cumulative
# return; confirm that this scaling is intended.
((1 + selic_temp['returns']).cumprod().iloc[-1] - 1) * np.sqrt(252)

In [None]:
# Annualised return of the SELIC series on the fund's dates.
ep.annual_return(selic_temp['returns'])

In [None]:
# Sharpe ratio with a constant risk-free value of 0.08.
# NOTE(review): 0.08 looks like an annual rate; confirm whether ep.sharpe_ratio
# expects a per-period (daily) risk-free return here.
ep.sharpe_ratio(fund_data['returns'], 0.08)

In [None]:
# SELIC returns aggregated by calendar year, shown as a table.
pd.DataFrame(ep.aggregate_returns(selic_temp['returns'], convert_to='yearly'))

In [None]:
# Annualised return of the fund. `fund_data` no longer has a 'VL_QUOTA' column
# (it was renamed to 'value' when the frame was built), and the function is
# applied to returns elsewhere in this notebook, so use the 'returns' column.
ep.annual_return(fund_data['returns'])