In [1]:
import pandas as pd
import datetime
import locale
import os

import matplotlib.pyplot as plt

# Switch the time-formatting locale (LC_TIME) to Brazilian Portuguese. This
# affects strftime("%b"), which read_cdi uses to build the month label it looks
# up in the 'MES' column — presumably the CDI file stores Portuguese
# abbreviations (TODO confirm against the data file).
# NOTE(review): the locale name 'pt_BR.utf8' is platform specific; setlocale
# raises locale.Error when that locale is not installed.
locale.setlocale(locale.LC_TIME, 'pt_BR.utf8')

'pt_BR.utf8'

In [2]:
def read_data(filename):
    """Load a model result CSV and keep only the two return series.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        A frame with exactly the columns ``variacao_carteira`` and
        ``variacao_benchmark``, in that order (``calculate_beta`` indexes
        them by position, so the order matters).

    Raises
    ------
    KeyError
        If either of the two required columns is missing from the file.
    """
    wanted_columns = ['variacao_carteira', 'variacao_benchmark']
    # Selecting the wanted columns already discards everything else
    # (including 'datas'), so no explicit drop is needed — and files that
    # do not carry a 'datas' column are now accepted as well.
    return pd.read_csv(filename)[wanted_columns]

In [3]:
def calculate_beta(df_results):
    """Compute the beta of the first column with respect to the second one.

    beta = Cov(col0, col1) / Var(col1)

    With the frame produced by ``read_data`` column 0 is
    ``variacao_carteira`` and column 1 is ``variacao_benchmark``.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Frame with (at least) two numeric columns; only the first two are
        used, addressed by position.

    Returns
    -------
    float
        The covariance between the two columns divided by the variance of
        the second column.
    """
    covBench = df_results.cov()
    var = df_results.var(axis=0)
    # `var` is indexed by column *name*; use .iloc for positional access.
    # A plain `var[1]` relies on the deprecated integer-key fallback of
    # Series.__getitem__ on a string-labelled index.
    beta = covBench.iat[0, 1] / var.iloc[1]
    return beta

In [4]:
def read_cdi(dt_start, dt_end, cdi_path='../../data/cdi_historico_1986-2022.csv'):
    """Collect the monthly CDI values from the month of ``dt_start`` to the
    month of ``dt_end``, both months included.

    The history file is a ';'-separated table with a ``MES`` column holding
    upper-case month abbreviations and one column per year (named with the
    four-digit year). Rows 0-11 are assumed to hold January-December in
    calendar order (the month arithmetic below depends on it).

    Parameters
    ----------
    dt_start, dt_end : str
        Period boundaries formatted as ``YYYY-MM-DD``.
    cdi_path : str, optional
        Location of the CDI history CSV. Defaults to the project data file.

    Returns
    -------
    pandas.DataFrame
        A single column labelled ``0`` with one row per month of the period,
        in chronological order. The row labels are the month rows of the
        history file (so they repeat across years). An empty frame is
        returned when ``dt_end`` falls in a year before ``dt_start``.

    Raises
    ------
    IndexError
        If the month abbreviation of one of the dates is not present in the
        ``MES`` column (typically because the LC_TIME locale does not match
        the language used in the file).
    KeyError
        If the file has no column for one of the years in the period.
    """
    months_in_year = 12

    start = datetime.datetime.strptime(dt_start, "%Y-%m-%d")
    end = datetime.datetime.strptime(dt_end, "%Y-%m-%d")

    cdi_hist = pd.read_csv(cdi_path, delimiter=';')

    def month_row(date):
        # "%b" is locale dependent: the abbreviation produced here has to be
        # spelled like the labels stored in the 'MES' column.
        label = date.strftime("%b").upper()
        matches = cdi_hist.index[cdi_hist['MES'] == label].tolist()
        if not matches:
            raise IndexError(
                f"month label {label!r} not found in column 'MES' of {cdi_path}"
            )
        return matches[0]

    start_row = month_row(start)
    end_row = month_row(end)

    pieces = []
    for ano in range(start.year, end.year + 1):
        year_column = cdi_hist[str(ano)]
        is_first_year = ano == start.year
        is_last_year = ano == end.year

        if not is_first_year and not is_last_year:
            # Intermediate year (strictly between the first and the last):
            # every month belongs to the period.
            pieces.append(year_column)
            continue

        # Row labels are 0-based, so the period starts exactly at the start
        # month's row and ends exactly at the end month's row (inclusive),
        # the same convention for single-year and multi-year periods.
        first_row = start_row if is_first_year else 0
        last_row = end_row if is_last_year else months_in_year - 1
        wanted_rows = list(range(first_row, last_row + 1))
        pieces.append(year_column.filter(items=wanted_rows, axis=0))

    if not pieces:
        return pd.DataFrame()

    # One concat instead of growing a frame inside the loop; the explicit
    # column label keeps every year stacked in the same single column.
    return pd.concat(pieces, axis=0).to_frame(name=0)

In [5]:
def calculate_sharpe(df_results, rf):
    """Return the Sharpe ratio of the ``variacao_carteira`` column.

    The ratio is ``(mean - rf) / std`` where ``mean`` and ``std`` are the
    pandas mean and (sample) standard deviation of ``variacao_carteira``.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Frame containing a ``variacao_carteira`` column.
    rf : float
        Risk-free rate subtracted from the mean return.
    """
    portfolio_returns = df_results['variacao_carteira']
    excess_return = portfolio_returns.mean() - rf
    return excess_return / portfolio_returns.std()

In [6]:
def calculate_err(df_results):
    """Compare the compounded return of the portfolio with the benchmark's.

    Each column is compounded as the running product of ``1 + value``
    starting from 1; the error is the portfolio product minus the
    benchmark product.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Frame with ``variacao_carteira`` and ``variacao_benchmark`` columns.

    Returns
    -------
    tuple
        ``(abs(error), error)``.
    """
    benchmark_steps = df_results['variacao_benchmark'].to_numpy()
    portfolio_steps = df_results['variacao_carteira'].to_numpy()

    acc_benchmark, acc_portfolio = 1, 1
    # Compound step by step, in row order, so the floating-point result does
    # not depend on any reduction order.
    for bench_step, port_step in zip(benchmark_steps, portfolio_steps):
        acc_benchmark = acc_benchmark * (1 + bench_step)
        acc_portfolio = acc_portfolio * (1 + port_step)

    signed_gap = acc_portfolio - acc_benchmark
    return abs(signed_gap), signed_gap



In [7]:
def create_analysis(folder):
    """Compute beta, Sharpe ratio and return error for every model result
    stored under ``../{folder}`` and write them to ``../{folder}/analises.csv``.

    Directory layout that is walked::

        ../{folder}/T_<train start>_<train end>/V_<val start>_<val end>/<model>.csv

    Entries whose names do not start with ``T_`` / ``V_`` and files that do
    not end in ``.csv`` are ignored.

    Parameters
    ----------
    folder : str
        Name of the experiment folder, relative to the parent directory.

    Raises
    ------
    ValueError
        If a ``T_``/``V_`` entry name does not split into exactly two dates.
    """
    base_dir = f"../{folder}"
    records = []

    # sorted(): os.listdir returns entries in arbitrary order; sorting makes
    # the row order of the output file reproducible.
    for file_train in sorted(os.listdir(base_dir)):
        if not file_train.startswith("T_"):
            continue
        t_start_date, t_end_date = file_train.split("_")[1:]
        train_path = f"{base_dir}/{file_train}"

        for file_validate in sorted(os.listdir(train_path)):
            # Skip anything that is not a validation folder. Processing such
            # entries would reuse the dates of the previous V_ folder (or
            # fail when no V_ folder has been seen yet).
            if not file_validate.startswith("V_"):
                continue
            v_start_date, v_end_date = file_validate.split("_")[1:]
            path = f"{train_path}/{file_validate}"

            # The risk-free rate depends only on the validation period, so it
            # is computed once per folder (lazily, only if a CSV is found).
            rf = None

            for file in sorted(os.listdir(path)):
                if not file.endswith(".csv"):
                    continue
                # splitext keeps dots that are part of the model name and the
                # file that was actually listed is the one that gets read.
                model = os.path.splitext(file)[0]
                results = read_data(f'{path}/{file}')

                if rf is None:
                    rf_df = read_cdi(v_start_date, v_end_date)
                    rf = rf_df.iloc[:, 0].astype(float).mean()

                beta = calculate_beta(results)
                sharpe = calculate_sharpe(results, rf)
                mod_err, err = calculate_err(results)
                records.append({
                    'dt_ini_treinamento': t_start_date,
                    'dt_fin_treinamento': t_end_date,
                    'dt_ini_validacao': v_start_date,
                    'dt_fin_validacao': v_end_date,
                    'modelo': model,
                    'beta': beta,
                    'sharpe': sharpe,
                    'mod_err': mod_err,
                    'erro': err
                })

    # Build the frame once from all records (default index 0..n-1) instead
    # of concatenating one-row frames inside the loop.
    df_betas = pd.DataFrame(records)
    df_betas.to_csv(f'{base_dir}/analises.csv')

In [8]:
# Run the analysis once per experiment folder, in this order.
for experiment_folder in ("T1a_V1a_anual", "T3m_V3m_anual", "T3m_V3m_tri"):
    create_analysis(experiment_folder)