In [1]:
import gurobipy as gp
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

from markowitz import Markowitz

In [2]:
def limpa(s):
    return s.replace("/","").replace("-","").replace(".","").replace("$","").replace("R","")

def geo_mean(iterable):
    a = np.array(iterable)
    return a.prod()**(1.0/len(a))

In [3]:
df = pd.read_excel('seriehistorica10anos.xlsx')

In [4]:
for i in df.columns:
    try:
        list(df[i].values).index(0)
        df.drop([f'{i}'],axis=1,inplace=True)
    except:
        True

In [5]:
df.fillna(method='ffill',inplace=True)
df.rename(columns={df.columns[0]: 'datas'},
          inplace=True, errors='raise')

In [6]:
datas = list(df['datas'][:])
aux = [i[0:4] for i in datas]
anos = pd.Series(aux).unique().tolist()

limites_anos = [0]

a = 0

for c in anos:
    a += aux.count(c)
    limites_anos.append(a)

print(anos)

['2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021']


In [7]:
DF_anos = dict()

for k in range(len(limites_anos)-1):
    DF_anos[anos[k]] = df[limites_anos[k]:limites_anos[k+1]].reset_index().drop(['index'],axis=1)

In [8]:
DF_meses = dict()
Meses = dict()

for j  in range(len(limites_anos)-1):
    year = anos[j]
    datas = list(DF_anos[year]['datas'][:])
    aux = [i[5:7] for i in datas]
    Meses[year] = pd.Series(aux).unique().tolist()

    limites_meses = [0]

    a = 0

    for c in Meses[year]:
        a += aux.count(c)
        limites_meses.append(a)
        
    for k in range(len(limites_meses)-1):
        DF_meses[year,Meses[year][k]] = DF_anos[year][limites_meses[k]:limites_meses[k+1]].reset_index().drop(['index'],axis=1)

In [9]:
minRetorno = 0.2
C = 100_000
P_categorias = [0.25, 0.05, 0.4, 0.3]
K_min = 3
K_max = 10
P_min = 0.05
P_max = 0.3
l = 1
log = 0

In [10]:
ano_inicial = 2018
ano_final = 2019
mes_inicial = 9
mes_final = 9
incr_janela = 6
janela_meses = 18

In [12]:
anos_escolhidos = anos[anos.index(str(ano_inicial)):(ano_final-ano_inicial+1+anos.index(str(ano_inicial)))]
print(anos_escolhidos)
seq = []
for k  in anos_escolhidos:
    if k == str(ano_final):
        seq += [(k,q) for q  in Meses[k][0:mes_final]]
    elif k == str(ano_inicial):
        seq += [(k,q) for q  in Meses[k][mes_inicial-1:-1]]
    else:
        seq += [(k,q) for q  in Meses[k]]
print(len(seq)/incr_janela)
print(seq)

['2018', '2019']
2.0
[('2018', '09'), ('2018', '10'), ('2018', '11'), ('2019', '01'), ('2019', '02'), ('2019', '03'), ('2019', '04'), ('2019', '05'), ('2019', '06'), ('2019', '07'), ('2019', '08'), ('2019', '09')]


In [17]:
Hist = dict()
DF = pd.DataFrame()
for i in range(int(len(seq)/incr_janela)):
    #DF = pd.DataFrame()
    for j in range(incr_janela):
        print(seq[incr_janela*i+j])
        DF = pd.concat([DF, DF_meses[seq[incr_janela*i+j]]], axis=0).reset_index().drop(['index', 'datas'],axis=1)
        
    pf = pd.read_excel('perfilfundos.xlsx')
    pf.drop(['NOME','DATA_INICIO','CARENCIA','FLAG_QUALIFICADO'],axis=1,inplace=True)
    pf['CNPJ'] = pf['CNPJ'].apply(limpa)
    pf['APLICACAO_MINIMA'] = pd.to_numeric(pf['APLICACAO_MINIMA'].apply(limpa),errors='coerce')
    pf.fillna(0,inplace=True)
    pf.set_index('CNPJ',inplace=True)
    pf_ordered = pf.sort_values(by ='CATEGORIA')
    pf_ordered.drop(index=(list(set(pf.index)-set(DF.columns))),inplace=True)

    categorias = list(pf_ordered['CATEGORIA'].unique())

    limites = [0]

    a = 0

    for c in categorias:
        a += len(pf_ordered[pf_ordered['CATEGORIA'] == c])
        limites.append(a)

    cnpj = list(pf_ordered.index)
    DF = DF[cnpj]

    df_retorno = DF.pct_change().dropna()
    sigma = (df_retorno.cov()*252).to_numpy()
    media = (df_retorno+1).apply(geo_mean)**252-1

    minFundos = np.array(pf_ordered['APLICACAO_MINIMA'])

    n = len(minFundos)

    import time

    tempo_init = time.time()
    modelo = Markowitz(C, minRetorno, K_min, K_max, P_min, P_max, P_categorias, limites, 
        n, minFundos, sigma, media, cnpj);
    Carteira = modelo.solve()
    print('Tempo total:', time.time() - tempo_init, 's')

    Carteira.Exibir(C)

    Hist[seq[incr_janela*i]] = Carteira
    print('\n')
    print('###############################################')
    print('\n')

('2018', '09')
('2018', '10')
('2018', '11')
('2019', '01')
('2019', '02')
('2019', '03')
Tempo total: 0.2849400043487549 s
Retorno esperado --> 20.0%
Risco estimado --> 2.99%
Fundo 31 (08830947000131) --> 5.0%; R$ 5000.0 --> R$ 5000.0
Fundo 50 (12082452000149) --> 20.0%; R$ 20000.0 --> R$ 20000.0
Fundo 60 (01430938000104) --> 5.0%; R$ 5000.0 --> R$ 5000.0
Fundo 101 (11701985000107) --> 10.0%; R$ 10000.0 --> R$ 1000.0
Fundo 114 (09620860000100) --> 30.0%; R$ 30000.0 --> R$ 1000.0
Fundo 126 (09326708000101) --> 9.24%; R$ 9237.97 --> R$ 5000.0
Fundo 131 (09720734000110) --> 20.76%; R$ 20762.03 --> R$ 1000.0


###############################################


('2019', '04')
('2019', '05')
('2019', '06')
('2019', '07')
('2019', '08')
('2019', '09')
Tempo total: 0.25188779830932617 s
Retorno esperado --> 20.0%
Risco estimado --> 2.9%
Fundo 22 (09550197000107) --> 5.0%; R$ 5000.0 --> R$ 500.0
Fundo 50 (12082452000149) --> 20.0%; R$ 20000.0 --> R$ 20000.0
Fundo 60 (01430938000104) --> 5.0%; R