### Lê o dataset contendo as cotações de uma ação do IBOV

In [11]:
import os
import pandas as pd
import numpy as np
import math
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from ta.momentum import roc, rsi, williams_r, StochRSIIndicator
from ta.trend import ema_indicator, macd, macd_diff, macd_signal
from ta.volume import on_balance_volume


# configura o diretorio padrao do projeto
BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
DATA_DIR = os.path.join(BASE_DIR, 'data')
print(DATA_DIR)

d:\Cursos\Pós Graduação PUC Minas\2023\Projeto Integrado\consultor_de_investimentos\data


### Seção de Funçoes úteis do projeto

In [4]:
def calculate_growth_rate(price):
    '''
    Essa função recebe um objeto panda series e calcula a taxa de crescimento 
    de uma ação baseando-se em seus valores de fechamento.
    Essa taxa será usada para calcular o tempo de retorno do investimento
        params: price : pandas series
        returm: growth_rate : float

    '''
    log_returns = np.log(prices / prices.shift(1)) # calcula a diferença de preço dia a dia d - (d-1)
    mean_return = log_returns.mean()
    std_dev = log_returns.std()
    annual_return = (1 + mean_return) ** 252 - 1
    annual_volatility = std_dev * np.sqrt(252)
    growth_rate = annual_return + annual_volatility / 2
    return growth_rate

def calculate_time_to_profit(current_price, desired_profit, growth_rate):
    target_price = current_price * (1 + desired_profit)
    years_to_target = math.log(target_price / current_price) / math.log(1 + growth_rate)
    return years_to_target


In [7]:
# carrega o dataset da pasta de dados
df = pd.read_csv(os.path.join(DATA_DIR, 'abev3.csv'), index_col='date')
df

Unnamed: 0_level_0,ticker,open,high,low,volume,adj_close,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-02-14,ABEV3,13.15,13.360000,12.990000,29701200,13.000000,13.000000
2020-07-13,ABEV3,14.91,14.950000,13.980000,36972600,12.410870,14.000000
2020-02-27,ABEV3,14.90,14.970000,14.280000,91108200,12.854115,14.500000
2021-04-01,ABEV3,15.39,15.400000,14.990000,15974100,13.714002,15.000000
2021-10-22,ABEV3,14.83,15.230000,14.540000,29599100,13.714002,15.000000
...,...,...,...,...,...,...,...
2020-12-07,ABEV3,14.92,15.230000,14.850000,25164600,13.270764,14.970000
2022-01-31,ABEV3,14.90,15.120000,14.770000,21318000,14.220714,14.970000
2022-09-19,ABEV3,15.30,15.530000,15.180000,14253700,14.695687,15.470000
2021-07-05,ABEV3,17.42,18.000000,17.360001,18812100,16.401945,17.940001


### Adicionando Indicadores Técnicos

In [14]:
# cria as colunas com a media exponencial ema para close, open e etc.
ema_columns = ['close', 'open', 'high', 'low']
for column in ema_columns:
    df[f'ema_{column}'] = ema_indicator(close=df[column], window=12, fillna=True)

# adicionando outros indicadores que dependem da media exponencial
df['rsi'] = rsi(close=df['ema_close'], window=14)
df['willr'] = williams_r(high=df['ema_high'], low=df['ema_low'], close=df['ema_close'], lbp=14)
# m =  ta.trend.MACD(df['ema_close'], window_slow=24, window_fast=14, window_sign=14)

df['macd'] = macd(df['ema_close'], window_slow=24, window_fast=14, fillna=True)
df['macd_signal'] = macd_signal(df['ema_close'], window_slow=24, window_fast=14, fillna=True)
df['macd_diff'] = macd_diff(df['ema_close'], window_slow=24, window_fast=14, window_sign=14,fillna=True)
df['obv'] = on_balance_volume(close=df['close'], volume=df['volume'], fillna=True)
df['roc'] = roc(close=df['close'], window=14, fillna=True)

# criando o objeto stock_rsi
stock_rsi = StochRSIIndicator(df['close'], window=14)
df['stoch_rsi'] = stock_rsi.stochrsi()
df = df.dropna()
# mostrando os indicadores tecnicos no dataset
df.iloc[:, 6:]    

Unnamed: 0_level_0,close,ema_close,ema_open,ema_high,ema_low,rsi,willr,macd,macd_signal,macd_diff,obv,roc,stoch_rsi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-04-26,14.750000,14.440544,14.425577,14.680877,14.230093,58.304788,-66.138387,0.108289,0.166692,-0.079093,765594600,-7.812500,0.387172
2022-03-29,15.250000,14.565075,14.506257,14.791511,14.328540,61.652493,-58.958879,0.110821,0.155518,-0.066354,803694700,-4.687500,0.471880
2021-05-10,16.500000,14.862756,14.759141,15.077433,14.599534,68.220874,-23.933256,0.127533,0.149921,-0.043022,833049700,3.125000,1.000000
2021-06-28,17.500000,15.268486,15.130042,15.473212,14.968837,74.605457,-12.637316,0.161137,0.152164,-0.008163,874375300,37.254902,1.000000
2021-07-23,16.750000,15.496411,15.434651,15.726564,15.239785,77.357500,-12.285541,0.198368,0.161405,0.025192,862600900,31.372549,0.742872
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-07,14.970000,14.833157,14.771775,15.040059,14.632090,43.620612,-84.846977,0.006167,0.054214,-0.054929,16479999800,1.698370,0.413731
2022-01-31,14.970000,14.854210,14.791502,15.052358,14.653307,44.107419,-83.521548,-0.010558,0.041260,-0.062100,16501317800,-1.642576,0.413731
2022-09-19,15.470000,14.948947,14.869733,15.125841,14.734337,46.352245,-77.557156,-0.018729,0.029262,-0.060902,16515571500,1.642576,0.515514
2021-07-05,17.940001,15.409109,15.262081,15.568020,15.138285,55.666291,-48.586557,-0.000502,0.023309,-0.036985,16534383600,14.122139,0.921492


### Separando os dados em Treinamento e Testes

In [15]:
columns = df.columns
print(columns)

Index(['ticker', 'open', 'high', 'low', 'volume', 'adj_close', 'close',
       'ema_close', 'ema_open', 'ema_high', 'ema_low', 'rsi', 'willr', 'macd',
       'macd_signal', 'macd_diff', 'obv', 'roc', 'stoch_rsi'],
      dtype='object')


In [16]:
# dataset X - Features
X = df[[
    'volume', 'ema_close', 'ema_open', 'ema_high', 'ema_low', 'rsi', 'willr',
     'macd', 'macd_signal', 'macd_diff', 'obv', 'roc', 'stoch_rsi'
]]

# dataset contendo os dados que eu quero prever dos proximos 30 dias
y = df['close'].shift(-30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

### Criando o Modelo de Regressão Linear e Treinando

In [19]:
model = LinearRegression()
model.fit(X_train, y_train)

### Analisando o modelo

In [20]:
y_pred = model.predict(X_test)
mse = np.mean((y_test - y_pred) ** 2)
rmse = np.sqrt(mse)
# quanto menor o rmse melhor o modelo
print(f'Root Mean Squared Error: {rmse: .2f}')

Root Mean Squared Error:  1.60


### Calculando o Tempo de Retorno do Investimento

 1 - Para calcular o tempo de retorno primeiro simular uma data de compra

 2 - Passar um valor aleatório do dataset
  
 3 - Validar se o tempo para atingir esse valor bate

In [35]:
papel = df['ticker'][0]
papel

'ABEV3'

In [40]:
prices = pd.Series(y_pred) # dados previstos pelo modelo de regressao
taxa_crescimento = calculate_growth_rate(prices)
current_price = 13.52
desired_profit = 0.2
time_to_profit = calculate_time_to_profit(current_price, desired_profit, taxa_crescimento)
print(f'Com a taxa de crescimento de {taxa_crescimento * 100: .1f}% ao ano da {papel.upper()}')
print(f'e com o preço de compra de {current_price: .2f}')
print(f'O tempo para o retorno de {desired_profit * 100: .1f}% é de {time_to_profit: .2f} ano(s)')


Com a taxa de crescimento de  16.7% ao ano da ABEV3
e com o preço de compra de  13.52
O tempo para o retorno de  20.0% é de  1.18 ano(s)


Criar um dataset novo contendo as predições para 30 dias, o valor de compra, o rmse, o retorno esperado, a taxa de crescimento do
papel e quanto tempo para atingir o retorno esperado

In [47]:
# cria um novo dataset contendo as predicoes e o tempo para o retorno
df_f = pd.DataFrame()
df_f['ticker'] = [papel for _ in range(len(y_pred))]
df_f['dias'] = [i for i in range(len(y_pred))]
df_f['predicoes'] = pd.Series(y_pred)
df_f['preco_compra'] = [current_price for _ in range(len(y_pred))]
df_f['lucro_esperado'] = [desired_profit for _ in range(len(y_pred))]
df_f['taxa_crescimento_anual'] = [taxa_crescimento for _ in range(len(y_pred))]
df_f['tempo_para_lucro_anos'] = [time_to_profit for _ in range(len(y_pred))]


df_f

Unnamed: 0,ticker,dias,predicoes,preco_compra,lucro_esperado,taxa_crescimento_anual,tempo_para_lucro_anos
0,ABEV3,0,15.187052,13.52,0.2,0.167369,1.178149
1,ABEV3,1,14.691841,13.52,0.2,0.167369,1.178149
2,ABEV3,2,14.708203,13.52,0.2,0.167369,1.178149
3,ABEV3,3,14.654945,13.52,0.2,0.167369,1.178149
4,ABEV3,4,14.384075,13.52,0.2,0.167369,1.178149
...,...,...,...,...,...,...,...
139,ABEV3,139,14.907231,13.52,0.2,0.167369,1.178149
140,ABEV3,140,14.972242,13.52,0.2,0.167369,1.178149
141,ABEV3,141,15.027457,13.52,0.2,0.167369,1.178149
142,ABEV3,142,15.267679,13.52,0.2,0.167369,1.178149
