### PRÉ-PROCESSAMENTO DOS DADOS

In [46]:
import numpy as np
import pandas as pd
import ta
from ta import add_all_ta_features
from ta.utils import dropna
from ta.momentum import roc, rsi, williams_r, AwesomeOscillatorIndicator
from ta.trend import ema_indicator, macd, macd_diff, macd_signal
from ta.volume import on_balance_volume
import os

### DEFINIÇÃO DE VARIÁVEIS

In [54]:
# base dir
BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
DATA_DIR = os.path.join(BASE_DIR, 'data')
# dias para predicao
DIAS = 30
test_size = 0.2

# lendo dataset
df = pd.read_csv(os.path.join(DATA_DIR, 'abev3.csv'))
df = df.iloc[:, 1:]
print(df.head())

         date   open   high    low    volume  adj_close  close
0  2023-02-14  13.15  13.36  12.99  29701200  13.000000   13.0
1  2020-07-13  14.91  14.95  13.98  36972600  12.410870   14.0
2  2020-02-27  14.90  14.97  14.28  91108200  12.854115   14.5
3  2021-04-01  15.39  15.40  14.99  15974100  13.714002   15.0
4  2021-10-22  14.83  15.23  14.54  29599100  13.714002   15.0


### CRIAÇÃO DOS INDICADORES TÉCNICOS

In [55]:
# clean NaN values
df = dropna(df)

In [56]:
# add ta features filling NaN values
df = add_all_ta_features(df, open='open', high='high', low='low', 
                         close='close', volume='volume', fillna=True)

  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


In [57]:
print(df.head())

         date   open   high    low    volume  adj_close  close    volume_adi  \
0  2023-02-14  13.15  13.36  12.99  29701200  13.000000   13.0 -2.809569e+07   
1  2020-07-13  14.91  14.95  13.98  36972600  12.410870   14.0 -6.354361e+07   
2  2020-02-27  14.90  14.97  14.28  91108200  12.854115   14.5 -9.655381e+07   
3  2021-04-01  15.39  15.40  14.99  15974100  13.714002   15.0 -1.117487e+08   
4  2021-10-22  14.83  15.23  14.54  29599100  13.714002   15.0 -1.018823e+08   

   volume_obv  volume_cmf  ...  momentum_ppo  momentum_ppo_signal  \
0    29701200   -0.945945  ...      0.000000             0.000000   
1    66673800   -0.953052  ...      0.610155             0.122031   
2   157782000   -0.611944  ...      1.375210             0.372667   
3   173756100   -0.643135  ...      2.242462             0.746626   
4   203355200   -0.501006  ...      2.880281             1.173357   

   momentum_ppo_hist  momentum_pvo  momentum_pvo_signal  momentum_pvo_hist  \
0           0.000000      

In [51]:
# encontra em qual pacote esta os indicadores que eu preciso:
indicadores_procurados = ['ema_indicator', 'rsi', 'willr', 'macd', 'obv', 'roc', 'fastk']
indicators = {'ta.volatility': dir(ta.volatility),
              'ta.momentum': dir(ta.momentum),
               'ta.trend': dir(ta.trend),
               'ta.trend.EMAIndicator': dir(ta.trend.EMAIndicator),
               }
# procurando
for k, v in indicators.items():
    for i in v:
        if i.lower() in indicadores_procurados:
            print(k, i)

ta.momentum roc
ta.momentum rsi
ta.trend MACD
ta.trend ema_indicator
ta.trend macd
ta.trend.EMAIndicator ema_indicator


In [58]:
# cria as colunas com a media exponencial ema para close, open e etc.
ema_columns = ['close', 'open', 'high', 'low']
for column in ema_columns:
    df[f'ema_{column}'] = ema_indicator(close=df[column], window=12, fillna=True)

# adicionando outros indicadores que dependem da media exponencial
df['rsi'] = rsi(close=df['ema_close'], window=14)
df['willr'] = williams_r(high=df['ema_high'], low=df['ema_low'], close=df['ema_close'], lbp=14)
# m =  ta.trend.MACD(df['ema_close'], window_slow=24, window_fast=14, window_sign=14)

df['macd'] = macd(df['ema_close'], window_slow=24, window_fast=14, fillna=True)
df['macd_signal'] = macd_signal(df['ema_close'], window_slow=24, window_fast=14, fillna=True)
df['macd_diff'] = macd_diff(df['ema_close'], window_slow=24, window_fast=14, window_sign=14,fillna=True)
df['obv'] = on_balance_volume(close=df['close'], volume=df['volume'], fillna=True)
df['roc'] = roc(close=df['close'], window=14, fillna=True)

print(df.head())

         date   open   high    low    volume  adj_close  close    volume_adi  \
0  2023-02-14  13.15  13.36  12.99  29701200  13.000000   13.0 -2.809569e+07   
1  2020-07-13  14.91  14.95  13.98  36972600  12.410870   14.0 -6.354361e+07   
2  2020-02-27  14.90  14.97  14.28  91108200  12.854115   14.5 -9.655381e+07   
3  2021-04-01  15.39  15.40  14.99  15974100  13.714002   15.0 -1.117487e+08   
4  2021-10-22  14.83  15.23  14.54  29599100  13.714002   15.0 -1.018823e+08   

   volume_obv  volume_cmf  ...   ema_open   ema_high    ema_low  rsi  willr  \
0    29701200   -0.945945  ...  13.150000  13.360000  12.990000  NaN    NaN   
1    66673800   -0.953052  ...  13.420769  13.604615  13.142307  NaN    NaN   
2   157782000   -0.611944  ...  13.648343  13.814674  13.317337  NaN    NaN   
3   173756100   -0.643135  ...  13.916290  14.058571  13.574670  NaN    NaN   
4   203355200   -0.501006  ...  14.056861  14.238790  13.723182  NaN    NaN   

       macd  macd_signal  macd_diff        o

In [59]:
df.fillna(method="ffill", inplace= True)
df.fillna(method="bfill",inplace= True)
df = df.reset_index()

print(df.head())

   index        date   open   high    low    volume  adj_close  close  \
0      0  2023-02-14  13.15  13.36  12.99  29701200  13.000000   13.0   
1      1  2020-07-13  14.91  14.95  13.98  36972600  12.410870   14.0   
2      2  2020-02-27  14.90  14.97  14.28  91108200  12.854115   14.5   
3      3  2021-04-01  15.39  15.40  14.99  15974100  13.714002   15.0   
4      4  2021-10-22  14.83  15.23  14.54  29599100  13.714002   15.0   

     volume_adi  volume_obv  ...   ema_open   ema_high    ema_low    rsi  \
0 -2.809569e+07    29701200  ...  13.150000  13.360000  12.990000  100.0   
1 -6.354361e+07    66673800  ...  13.420769  13.604615  13.142307  100.0   
2 -9.655381e+07   157782000  ...  13.648343  13.814674  13.317337  100.0   
3 -1.117487e+08   173756100  ...  13.916290  14.058571  13.574670  100.0   
4 -1.018823e+08   203355200  ...  14.056861  14.238790  13.723182  100.0   

      willr      macd  macd_signal  macd_diff        obv  roc  
0 -8.267343  0.000000     0.000000   0.0