## 1. importing packages

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import pandas_datareader as web
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
import matplotlib.ticker as ticker

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM


## 2. overall configurations

In [2]:
%matplotlib inline


tqdm.pandas()
sns.set(style='whitegrid',palette='muted')
rcParams['figure.figsize'] = 14, 10

## 3. Getting the Data

In [10]:
stock = 'BOVA11.SA'

start = dt.datetime(2009, 1, 2) # First register data
end = dt.datetime(dt.datetime.now().year,dt.datetime.now().month,dt.datetime.now().day - 1)

#bova11 = web.DataReader(stock, 'yahoo', start, end)

#bova11 = bova11.reset_index()
#bova11

In [None]:
len(bova11)

4. Pre-processing

Adjust the columns name and type

In [None]:
bova11.columns = bova11.columns.str.lower()

In [None]:
bova11.date = pd.to_datetime(bova11.date)

In [None]:
bova11.isna().sum()

Removing rows with volume 0

In [None]:
bova11 = bova11.loc[(bova11.volume != 0)].reset_index()
bova11.drop(columns='index', axis = 1, inplace = True)
bova11

Sorting the data by date

In [None]:
bova11 = bova11.sort_values(by='date').reset_index(drop=True)

Adding a column with the previous close value

In [None]:
bova11["prev_close"] = bova11.shift(1)["close"]

In [None]:
bova11.head()

Adding the close difference between dates

In [4]:
bova11["close_change"]= bova11.progress_apply(
    lambda row: 0 if np.isnan(row.prev_close) else row.close - row.prev_close,
    axis = 1
)

NameError: name 'bova11' is not defined

In [5]:
bova11.head()

NameError: name 'bova11' is not defined

Checking the dataseries over time

In [6]:
daily_closing = bova11[['date','close']]
daily_closing = daily_closing.set_index('date')
ax = daily_closing.plot()
plt.title('Valor de fechamento do BOVA11 ao longo dos anos')
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter("R${x:,.2f}"))

NameError: name 'bova11' is not defined

Adding moving average for 3, 10 and 30 Days

In [7]:
bova11['ma_3d'] = bova11['close'].rolling(3, center = True, min_periods=1).mean()
bova11['ma_10d'] = bova11['close'].rolling(10, center = True,min_periods=1).mean()
bova11['ma_30d'] = bova11['close'].rolling(30, center=True, min_periods=1).mean()

NameError: name 'bova11' is not defined

In [8]:
moving_avg = bova11[['date','ma_30d']]
moving_avg = moving_avg.set_index('date')
ax = moving_avg.plot()
plt.title('Valor de fechamento do IBOVESPA ao longo dos anos')
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter("R${x:,.2f}"))

NameError: name 'bova11' is not defined

In [9]:
bova11.head()

NameError: name 'bova11' is not defined

Select the features 

In [55]:
rows = []

for _, row in tqdm(bova11.iterrows(), total = bova11.shape[0]):
    row_data = dict(
        open = row.open,
        high = row.high,
        low = row.low,
        close = row.close,
        close_change = row.close_change,
        ma_3d = row.ma_3d,
        ma_10d = row.ma_10d,
        ma_30d = row.ma_30d
    )

    rows.append(row_data)

features_df = pd.DataFrame(rows)

100%|██████████| 3342/3342 [00:00<00:00, 6727.92it/s]


In [56]:
features_df.head()

Unnamed: 0,open,high,low,close,close_change,ma_3d,ma_10d,ma_30d
0,38.279999,40.32,37.939999,40.32,0.0,41.16,41.496,40.072667
1,40.200001,42.0,39.700001,42.0,1.68,41.546666,41.58,39.948125
2,42.060001,42.32,41.549999,42.32,0.32,41.653333,41.311429,39.892353
3,41.75,41.77,40.450001,40.639999,-1.68,41.72,41.1475,39.842223
4,40.150002,42.200001,40.150002,42.200001,1.560001,41.613333,40.801111,39.897895


In [57]:
features_df.shape

(3342, 8)

In [58]:
train_size = int(len(features_df) * 0.9)
train_size

3007

In [59]:
train_df, test_df = features_df[:train_size], features_df[train_size + 1:]
train_df.shape, test_df.shape

((3007, 8), (334, 8))

In [60]:
scaler = MinMaxScaler(feature_range = (-1, 1))
scaler = scaler.fit(train_df)

In [61]:
train_df = pd.DataFrame(
    scaler.transform(train_df),
    index = train_df.index,
    columns=train_df.columns
)

In [70]:
train_df.head()

Unnamed: 0,open,high,low,close,close_change,ma_3d,ma_10d,ma_30d
0,-0.3903,-0.917197,-0.950326,-0.913326,0.120375,-0.897816,-0.899095,-0.96204
1,-0.35972,-0.879701,-0.910766,-0.8757,0.277752,-0.889081,-0.897183,-0.964972
2,-0.330095,-0.872559,-0.869184,-0.868533,0.150351,-0.886672,-0.903297,-0.966285
3,-0.335032,-0.884834,-0.893909,-0.906159,-0.037002,-0.885166,-0.907029,-0.967465
4,-0.360516,-0.875237,-0.900652,-0.871221,0.266511,-0.887575,-0.914916,-0.966154


In [66]:
model = Sequential()