# Ibovespa forecasting using neural networks

## Machine Learning Engineer Nanodegree - Capstone Proposal

### Data Manipulation

- Data collection
- Data preparation
- Feature Engineering

### Import Python packages

In [14]:
import json
import pandas as pd

from ibovespa.utils import load_config
from ibovespa.data_collection import collect_data
from ibovespa.data_preparation import prepare_data
from ibovespa.feature_engineering import engineer_features
from ibovespa.model_training import train_model

### Load Configurations

In [2]:
# Load the project configuration; later cells read its "data_collection",
# "data_preparation" and "feature_engineering" sections.
config = load_config()

### Data Collection

In [3]:
# Read the data-collection settings from the config in one lookup.
collection_cfg = config["data_collection"]
period, stocks = collection_cfg["period"], collection_cfg["stocks"]

# Collect the raw data for the configured stocks and period.
raw_data = collect_data(data_size=period, stocks=stocks)

In [4]:
# Preview the last rows of the collected data.
raw_data.tail()

Unnamed: 0,date,^BVSP,ITUB4,BBDC4,VALE3,PETR4,PETR3,ABEV3,BBAS3,B3SA3,ITSA4
359,2021-01-21,118443.0,29.91,25.48,93.360001,27.549999,28.09,15.61,34.18,58.099998,11.04
360,2021-01-22,117172.0,29.15,24.99,92.629997,27.02,27.700001,15.03,33.419998,58.459999,10.82
361,2021-01-26,116464.0,28.25,24.41,91.75,27.0,27.6,15.4,32.790001,59.299999,10.62
362,2021-01-27,115882.0,28.67,24.940001,90.540001,27.93,28.42,15.53,33.77,59.369999,10.65
363,2021-01-28,118883.25,29.389999,25.530001,91.099998,27.76,28.6,15.54,34.540001,61.290001,10.97


### Data Preparation

In [5]:
# Read the configured test / validation split sizes.
split_sizes = config["data_preparation"]["split_size"]
test_split, valid_split = split_sizes["test"], split_sizes["validation"]

# Prepare the raw data using those split sizes.
clean_data = prepare_data(raw_data, split_valid=valid_split, split=test_split)

In [6]:
# Preview the last rows of the prepared data (the output shows a `group` column).
clean_data.tail()

Unnamed: 0,date,IBOV,ITUB4,BBDC4,VALE3,PETR4,PETR3,ABEV3,BBAS3,B3SA3,ITSA4,group
359,2021-01-21,118443.0,29.91,25.48,93.360001,27.549999,28.09,15.61,34.18,58.099998,11.04,test
360,2021-01-22,117172.0,29.15,24.99,92.629997,27.02,27.700001,15.03,33.419998,58.459999,10.82,test
361,2021-01-26,116464.0,28.25,24.41,91.75,27.0,27.6,15.4,32.790001,59.299999,10.62,test
362,2021-01-27,115882.0,28.67,24.940001,90.540001,27.93,28.42,15.53,33.77,59.369999,10.65,test
363,2021-01-28,118883.25,29.389999,25.530001,91.099998,27.76,28.6,15.54,34.540001,61.290001,10.97,test


### Feature Engineering

In [7]:
# Window setting for feature engineering, taken from the config.
feature_cfg = config["feature_engineering"]
window = feature_cfg["window"]

# Build the feature table; the call also returns a scaler object.
feature_table, scaler = engineer_features(
    clean_data,
    window,
    "train",  # NOTE(review): presumably the mode / group used for fitting - confirm
    "IBOV",   # NOTE(review): presumably the target column - confirm
    model=None,
)

In [8]:
# Preview the last rows of the engineered feature table.
feature_table.tail()

Unnamed: 0,date,group,target,lags,delta_sign,weekday_vector,lag_pct_IBOV,lag_pct_ITUB4,lag_pct_BBDC4,lag_pct_VALE3,lag_pct_PETR4,lag_pct_PETR3,lag_pct_ABEV3,lag_pct_BBAS3,lag_pct_B3SA3,lag_pct_ITSA4
350,2021-01-21,test,[0.8244488190036598],"[0.8414817727198061, 0.854475290772044, 0.8521...","[-1.0, 1.0, -1.0, 1.0, -1.0, 1.0, 0]","[0, 0, 0, 1, 0, 0, 0]","[-0.015206429246769515, 0.0027018913239267306,...","[-0.016542336365355736, -0.022820897964039744,...","[-0.020833304735144775, -0.018952071043243923,...","[-0.018498808302494507, 0.0022376034937343636,...","[-0.016730618356564864, 0.014139257885662726, ...","[-0.016826980231628874, 0.0069156556850564765,...","[-0.015365703316910784, 0.011815953666610923, ...","[-0.022366957031517476, -0.03969546310301886, ...","[-0.009737582952666357, 0.010338484563357397, ...","[-0.01572046764191315, -0.02387039589924911, -..."
351,2021-01-22,test,[0.8073350765742355],"[0.8244488190036598, 0.8414817727198061, 0.854...","[-1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 0]","[0, 0, 0, 0, 1, 0, 0]","[-0.02024161932954638, -0.015206429246769515, ...","[-0.013522422544156476, -0.016542336365355736,...","[-0.01431337848252523, -0.020833304735144775, ...","[0.011265174598843242, -0.018498808302494507, ...","[-0.023395954230122085, -0.016730618356564864,...","[-0.0188613001749125, -0.016826980231628874, 0...","[-0.025593058013161918, -0.015365703316910784,...","[-0.0101360696928533, -0.022366957031517476, -...","[-0.031666692097981786, -0.009737582952666357,...","[-0.020408206439086918, -0.01572046764191315, ..."
352,2021-01-26,test,[0.7978020084074122],"[0.8073350765742355, 0.8244488190036598, 0.841...","[-1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 0]","[0, 1, 0, 0, 0, 0, 0]","[-0.020757798464804855, -0.02024161932954638, ...","[-0.025409569801371767, -0.013522422544156476,...","[-0.019230760593457963, -0.01431337848252523, ...","[-0.007819230421605816, 0.011265174598843242, ...","[-0.019237705770383973, -0.023395954230122085,...","[-0.013883922660374526, -0.0188613001749125, -...","[-0.0371556653723536, -0.025593058013161918, -...","[-0.0222352875788413, -0.0101360696928533, -0....","[0.006196224093050873, -0.031666692097981786, ...","[-0.019927560488132712, -0.020408206439086918,..."
353,2021-01-27,test,[0.7899655032194304],"[0.7978020084074122, 0.8073350765742355, 0.824...","[-1.0, -1.0, -1.0, -1.0, 1.0, -1.0, 0]","[0, 0, 1, 0, 0, 0, 0]","[-0.011808068847045439, -0.020757798464804855,...","[-0.030874772909367576, -0.025409569801371767,...","[-0.02320928087307872, -0.019230760593457963, ...","[-0.009500132565161024, -0.007819230421605816,...","[-0.0007402093791573083, -0.019237705770383973...","[-0.0036101219752859848, -0.013883922660374526...","[0.024617424626257156, -0.0371556653723536, -0...","[-0.018850906281717816, -0.0222352875788413, -...","[0.014368802014076643, 0.006196224093050873, -...","[-0.018484271248252138, -0.019927560488132712,..."
354,2021-01-28,test,[0.8303766908404451],"[0.7899655032194304, 0.7978020084074122, 0.807...","[-1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 0]","[0, 0, 0, 1, 0, 0, 0]","[-0.009822619027526902, -0.011808068847045439,...","[0.014867259337838723, -0.030874772909367576, ...","[0.021712441210920197, -0.02320928087307872, -...","[-0.013188000920682952, -0.009500132565161024,...","[0.03444445574725119, -0.0007402093791573083, ...","[0.029710133459807997, -0.0036101219752859848,...","[0.008441566081890839, 0.024617424626257156, -...","[0.029887145924788916, -0.018850906281717816, ...","[0.0011804333174505466, 0.014368802014076643, ...","[0.002824833643546798, -0.018484271248252138, ..."


#### Save feature table to disk

In [9]:
# Write the feature table to CSV without the row index.
# `index` is documented as a bool, so pass False explicitly instead of
# relying on None being falsy.
# NOTE(review): the previewed table has list-valued columns; they will be
# stored as their string representation in the CSV - confirm readers parse them.
feature_table.to_csv("data/data.csv", index=False)

#### Save scaler attributes

In [15]:
# Serialize the scaler attributes first, so a serialization error cannot
# leave a truncated file behind.
scaler_payload = json.dumps({"maximo": scaler.maximo, "minimo": scaler.minimo})

# Write the JSON to disk. The previous version opened the file but never
# wrote to it (the json.dumps result was discarded), leaving scaler.json empty.
with open("data/scaler.json", "w", encoding="utf-8") as scaler_file:
    scaler_file.write(scaler_payload)