# Ibovespa forecasting using neural networks

## Machine Learning Engineer Nanodegree - Capstone Proposal

### Data Manipulation

- Data collection
- Data preparation
- Feature engineering

### Import Python packages

In [1]:
import json
import pandas as pd

from ibovespa.utils import load_config
from ibovespa.data_collection import collect_data
from ibovespa.data_preparation import prepare_data
from ibovespa.feature_engineering import engineer_features
from ibovespa.model_training import train_model

### Load Configurations

In [2]:
# Load the project configuration; the cells below read their settings from `config`.
config = load_config()

### Data Collection

In [3]:
# Pull the data-collection settings out of the configuration in one place.
collection_cfg = config["data_collection"]
period, stocks = collection_cfg["period"], collection_cfg["stocks"]

# Collect the raw data for the configured stocks and period.
raw_data = collect_data(
    stocks=stocks,
    data_size=period,
)

In [4]:
# Bare expression as the last line of the cell so the notebook renders the collected frame.
raw_data

Unnamed: 0,date,^BVSP,ITUB4,BBDC4,VALE3,PETR4,PETR3,ABEV3,BBAS3,B3SA3,ITSA4
0,2019-08-08,104115.0,37.000000,31.872726,47.180000,26.350000,28.870001,20.170000,48.799999,45.330002,13.12
1,2019-08-09,103996.0,36.720001,31.345453,45.490002,26.280001,28.620001,20.070000,48.650002,45.500000,13.06
2,2019-08-12,101915.0,35.200001,30.690908,45.160000,25.650000,27.850000,19.480000,47.000000,44.240002,12.60
3,2019-08-13,103299.0,35.889999,31.000000,46.500000,25.790001,27.959999,19.379999,47.549999,45.860001,12.92
4,2019-08-14,100258.0,35.099998,30.390909,44.880001,24.920000,27.100000,19.010000,45.889999,43.700001,12.75
...,...,...,...,...,...,...,...,...,...,...,...
359,2021-01-22,117172.0,29.150000,24.990000,92.629997,27.020000,27.700001,15.030000,33.419998,58.459999,10.82
360,2021-01-26,116464.0,28.250000,24.410000,91.750000,27.000000,27.600000,15.400000,32.790001,59.299999,10.62
361,2021-01-27,115882.0,28.250000,24.760000,89.199997,27.400000,27.980000,15.400000,33.750000,59.009998,10.55
362,2021-01-28,119314.0,29.389999,25.530001,91.099998,27.760000,28.600000,15.540000,34.540001,61.290001,10.97


### Data Preparation

In [5]:
# Both split sizes live under the same configuration node, so look it up once.
split_sizes = config["data_preparation"]["split_size"]
test_split = split_sizes["test"]
valid_split = split_sizes["validation"]

# Prepare the raw data using the configured test and validation split sizes.
clean_data = prepare_data(
    raw_data,
    split=test_split,
    split_valid=valid_split,
)

In [6]:
# Show the last rows of the prepared data as a quick sanity check.
clean_data.tail()

Unnamed: 0,date,IBOV,ITUB4,BBDC4,VALE3,PETR4,PETR3,ABEV3,BBAS3,B3SA3,ITSA4,group
359,2021-01-22,117172.0,29.15,24.99,92.629997,27.02,27.700001,15.03,33.419998,58.459999,10.82,test
360,2021-01-26,116464.0,28.25,24.41,91.75,27.0,27.6,15.4,32.790001,59.299999,10.62,test
361,2021-01-27,115882.0,28.25,24.76,89.199997,27.4,27.98,15.4,33.75,59.009998,10.55,test
362,2021-01-28,119314.0,29.389999,25.530001,91.099998,27.76,28.6,15.54,34.540001,61.290001,10.97,test
363,2021-01-29,116007.0,28.34,24.74,88.669998,27.059999,27.73,15.3,34.07,59.75,10.74,test


### Feature Engineering

In [7]:
# Window setting for feature engineering, taken from the configuration.
feature_cfg = config["feature_engineering"]
window = feature_cfg["window"]

# Build the feature table; the call also returns the scaler that is saved further down.
# NOTE(review): "train" presumably names the group the scaler is fitted on and
# "IBOV" the target column — confirm against `engineer_features`.
feature_table, scaler = engineer_features(
    clean_data,
    window,
    "train",
    "IBOV",
)

In [8]:
# Show the last rows of the engineered feature table.
feature_table.tail()

Unnamed: 0,date,group,target,lags,delta_sign,weekday_vector,lag_pct_IBOV,lag_pct_ITUB4,lag_pct_BBDC4,lag_pct_VALE3,lag_pct_PETR4,lag_pct_PETR3,lag_pct_ABEV3,lag_pct_BBAS3,lag_pct_B3SA3,lag_pct_ITSA4
350,2021-01-22,test,[0.8073350765742355],"[0.8244488190036598, 0.8414817727198061, 0.854...","[-1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 0]","[0, 0, 0, 0, 1, 0, 0]","[-0.02024161932954638, -0.015206429246769515, ...","[-0.013522422544156476, -0.016542336365355736,...","[-0.01431337848252523, -0.020833304735144775, ...","[0.011265174598843242, -0.018498808302494507, ...","[-0.023395954230122085, -0.016730618356564864,...","[-0.0188613001749125, -0.016826980231628874, 0...","[-0.025593058013161918, -0.015365703316910784,...","[-0.0101360696928533, -0.022366957031517476, -...","[-0.031666692097981786, -0.009737582952666357,...","[-0.020408206439086918, -0.01572046764191315, ..."
351,2021-01-26,test,[0.7978020084074122],"[0.8073350765742355, 0.8244488190036598, 0.841...","[-1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 0]","[0, 1, 0, 0, 0, 0, 0]","[-0.020757798464804855, -0.02024161932954638, ...","[-0.025409569801371767, -0.013522422544156476,...","[-0.019230760593457963, -0.01431337848252523, ...","[-0.007819230421605816, 0.011265174598843242, ...","[-0.019237705770383973, -0.023395954230122085,...","[-0.013883922660374526, -0.0188613001749125, -...","[-0.0371556653723536, -0.025593058013161918, -...","[-0.0222352875788413, -0.0101360696928533, -0....","[0.006196224093050873, -0.031666692097981786, ...","[-0.019927560488132712, -0.020408206439086918,..."
352,2021-01-27,test,[0.7899655032194304],"[0.7978020084074122, 0.8073350765742355, 0.824...","[-1.0, -1.0, -1.0, -1.0, 1.0, -1.0, 0]","[0, 0, 1, 0, 0, 0, 0]","[-0.011808068847045439, -0.020757798464804855,...","[-0.030874772909367576, -0.025409569801371767,...","[-0.02320928087307872, -0.019230760593457963, ...","[-0.009500132565161024, -0.007819230421605816,...","[-0.0007402093791573083, -0.019237705770383973...","[-0.0036101219752859848, -0.013883922660374526...","[0.024617424626257156, -0.0371556653723536, -0...","[-0.018850906281717816, -0.0222352875788413, -...","[0.014368802014076643, 0.006196224093050873, -...","[-0.018484271248252138, -0.019927560488132712,..."
353,2021-01-28,test,[0.83617664721454],"[0.7899655032194304, 0.7978020084074122, 0.807...","[-1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 0]","[0, 0, 0, 1, 0, 0, 0]","[-0.009822619027526902, -0.011808068847045439,...","[0.0, -0.030874772909367576, -0.02540956980137...","[0.01433840162464528, -0.02320928087307872, -0...","[-0.02779294879300065, -0.009500132565161024, ...","[0.014814800686306473, -0.0007402093791573083,...","[0.013768085344727954, -0.0036101219752859848,...","[0.0, 0.024617424626257156, -0.037155665372353...","[0.029277189925849045, -0.018850906281717816, ...","[-0.004890403360175766, 0.014368802014076643, ...","[-0.006591308434890197, -0.018484271248252138,..."
354,2021-01-29,test,[0.7916486014127254],"[0.83617664721454, 0.7899655032194304, 0.79780...","[1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0]","[0, 0, 0, 0, 1, 0, 0]","[0.05849767338799028, -0.009822619027526902, -...","[0.040353960695519886, 0.0, -0.030874772909367...","[0.031098564242559634, 0.01433840162464528, -0...","[0.02130046626550186, -0.02779294879300065, -0...","[0.01313870858991173, 0.014814800686306473, -0...","[0.02215871513140999, 0.013768085344727954, -0...","[0.009090931609783803, 0.0, 0.0246174246262571...","[0.023407434534143512, 0.029277189925849045, -...","[0.03863756412211505, -0.004890403360175766, 0...","[0.03981043305219978, -0.006591308434890197, -..."


#### Save feature table to disk

In [9]:
# Persist the engineered features without the DataFrame index.
# `index` is documented as a boolean flag, so pass False explicitly instead of
# relying on None happening to be falsy.
# NOTE(review): the preview above shows list-valued columns (e.g. `lags`); CSV
# stores those as strings, so readers presumably need to parse them back —
# confirm with whatever consumes data/data.csv.
feature_table.to_csv("data/data.csv", index=False)

#### Save scaler attributes

In [10]:
# Write the scaler's `maximo` / `minimo` attributes to a JSON file.
with open("data/scaler.json", "w") as scaler_file:
    scaler_attributes = {
        "maximo": scaler.maximo,
        "minimo": scaler.minimo,
    }
    json.dump(scaler_attributes, scaler_file)