In [67]:
import pandas as pd
import tensorflow as tf
import numpy as np
import yfinance as yfin
import matplotlib.pyplot as plt
import seaborn as sns
import time

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error, confusion_matrix
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Ibovespa

In [68]:
yfin.pdr_override()
ibovespa = yfin.download(['^BVSP'])
ibovespa

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1993-04-27,24.799999,25.400000,24.500000,24.500000,24.500000,0
1993-04-28,24.500000,24.600000,23.700001,24.299999,24.299999,0
1993-04-29,24.299999,24.799999,23.700001,23.700001,23.700001,0
1993-04-30,23.700001,24.200001,23.700001,24.100000,24.100000,0
1993-05-03,24.100000,24.400000,23.799999,24.100000,24.100000,0
...,...,...,...,...,...,...
2023-09-25,116009.000000,116031.000000,115573.000000,115925.000000,115925.000000,9580000
2023-09-26,115922.000000,115922.000000,114162.000000,114193.000000,114193.000000,11241800
2023-09-27,114194.000000,115340.000000,113366.000000,114327.000000,114327.000000,13359300
2023-09-28,114875.000000,115954.000000,114811.000000,115731.000000,115731.000000,10387000


In [69]:
ibovespa = ibovespa.drop(['Volume','Adj Close'],axis=1)
ibovespa

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1993-04-27,24.799999,25.400000,24.500000,24.500000
1993-04-28,24.500000,24.600000,23.700001,24.299999
1993-04-29,24.299999,24.799999,23.700001,23.700001
1993-04-30,23.700001,24.200001,23.700001,24.100000
1993-05-03,24.100000,24.400000,23.799999,24.100000
...,...,...,...,...
2023-09-25,116009.000000,116031.000000,115573.000000,115925.000000
2023-09-26,115922.000000,115922.000000,114162.000000,114193.000000
2023-09-27,114194.000000,115340.000000,113366.000000,114327.000000
2023-09-28,114875.000000,115954.000000,114811.000000,115731.000000


In [70]:
ibovespa['Tomorrow Close'] = ibovespa['Close'].shift(-1)
ibovespa

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1993-04-27,24.799999,25.400000,24.500000,24.500000,24.299999
1993-04-28,24.500000,24.600000,23.700001,24.299999,23.700001
1993-04-29,24.299999,24.799999,23.700001,23.700001,24.100000
1993-04-30,23.700001,24.200001,23.700001,24.100000,24.100000
1993-05-03,24.100000,24.400000,23.799999,24.100000,24.900000
...,...,...,...,...,...
2023-09-25,116009.000000,116031.000000,115573.000000,115925.000000,114193.000000
2023-09-26,115922.000000,115922.000000,114162.000000,114193.000000,114327.000000
2023-09-27,114194.000000,115340.000000,113366.000000,114327.000000,115731.000000
2023-09-28,114875.000000,115954.000000,114811.000000,115731.000000,116565.000000


In [71]:
ibovespa['Increased'] = (ibovespa['Tomorrow Close'] > ibovespa['Close']).astype(int)
ibovespa

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close,Increased
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1993-04-27,24.799999,25.400000,24.500000,24.500000,24.299999,0
1993-04-28,24.500000,24.600000,23.700001,24.299999,23.700001,0
1993-04-29,24.299999,24.799999,23.700001,23.700001,24.100000,1
1993-04-30,23.700001,24.200001,23.700001,24.100000,24.100000,0
1993-05-03,24.100000,24.400000,23.799999,24.100000,24.900000,1
...,...,...,...,...,...,...
2023-09-25,116009.000000,116031.000000,115573.000000,115925.000000,114193.000000,0
2023-09-26,115922.000000,115922.000000,114162.000000,114193.000000,114327.000000,1
2023-09-27,114194.000000,115340.000000,113366.000000,114327.000000,115731.000000,1
2023-09-28,114875.000000,115954.000000,114811.000000,115731.000000,116565.000000,1


# S&P500

In [72]:
yfin.pdr_override()
sp500 = yfin.download(['^GSPC'])
sp500

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1927-12-30,17.660000,17.660000,17.660000,17.660000,17.660000,0
1928-01-03,17.760000,17.760000,17.760000,17.760000,17.760000,0
1928-01-04,17.719999,17.719999,17.719999,17.719999,17.719999,0
1928-01-05,17.549999,17.549999,17.549999,17.549999,17.549999,0
1928-01-06,17.660000,17.660000,17.660000,17.660000,17.660000,0
...,...,...,...,...,...,...
2023-09-25,4310.620117,4338.509766,4302.700195,4337.439941,4337.439941,3195650000
2023-09-26,4312.879883,4313.009766,4265.979980,4273.529785,4273.529785,3472340000
2023-09-27,4282.629883,4292.069824,4238.629883,4274.509766,4274.509766,3875880000
2023-09-28,4269.649902,4317.270020,4264.379883,4299.700195,4299.700195,3846230000


In [73]:
sp500 = sp500.drop(['Volume','Adj Close'],axis=1)
sp500

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1927-12-30,17.660000,17.660000,17.660000,17.660000
1928-01-03,17.760000,17.760000,17.760000,17.760000
1928-01-04,17.719999,17.719999,17.719999,17.719999
1928-01-05,17.549999,17.549999,17.549999,17.549999
1928-01-06,17.660000,17.660000,17.660000,17.660000
...,...,...,...,...
2023-09-25,4310.620117,4338.509766,4302.700195,4337.439941
2023-09-26,4312.879883,4313.009766,4265.979980,4273.529785
2023-09-27,4282.629883,4292.069824,4238.629883,4274.509766
2023-09-28,4269.649902,4317.270020,4264.379883,4299.700195


In [74]:
sp500['Tomorrow Close'] = sp500['Close'].shift(-1)
sp500

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1927-12-30,17.660000,17.660000,17.660000,17.660000,17.760000
1928-01-03,17.760000,17.760000,17.760000,17.760000,17.719999
1928-01-04,17.719999,17.719999,17.719999,17.719999,17.549999
1928-01-05,17.549999,17.549999,17.549999,17.549999,17.660000
1928-01-06,17.660000,17.660000,17.660000,17.660000,17.500000
...,...,...,...,...,...
2023-09-25,4310.620117,4338.509766,4302.700195,4337.439941,4273.529785
2023-09-26,4312.879883,4313.009766,4265.979980,4273.529785,4274.509766
2023-09-27,4282.629883,4292.069824,4238.629883,4274.509766,4299.700195
2023-09-28,4269.649902,4317.270020,4264.379883,4299.700195,4288.049805


In [75]:
sp500['Increased'] = (sp500['Tomorrow Close'] > sp500['Close']).astype(int)
sp500

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close,Increased
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1927-12-30,17.660000,17.660000,17.660000,17.660000,17.760000,1
1928-01-03,17.760000,17.760000,17.760000,17.760000,17.719999,0
1928-01-04,17.719999,17.719999,17.719999,17.719999,17.549999,0
1928-01-05,17.549999,17.549999,17.549999,17.549999,17.660000,1
1928-01-06,17.660000,17.660000,17.660000,17.660000,17.500000,0
...,...,...,...,...,...,...
2023-09-25,4310.620117,4338.509766,4302.700195,4337.439941,4273.529785,0
2023-09-26,4312.879883,4313.009766,4265.979980,4273.529785,4274.509766,1
2023-09-27,4282.629883,4292.069824,4238.629883,4274.509766,4299.700195,1
2023-09-28,4269.649902,4317.270020,4264.379883,4299.700195,4288.049805,0


# Gold

In [76]:
yfin.pdr_override()
gold = yfin.download(['GC=F'])
gold

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-08-30,273.899994,273.899994,273.899994,273.899994,273.899994,0
2000-08-31,274.799988,278.299988,274.799988,278.299988,278.299988,0
2000-09-01,277.000000,277.000000,277.000000,277.000000,277.000000,0
2000-09-05,275.799988,275.799988,275.799988,275.799988,275.799988,2
2000-09-06,274.200012,274.200012,274.200012,274.200012,274.200012,0
...,...,...,...,...,...,...
2023-09-25,1916.599976,1916.599976,1916.599976,1916.599976,1916.599976,114
2023-09-26,1905.000000,1905.099976,1900.400024,1900.400024,1900.400024,161
2023-09-27,1871.599976,1871.599976,1871.599976,1871.599976,1871.599976,12251
2023-09-28,1875.500000,1878.199951,1856.699951,1860.400024,1860.400024,4654


In [77]:
gold = gold.drop(['Volume','Adj Close'],axis=1)
gold

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-08-30,273.899994,273.899994,273.899994,273.899994
2000-08-31,274.799988,278.299988,274.799988,278.299988
2000-09-01,277.000000,277.000000,277.000000,277.000000
2000-09-05,275.799988,275.799988,275.799988,275.799988
2000-09-06,274.200012,274.200012,274.200012,274.200012
...,...,...,...,...
2023-09-25,1916.599976,1916.599976,1916.599976,1916.599976
2023-09-26,1905.000000,1905.099976,1900.400024,1900.400024
2023-09-27,1871.599976,1871.599976,1871.599976,1871.599976
2023-09-28,1875.500000,1878.199951,1856.699951,1860.400024


In [78]:
gold['Tomorrow Close'] = gold['Close'].shift(-1)
gold

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-08-30,273.899994,273.899994,273.899994,273.899994,278.299988
2000-08-31,274.799988,278.299988,274.799988,278.299988,277.000000
2000-09-01,277.000000,277.000000,277.000000,277.000000,275.799988
2000-09-05,275.799988,275.799988,275.799988,275.799988,274.200012
2000-09-06,274.200012,274.200012,274.200012,274.200012,274.000000
...,...,...,...,...,...
2023-09-25,1916.599976,1916.599976,1916.599976,1916.599976,1900.400024
2023-09-26,1905.000000,1905.099976,1900.400024,1900.400024,1871.599976
2023-09-27,1871.599976,1871.599976,1871.599976,1871.599976,1860.400024
2023-09-28,1875.500000,1878.199951,1856.699951,1860.400024,1864.599976


In [79]:
gold['Increased'] = (gold['Tomorrow Close'] > gold['Close']).astype(int)
gold

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close,Increased
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-08-30,273.899994,273.899994,273.899994,273.899994,278.299988,1
2000-08-31,274.799988,278.299988,274.799988,278.299988,277.000000,0
2000-09-01,277.000000,277.000000,277.000000,277.000000,275.799988,0
2000-09-05,275.799988,275.799988,275.799988,275.799988,274.200012,0
2000-09-06,274.200012,274.200012,274.200012,274.200012,274.000000,0
...,...,...,...,...,...,...
2023-09-25,1916.599976,1916.599976,1916.599976,1916.599976,1900.400024,0
2023-09-26,1905.000000,1905.099976,1900.400024,1900.400024,1871.599976,0
2023-09-27,1871.599976,1871.599976,1871.599976,1871.599976,1860.400024,0
2023-09-28,1875.500000,1878.199951,1856.699951,1860.400024,1864.599976,1


# Euronext 100

In [80]:
yfin.pdr_override()
euronext = yfin.download(['^N100'])
euronext

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-12-31,1000.000000,1000.000000,1000.000000,1000.000000,1000.000000,0
2000-01-03,996.770020,996.770020,996.770020,996.770020,996.770020,0
2000-01-04,955.969971,955.969971,955.969971,955.969971,955.969971,0
2000-01-05,930.260010,930.260010,930.260010,930.260010,930.260010,0
2000-01-06,922.460022,922.460022,922.460022,922.460022,922.460022,0
...,...,...,...,...,...,...
2023-09-25,1331.000000,1333.219971,1313.390015,1320.489990,1320.489990,261597500
2023-09-26,1319.569946,1319.569946,1304.069946,1310.550049,1310.550049,268638800
2023-09-27,1310.540039,1317.839966,1309.079956,1312.130005,1312.130005,250592600
2023-09-28,1312.420044,1318.160034,1304.119995,1317.920044,1317.920044,277087300


In [81]:
euronext = euronext.drop(['Volume','Adj Close'],axis=1)
euronext

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999-12-31,1000.000000,1000.000000,1000.000000,1000.000000
2000-01-03,996.770020,996.770020,996.770020,996.770020
2000-01-04,955.969971,955.969971,955.969971,955.969971
2000-01-05,930.260010,930.260010,930.260010,930.260010
2000-01-06,922.460022,922.460022,922.460022,922.460022
...,...,...,...,...
2023-09-25,1331.000000,1333.219971,1313.390015,1320.489990
2023-09-26,1319.569946,1319.569946,1304.069946,1310.550049
2023-09-27,1310.540039,1317.839966,1309.079956,1312.130005
2023-09-28,1312.420044,1318.160034,1304.119995,1317.920044


In [82]:
euronext['Tomorrow Close'] = euronext['Close'].shift(-1)
euronext

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1999-12-31,1000.000000,1000.000000,1000.000000,1000.000000,996.770020
2000-01-03,996.770020,996.770020,996.770020,996.770020,955.969971
2000-01-04,955.969971,955.969971,955.969971,955.969971,930.260010
2000-01-05,930.260010,930.260010,930.260010,930.260010,922.460022
2000-01-06,922.460022,922.460022,922.460022,922.460022,943.880005
...,...,...,...,...,...
2023-09-25,1331.000000,1333.219971,1313.390015,1320.489990,1310.550049
2023-09-26,1319.569946,1319.569946,1304.069946,1310.550049,1312.130005
2023-09-27,1310.540039,1317.839966,1309.079956,1312.130005,1317.920044
2023-09-28,1312.420044,1318.160034,1304.119995,1317.920044,1323.709961


In [83]:
euronext['Increased'] = (euronext['Tomorrow Close'] > euronext['Close']).astype(int)
euronext

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close,Increased
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-12-31,1000.000000,1000.000000,1000.000000,1000.000000,996.770020,0
2000-01-03,996.770020,996.770020,996.770020,996.770020,955.969971,0
2000-01-04,955.969971,955.969971,955.969971,955.969971,930.260010,0
2000-01-05,930.260010,930.260010,930.260010,930.260010,922.460022,0
2000-01-06,922.460022,922.460022,922.460022,922.460022,943.880005,1
...,...,...,...,...,...,...
2023-09-25,1331.000000,1333.219971,1313.390015,1320.489990,1310.550049,0
2023-09-26,1319.569946,1319.569946,1304.069946,1310.550049,1312.130005,1
2023-09-27,1310.540039,1317.839966,1309.079956,1312.130005,1317.920044,1
2023-09-28,1312.420044,1318.160034,1304.119995,1317.920044,1323.709961,1


# SSE Composite - Shangai

In [84]:
yfin.pdr_override()
shangai = yfin.download(['000001.SS'])
shangai

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1997-07-02,1255.909058,1261.571045,1147.331055,1199.061035,1199.061035,0
1997-07-03,1194.676025,1194.676025,1149.939941,1150.623047,1150.623047,0
1997-07-04,1138.921021,1163.249023,1124.776001,1159.342041,1159.342041,0
1997-07-07,1161.707031,1163.447021,1085.572021,1096.818970,1096.818970,0
1997-07-08,1092.798950,1115.432983,1066.043945,1109.666016,1109.666016,0
...,...,...,...,...,...,...
2023-09-22,3084.757080,3132.558105,3078.801025,3132.431885,3132.431885,286000
2023-09-25,3131.208008,3131.208008,3112.150879,3115.606934,3115.606934,258100
2023-09-26,3114.020996,3120.289062,3102.272949,3102.272949,3102.272949,222600
2023-09-27,3104.040039,3125.447021,3103.843994,3107.315918,3107.315918,240500


In [85]:
shangai = shangai.drop(['Volume','Adj Close'],axis=1)
shangai

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1997-07-02,1255.909058,1261.571045,1147.331055,1199.061035
1997-07-03,1194.676025,1194.676025,1149.939941,1150.623047
1997-07-04,1138.921021,1163.249023,1124.776001,1159.342041
1997-07-07,1161.707031,1163.447021,1085.572021,1096.818970
1997-07-08,1092.798950,1115.432983,1066.043945,1109.666016
...,...,...,...,...
2023-09-22,3084.757080,3132.558105,3078.801025,3132.431885
2023-09-25,3131.208008,3131.208008,3112.150879,3115.606934
2023-09-26,3114.020996,3120.289062,3102.272949,3102.272949
2023-09-27,3104.040039,3125.447021,3103.843994,3107.315918


In [86]:
shangai['Tomorrow Close'] = shangai['Close'].shift(-1)
shangai

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1997-07-02,1255.909058,1261.571045,1147.331055,1199.061035,1150.623047
1997-07-03,1194.676025,1194.676025,1149.939941,1150.623047,1159.342041
1997-07-04,1138.921021,1163.249023,1124.776001,1159.342041,1096.818970
1997-07-07,1161.707031,1163.447021,1085.572021,1096.818970,1109.666016
1997-07-08,1092.798950,1115.432983,1066.043945,1109.666016,1120.840942
...,...,...,...,...,...
2023-09-22,3084.757080,3132.558105,3078.801025,3132.431885,3115.606934
2023-09-25,3131.208008,3131.208008,3112.150879,3115.606934,3102.272949
2023-09-26,3114.020996,3120.289062,3102.272949,3102.272949,3107.315918
2023-09-27,3104.040039,3125.447021,3103.843994,3107.315918,3110.475098


In [87]:
shangai['Increased'] = (shangai['Tomorrow Close'] > shangai['Close']).astype(int)
shangai

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close,Increased
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1997-07-02,1255.909058,1261.571045,1147.331055,1199.061035,1150.623047,0
1997-07-03,1194.676025,1194.676025,1149.939941,1150.623047,1159.342041,1
1997-07-04,1138.921021,1163.249023,1124.776001,1159.342041,1096.818970,0
1997-07-07,1161.707031,1163.447021,1085.572021,1096.818970,1109.666016,1
1997-07-08,1092.798950,1115.432983,1066.043945,1109.666016,1120.840942,1
...,...,...,...,...,...,...
2023-09-22,3084.757080,3132.558105,3078.801025,3132.431885,3115.606934,0
2023-09-25,3131.208008,3131.208008,3112.150879,3115.606934,3102.272949,0
2023-09-26,3114.020996,3120.289062,3102.272949,3102.272949,3107.315918,1
2023-09-27,3104.040039,3125.447021,3103.843994,3107.315918,3110.475098,1


# USA Treasury 5 Years Bonds

In [88]:
yfin.pdr_override()
usa5 = yfin.download(['^FVX'])
usa5

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1962-01-02,3.880,3.880,3.880,3.880,3.880,0
1962-01-03,3.870,3.870,3.870,3.870,3.870,0
1962-01-04,3.860,3.860,3.860,3.860,3.860,0
1962-01-05,3.890,3.890,3.890,3.890,3.890,0
1962-01-08,3.910,3.910,3.910,3.910,3.910,0
...,...,...,...,...,...,...
2023-09-25,4.602,4.625,4.582,4.620,4.620,0
2023-09-26,4.575,4.632,4.573,4.625,4.625,0
2023-09-27,4.589,4.717,4.573,4.703,4.703,0
2023-09-28,4.691,4.747,4.643,4.643,4.643,0


In [89]:
usa5 = usa5.drop(['Volume','Adj Close'],axis=1)
usa5

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1962-01-02,3.880,3.880,3.880,3.880
1962-01-03,3.870,3.870,3.870,3.870
1962-01-04,3.860,3.860,3.860,3.860
1962-01-05,3.890,3.890,3.890,3.890
1962-01-08,3.910,3.910,3.910,3.910
...,...,...,...,...
2023-09-25,4.602,4.625,4.582,4.620
2023-09-26,4.575,4.632,4.573,4.625
2023-09-27,4.589,4.717,4.573,4.703
2023-09-28,4.691,4.747,4.643,4.643


In [90]:
usa5['Tomorrow Close'] = usa5['Close'].shift(-1)
usa5

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02,3.880,3.880,3.880,3.880,3.870
1962-01-03,3.870,3.870,3.870,3.870,3.860
1962-01-04,3.860,3.860,3.860,3.860,3.890
1962-01-05,3.890,3.890,3.890,3.890,3.910
1962-01-08,3.910,3.910,3.910,3.910,3.930
...,...,...,...,...,...
2023-09-25,4.602,4.625,4.582,4.620,4.625
2023-09-26,4.575,4.632,4.573,4.625,4.703
2023-09-27,4.589,4.717,4.573,4.703,4.643
2023-09-28,4.691,4.747,4.643,4.643,4.606


In [91]:
usa5['Increased'] = (usa5['Tomorrow Close'] > usa5['Close']).astype(int)
usa5

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close,Increased
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1962-01-02,3.880,3.880,3.880,3.880,3.870,0
1962-01-03,3.870,3.870,3.870,3.870,3.860,0
1962-01-04,3.860,3.860,3.860,3.860,3.890,1
1962-01-05,3.890,3.890,3.890,3.890,3.910,1
1962-01-08,3.910,3.910,3.910,3.910,3.930,1
...,...,...,...,...,...,...
2023-09-25,4.602,4.625,4.582,4.620,4.625,1
2023-09-26,4.575,4.632,4.573,4.625,4.703,1
2023-09-27,4.589,4.717,4.573,4.703,4.643,0
2023-09-28,4.691,4.747,4.643,4.643,4.606,0


# Dollar / BRL

In [92]:
yfin.pdr_override()
dollar_brl = yfin.download(['BRL=X'])
dollar_brl

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2003-12-01,2.9460,2.9460,2.923000,2.9230,2.9230,0
2003-12-02,2.9230,2.9310,2.923000,2.9310,2.9310,0
2003-12-03,2.9310,2.9360,2.926000,2.9310,2.9310,0
2003-12-04,2.9310,2.9430,2.931000,2.9430,2.9430,0
2003-12-05,2.9430,2.9480,2.934000,2.9340,2.9340,0
...,...,...,...,...,...,...
2023-09-26,4.9683,4.9829,4.936556,4.9683,4.9683,0
2023-09-27,4.9862,5.0559,4.983250,4.9862,4.9862,0
2023-09-28,5.0422,5.0652,5.014375,5.0422,5.0422,0
2023-09-29,5.0328,5.0356,4.988600,5.0328,5.0328,0


In [93]:
dollar_brl = dollar_brl.drop(['Volume','Adj Close'],axis=1)
dollar_brl

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-12-01,2.9460,2.9460,2.923000,2.9230
2003-12-02,2.9230,2.9310,2.923000,2.9310
2003-12-03,2.9310,2.9360,2.926000,2.9310
2003-12-04,2.9310,2.9430,2.931000,2.9430
2003-12-05,2.9430,2.9480,2.934000,2.9340
...,...,...,...,...
2023-09-26,4.9683,4.9829,4.936556,4.9683
2023-09-27,4.9862,5.0559,4.983250,4.9862
2023-09-28,5.0422,5.0652,5.014375,5.0422
2023-09-29,5.0328,5.0356,4.988600,5.0328


In [94]:
dollar_brl['Tomorrow Close'] = dollar_brl['Close'].shift(-1)
dollar_brl

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2003-12-01,2.9460,2.9460,2.923000,2.9230,2.9310
2003-12-02,2.9230,2.9310,2.923000,2.9310,2.9310
2003-12-03,2.9310,2.9360,2.926000,2.9310,2.9430
2003-12-04,2.9310,2.9430,2.931000,2.9430,2.9340
2003-12-05,2.9430,2.9480,2.934000,2.9340,2.9400
...,...,...,...,...,...
2023-09-26,4.9683,4.9829,4.936556,4.9683,4.9862
2023-09-27,4.9862,5.0559,4.983250,4.9862,5.0422
2023-09-28,5.0422,5.0652,5.014375,5.0422,5.0328
2023-09-29,5.0328,5.0356,4.988600,5.0328,5.0320


In [95]:
dollar_brl['Increased'] = (dollar_brl['Tomorrow Close'] > dollar_brl['Close']).astype(int)
dollar_brl

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close,Increased
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2003-12-01,2.9460,2.9460,2.923000,2.9230,2.9310,1
2003-12-02,2.9230,2.9310,2.923000,2.9310,2.9310,0
2003-12-03,2.9310,2.9360,2.926000,2.9310,2.9430,1
2003-12-04,2.9310,2.9430,2.931000,2.9430,2.9340,0
2003-12-05,2.9430,2.9480,2.934000,2.9340,2.9400,1
...,...,...,...,...,...,...
2023-09-26,4.9683,4.9829,4.936556,4.9683,4.9862,1
2023-09-27,4.9862,5.0559,4.983250,4.9862,5.0422,1
2023-09-28,5.0422,5.0652,5.014375,5.0422,5.0328,0
2023-09-29,5.0328,5.0356,4.988600,5.0328,5.0320,0


# Euro / BRL

In [96]:
yfin.pdr_override()
euro_brl = yfin.download(['EURBRL=X'])
euro_brl

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2003-12-01,3.4621,3.4621,3.4621,3.4621,3.4621,0
2003-12-02,3.4151,3.4151,3.4151,3.4151,3.4151,0
2003-12-03,3.4799,3.4799,3.4799,3.4799,3.4799,0
2003-12-04,3.4597,3.4597,3.4597,3.4597,3.4597,0
2003-12-05,3.4794,3.4794,3.4794,3.4794,3.4794,0
...,...,...,...,...,...,...
2023-09-25,5.2256,5.2623,5.2249,5.2256,5.2256,0
2023-09-26,5.2249,5.2786,5.2249,5.2567,5.2567,0
2023-09-27,5.2417,5.3098,5.2387,5.2698,5.2698,0
2023-09-28,5.2387,5.3357,5.2387,5.2971,5.2971,0


In [97]:
euro_brl = euro_brl.drop(['Volume','Adj Close'],axis=1)
euro_brl

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-12-01,3.4621,3.4621,3.4621,3.4621
2003-12-02,3.4151,3.4151,3.4151,3.4151
2003-12-03,3.4799,3.4799,3.4799,3.4799
2003-12-04,3.4597,3.4597,3.4597,3.4597
2003-12-05,3.4794,3.4794,3.4794,3.4794
...,...,...,...,...
2023-09-25,5.2256,5.2623,5.2249,5.2256
2023-09-26,5.2249,5.2786,5.2249,5.2567
2023-09-27,5.2417,5.3098,5.2387,5.2698
2023-09-28,5.2387,5.3357,5.2387,5.2971


In [98]:
euro_brl['Tomorrow Close'] = euro_brl['Close'].shift(-1)
euro_brl

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2003-12-01,3.4621,3.4621,3.4621,3.4621,3.4151
2003-12-02,3.4151,3.4151,3.4151,3.4151,3.4799
2003-12-03,3.4799,3.4799,3.4799,3.4799,3.4597
2003-12-04,3.4597,3.4597,3.4597,3.4597,3.4794
2003-12-05,3.4794,3.4794,3.4794,3.4794,3.5075
...,...,...,...,...,...
2023-09-25,5.2256,5.2623,5.2249,5.2256,5.2567
2023-09-26,5.2249,5.2786,5.2249,5.2567,5.2698
2023-09-27,5.2417,5.3098,5.2387,5.2698,5.2971
2023-09-28,5.2387,5.3357,5.2387,5.2971,5.3214


In [99]:
euro_brl['Increased'] = (euro_brl['Tomorrow Close'] > euro_brl['Close']).astype(int)
euro_brl

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow Close,Increased
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2003-12-01,3.4621,3.4621,3.4621,3.4621,3.4151,0
2003-12-02,3.4151,3.4151,3.4151,3.4151,3.4799,1
2003-12-03,3.4799,3.4799,3.4799,3.4799,3.4597,0
2003-12-04,3.4597,3.4597,3.4597,3.4597,3.4794,1
2003-12-05,3.4794,3.4794,3.4794,3.4794,3.5075,1
...,...,...,...,...,...,...
2023-09-25,5.2256,5.2623,5.2249,5.2256,5.2567,1
2023-09-26,5.2249,5.2786,5.2249,5.2567,5.2698,1
2023-09-27,5.2417,5.3098,5.2387,5.2698,5.2971,1
2023-09-28,5.2387,5.3357,5.2387,5.2971,5.3214,1


# Trend and close ratio

In [100]:
dataframes = [ibovespa,sp500,gold,euronext,shangai,usa5,dollar_brl,euro_brl]
dataframes_name = ['ibovespa','sp500','gold','euronext','shangai','usa5','dollar_brl','euro_brl']

In [101]:
def get_trend_and_close_ratio(dataframe):

  horizons = [3,7,10]
  new_predictors = []

  for horizon in horizons:
    rolling_averages = dataframe.rolling(horizon).mean()

    ratio_column = f'Close_Ratio{horizon}'
    dataframe[ratio_column] = dataframe['Close'] / rolling_averages['Close']

    trend_column = f'Trend_{horizon}'
    dataframe[trend_column] = dataframe.shift(1).rolling(horizon).sum()['Increased']

  return dataframe

In [102]:
for dataframe in dataframes:
  dataframe = get_trend_and_close_ratio(dataframe)
  dataframe = dataframe.dropna()
  dataframe = dataframe.loc['2020-01-01':,:]

In [103]:
df = pd.DataFrame()
for i in range(len(dataframes)):
  for column in ibovespa.columns[5:]:
    df[f'{column}_{dataframes_name[i]}'] = dataframes[i][f'{column}']

In [104]:
df = df.dropna()

In [105]:
date = '2020-01-01'

In [106]:
df_y = df['Increased_ibovespa']
df_y = df_y[date:]
df_y

Date
2020-01-02    0
2020-01-03    0
2020-01-06    0
2020-01-07    0
2020-01-08    0
             ..
2023-09-22    0
2023-09-25    0
2023-09-26    1
2023-09-27    1
2023-09-28    1
Name: Increased_ibovespa, Length: 845, dtype: int64

In [107]:
df_x = df

for name in dataframes_name:
  df_x = df_x.drop([f'Increased_{name}'],axis=1)

df_x = df_x[date:]
df_x

Unnamed: 0_level_0,Close_Ratio3_ibovespa,Trend_3_ibovespa,Close_Ratio7_ibovespa,Trend_7_ibovespa,Close_Ratio10_ibovespa,Trend_10_ibovespa,Close_Ratio3_sp500,Trend_3_sp500,Close_Ratio7_sp500,Trend_7_sp500,...,Close_Ratio7_dollar_brl,Trend_7_dollar_brl,Close_Ratio10_dollar_brl,Trend_10_dollar_brl,Close_Ratio3_euro_brl,Trend_3_euro_brl,Close_Ratio7_euro_brl,Trend_7_euro_brl,Close_Ratio10_euro_brl,Trend_10_euro_brl
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,1.013239,1.0,1.019182,4.0,1.028194,6.0,1.006553,2.0,1.007409,5.0,...,0.993726,3.0,0.991133,6.0,0.998125,1.0,0.995718,2.0,0.994232,4.0
2020-01-03,1.002490,1.0,1.008549,3.0,1.015567,6.0,0.998053,2.0,0.999818,4.0,...,0.997577,3.0,0.993999,6.0,0.999436,2.0,0.997554,3.0,0.995755,4.0
2020-01-06,0.992853,1.0,0.999296,3.0,1.004720,5.0,0.999986,2.0,1.002337,5.0,...,1.006836,3.0,1.002868,6.0,1.006460,2.0,1.005663,4.0,1.005177,4.0
2020-01-07,0.996410,0.0,0.996477,2.0,1.000844,4.0,0.999303,1.0,0.999647,4.0,...,1.007476,4.0,1.004244,6.0,1.004495,3.0,1.007176,4.0,1.006906,5.0
2020-01-08,0.997010,0.0,0.994092,1.0,0.996330,3.0,1.002325,2.0,1.003971,4.0,...,1.007347,5.0,1.006092,6.0,0.999059,2.0,1.004235,4.0,1.004243,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-22,0.991957,1.0,0.984160,2.0,0.984663,5.0,0.992945,0.0,0.978491,2.0,...,1.010546,3.0,1.006243,5.0,1.007353,2.0,1.009244,3.0,1.003756,4.0
2023-09-25,0.999127,0.0,0.987597,1.0,0.984750,4.0,1.001911,1.0,0.987786,2.0,...,1.009795,3.0,1.007048,4.0,0.999841,1.0,1.004606,3.0,0.999321,4.0
2023-09-26,0.989749,0.0,0.978277,1.0,0.973158,3.0,0.991459,1.0,0.978862,2.0,...,1.013868,4.0,1.013265,5.0,1.001995,2.0,1.008496,4.0,1.006061,5.0
2023-09-27,0.995750,1.0,0.984196,2.0,0.977506,3.0,0.995192,2.0,0.984855,2.0,...,1.013997,5.0,1.016095,5.0,1.003638,2.0,1.008122,5.0,1.009349,5.0


In [108]:
predictors = df_x.columns
predictors

Index(['Close_Ratio3_ibovespa', 'Trend_3_ibovespa', 'Close_Ratio7_ibovespa',
       'Trend_7_ibovespa', 'Close_Ratio10_ibovespa', 'Trend_10_ibovespa',
       'Close_Ratio3_sp500', 'Trend_3_sp500', 'Close_Ratio7_sp500',
       'Trend_7_sp500', 'Close_Ratio10_sp500', 'Trend_10_sp500',
       'Close_Ratio3_gold', 'Trend_3_gold', 'Close_Ratio7_gold',
       'Trend_7_gold', 'Close_Ratio10_gold', 'Trend_10_gold',
       'Close_Ratio3_euronext', 'Trend_3_euronext', 'Close_Ratio7_euronext',
       'Trend_7_euronext', 'Close_Ratio10_euronext', 'Trend_10_euronext',
       'Close_Ratio3_shangai', 'Trend_3_shangai', 'Close_Ratio7_shangai',
       'Trend_7_shangai', 'Close_Ratio10_shangai', 'Trend_10_shangai',
       'Close_Ratio3_usa5', 'Trend_3_usa5', 'Close_Ratio7_usa5',
       'Trend_7_usa5', 'Close_Ratio10_usa5', 'Trend_10_usa5',
       'Close_Ratio3_dollar_brl', 'Trend_3_dollar_brl',
       'Close_Ratio7_dollar_brl', 'Trend_7_dollar_brl',
       'Close_Ratio10_dollar_brl', 'Trend_10_dollar_br

In [109]:
from sklearn.preprocessing import MinMaxScaler

sc = StandardScaler()
df_x = sc.fit_transform(df_x)
df_x = pd.DataFrame(df_x, columns=predictors)

In [110]:
train_size = int(round(len(df_y)*0.7,0))
test_size = len(df_y) - train_size

In [111]:
# Splits the X dataset into train and test
x_train = np.array(df_x[0:train_size])
x_test = np.array(df_x[train_size:])

# Splits the Y dataset into train and test
y_train = np.array(df_y[:train_size])
y_test = np.array(df_y[train_size:])

#Logistic Regression

In [112]:
from sklearn.linear_model import LogisticRegression

# Defina os hiperparâmetros que você deseja ajustar
param_grid = {
    'penalty': ['l1', 'l2'],           # Tipo de penalização
    'C': [0.1, 1, 10],                # Parâmetro de inversão da regularização
    'solver': ['liblinear', 'saga'],   # Algoritmo de otimização
    'max_iter': [100, 200, 300]       # Número máximo de iterações
}

# Crie o modelo de Regressão Logística
LR_model = LogisticRegression(random_state=1)

# Crie um objeto GridSearchCV para realizar a pesquisa em grade
grid_search = GridSearchCV(estimator=LR_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Realize a pesquisa em grade nos dados de treinamento
grid_search.fit(x_train, y_train)

print("")
# Imprima os melhores hiperparâmetros encontrados
print("Melhores Hiperparâmetros:")
print(grid_search.best_params_)
print("")

# Avalie o modelo com os melhores hiperparâmetros nos dados de teste
best_LR = grid_search.best_estimator_

start_time = time.time()
y_pred = best_LR.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")


Melhores Hiperparâmetros:
{'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'saga'}

Acurácia nos Dados de Teste: 0.4743

True Positive Rate: 0.4685

Tempo de execução: 0.001 segundos


# Gradient Boosting

In [113]:
from sklearn.ensemble import GradientBoostingClassifier

# Defina os hiperparâmetros que você deseja ajustar
param_grid = {
    'n_estimators': [50, 100, 200],          # Número de estimadores (árvores)
    'learning_rate': [0.01, 0.1, 0.2],      # Taxa de aprendizado
    'max_depth': [3, 4, 5],                # Profundidade máxima das árvores
    'min_samples_split': [2, 3, 4],         # Número mínimo de amostras necessárias para dividir um nó
    'min_samples_leaf': [1, 2, 3],          # Número mínimo de amostras em uma folha
}

# Crie o modelo de Regressão Logística
GB_model = GradientBoostingClassifier(random_state=1)

# Crie um objeto GridSearchCV para realizar a pesquisa em grade
grid_search = RandomizedSearchCV(estimator=GB_model, param_distributions=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Realize a pesquisa em grade nos dados de treinamento
grid_search.fit(x_train, y_train)

# Imprima os melhores hiperparâmetros encontrados
print("Melhores Hiperparâmetros:")
print(grid_search.best_params_)
print("")

# Avalie o modelo com os melhores hiperparâmetros nos dados de teste
best_GB = grid_search.best_estimator_

start_time = time.time()
y_pred = best_GB.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")

Melhores Hiperparâmetros:
{'n_estimators': 100, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 5, 'learning_rate': 0.2}

Acurácia nos Dados de Teste: 0.4743

True Positive Rate: 0.4651

Tempo de execução: 0.0037 segundos


#Naive Bayes - Gaussian

In [114]:
from sklearn.naive_bayes import GaussianNB

# Defina os hiperparâmetros que você deseja ajustar
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6],   # Parâmetro de suavização
}

# Crie o modelo Naive Bayes (GaussianNB)
GaussianNB_model = GaussianNB()

# Crie um objeto GridSearchCV para realizar a pesquisa em grade
grid_search = GridSearchCV(estimator=GaussianNB_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Realize a pesquisa em grade nos dados de treinamento
grid_search.fit(x_train, y_train)

# Imprima os melhores hiperparâmetros encontrados
print("Melhores Hiperparâmetros:")
print(grid_search.best_params_)
print("")

# Avalie o modelo com os melhores hiperparâmetros nos dados de teste
best_GNB = grid_search.best_estimator_

start_time = time.time()
y_pred = best_GNB.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")

Melhores Hiperparâmetros:
{'var_smoothing': 1e-09}

Acurácia nos Dados de Teste: 0.5059

True Positive Rate: 0.4815

Tempo de execução: 0.0007 segundos


#Naive Bayes - Bernoulli

In [115]:
from sklearn.naive_bayes import BernoulliNB

# Defina os hiperparâmetros que você deseja ajustar
param_grid = {
    'alpha': [1.0, 0.1, 0.01, 0.001],   # Parâmetro de suavização Laplace
    'binarize': [0.0, 0.1, 0.2, 0.3],  # Valor de limiarização para binarização
}

# Crie o modelo Naive Bayes (GaussianNB)
BernoulliNB_model = BernoulliNB()

# Crie um objeto GridSearchCV para realizar a pesquisa em grade
grid_search = GridSearchCV(estimator=BernoulliNB_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Realize a pesquisa em grade nos dados de treinamento
grid_search.fit(x_train, y_train)

# Imprima os melhores hiperparâmetros encontrados
print("Melhores Hiperparâmetros:")
print(grid_search.best_params_)
print("")

# Avalie o modelo com os melhores hiperparâmetros nos dados de teste
best_BNB = grid_search.best_estimator_

start_time = time.time()
y_pred = best_BNB.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")

Melhores Hiperparâmetros:
{'alpha': 0.1, 'binarize': 0.0}

Acurácia nos Dados de Teste: 0.5059

True Positive Rate: 0.4959

Tempo de execução: 0.0011 segundos


#X Gradient Boosting

In [116]:
from xgboost import XGBClassifier

# Defina os hiperparâmetros que você deseja ajustar
param_grid = {
    'n_estimators': [50, 100, 200],            # Número de árvores (estimadores)
    'learning_rate': [0.01, 0.1, 0.2],         # Taxa de aprendizado
    'max_depth': [3, 4, 5],                   # Profundidade máxima das árvores
    'min_child_weight': [1, 2, 3],            # Peso mínimo da criança
}

# Crie o modelo XGBoost
XGB_model = XGBClassifier(random_state=1)

# Crie um objeto GridSearchCV para realizar a pesquisa em grade
grid_search = RandomizedSearchCV(estimator=XGB_model, param_distributions=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Realize a pesquisa em grade nos dados de treinamento
grid_search.fit(x_train, y_train)

# Imprima os melhores hiperparâmetros encontrados
print("Melhores Hiperparâmetros:")
print(grid_search.best_params_)
print("")

# Avalie o modelo com os melhores hiperparâmetros nos dados de teste
best_XGB = grid_search.best_estimator_

start_time = time.time()
y_pred = best_XGB.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")

Melhores Hiperparâmetros:
{'n_estimators': 200, 'min_child_weight': 2, 'max_depth': 3, 'learning_rate': 0.2}

Acurácia nos Dados de Teste: 0.4901

True Positive Rate: 0.4793

Tempo de execução: 0.0029 segundos


#Light Gradient Boosting

In [117]:
from lightgbm import LGBMClassifier

# Defina os hiperparâmetros que você deseja ajustar
param_grid = {
    'n_estimators': [50, 100, 200],            # Número de árvores (estimadores)
    'learning_rate': [0.01, 0.1, 0.2],         # Taxa de aprendizado
    'max_depth': [3, 4, 5],                   # Profundidade máxima das árvores
    'min_child_weight': [1, 2, 3],            # Peso mínimo da criança
}

# Crie o modelo LightGBM
LGB_model = LGBMClassifier(random_state=1)

# Crie um objeto GridSearchCV para realizar a pesquisa em grade
grid_search = RandomizedSearchCV(estimator=LGB_model, param_distributions=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Realize a pesquisa em grade nos dados de treinamento
grid_search.fit(x_train, y_train)

# Imprima os melhores hiperparâmetros encontrados
print("Melhores Hiperparâmetros:")
print(grid_search.best_params_)
print("")

# Avalie o modelo com os melhores hiperparâmetros nos dados de teste
best_LGB = grid_search.best_estimator_

start_time = time.time()
y_pred = best_LGB.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")

[LightGBM] [Info] Number of positive: 314, number of negative: 278
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4928
[LightGBM] [Info] Number of data points in the train set: 592, number of used features: 48
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.530405 -> initscore=0.121772
[LightGBM] [Info] Start training from score 0.121772
Melhores Hiperparâmetros:
{'n_estimators': 200, 'min_child_weight': 2, 'max_depth': 3, 'learning_rate': 0.01}

Acurácia nos Dados de Teste: 0.4585

True Positive Rate: 0.462

Tempo de execução: 0.0049 segundos


#SVM Classifier

In [118]:
from sklearn.svm import SVC

# Defina os hiperparâmetros e as distribuições para pesquisa aleatória
param_dist = {
    'C':[0.1, 1, 10],                # Parâmetro de margem (distribuição exponencial)
    'kernel': ['linear', 'rbf'],        # Tipo de kernel
    'gamma': [0.001, 0.01, 0.1],       # Parâmetro do kernel RBF (distribuição uniforme)
}

# Crie o modelo SVM
SVC_model = SVC(random_state=1)

n_iter = 50

# Crie um objeto RandomizedSearchCV para realizar a pesquisa aleatória
random_search = GridSearchCV(estimator=SVC_model, param_grid=param_dist, cv=5, scoring='accuracy', n_jobs=-1)

# Realize a pesquisa aleatória nos dados de treinamento
random_search.fit(x_train, y_train)

# Imprima os melhores hiperparâmetros encontrados
print("Melhores Hiperparâmetros:")
print(random_search.best_params_)
print("")

# Avalie o modelo com os melhores hiperparâmetros nos dados de teste
best_SVM = random_search.best_estimator_

start_time = time.time()
y_pred = best_SVM.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")

Melhores Hiperparâmetros:
{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}

Acurácia nos Dados de Teste: 0.4901

True Positive Rate: 0.4901

Tempo de execução: 0.011 segundos


#Random Forest Classifier

In [119]:
from sklearn.ensemble import RandomForestClassifier

# Defina os hiperparâmetros e as distribuições para pesquisa aleatória
param_dist = {
    'max_depth': [2, 3, 5, 10, 20],
    'min_samples_leaf': [5, 10, 20, 50, 100, 200],
    'n_estimators': [10, 25, 30, 50, 100, 200],
    'max_features': [2, 3,4,5],
}

# Crie o modelo SVM
RF_model = RandomForestClassifier(random_state=1)

# Crie um objeto RandomizedSearchCV para realizar a pesquisa aleatória
random_search = GridSearchCV(estimator=RF_model, param_grid=param_dist, cv=5, scoring='accuracy', n_jobs=-1)

# Realize a pesquisa aleatória nos dados de treinamento
random_search.fit(x_train, y_train)

# Imprima os melhores hiperparâmetros encontrados
print("Melhores Hiperparâmetros:")
print(random_search.best_params_)
print("")

best_RF = random_search.best_estimator_

start_time = time.time()
y_pred = best_RF.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")

Melhores Hiperparâmetros:
{'max_depth': 3, 'max_features': 2, 'min_samples_leaf': 5, 'n_estimators': 30}

Acurácia nos Dados de Teste: 0.4704

True Positive Rate: 0.4742

Tempo de execução: 0.006 segundos


# Data Resizing

In [120]:
n_agg = 10
x_agg = []

for i in range(len(df_x)-n_agg+1):
  x_agg.append(df_x[i:i+n_agg])

x_agg = np.array(x_agg)

In [121]:
train_size = int(round(x_agg.shape[0]*0.7,0))
test_size = 1 - train_size

In [122]:
x_train_agg = x_agg[:train_size]
x_test_agg = x_agg[train_size:]

In [123]:
y_agg = df_y[n_agg-1:]

In [124]:
from scipy.special import y1
y_train_agg = y_agg[:train_size]
y_test_agg = y_agg[train_size:]

In [125]:
x_train_agg.shape, y_train_agg.shape

((585, 10, 48), (585,))

In [126]:
x_test_agg.shape, y_test_agg.shape

((251, 10, 48), (251,))

#LSTM

In [127]:
from tensorflow.keras.layers import LSTM

n = 1
layer_1 = [8, 16, 24, 32, 64, 128]
layer_2 = [4, 8, 12, 16, 32, 64]

for i,j in zip(layer_1, layer_2):

    print(f'LSTM {n}')
    print('Layer 1 = ', i)
    print('Layer 2 = ', j)

    # Creates the MLP with 2 hidden layers
    LSTM_model = Sequential([
        LSTM(i, activation='relu', input_shape=(x_train_agg.shape[1], x_train_agg.shape[2]), return_sequences=True),
        LSTM(j, activation='relu', return_sequences=False),
        Dropout(0.2),
        Dense(1)])

    # Compiles the model
    LSTM_model.compile(optimizer='adam',
                  loss='mse',
                  metrics=['accuracy'])

    LSTM_model.fit(x_train_agg, y_train_agg.values, validation_split=0.1, epochs=10, batch_size=16, verbose=0)

    start_time = time.time()
    y_pred = LSTM_model.predict(x_test_agg)
    end_time = time.time()

    threshold = 0.5
    y_pred = (y_pred > threshold).astype(int)

    accuracy = accuracy_score(y_test_agg, y_pred)
    print('Accuracy: '+str(round(accuracy,4)))

    # Obtendo valores da matriz de confusão
    tn, fp, fn, tp = confusion_matrix(y_test_agg, y_pred).ravel()
    # Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
    tpr = round(tp / (tp + fp),4)
    print("True Positive Rate:", round(tpr,4))

    execution_time = end_time - start_time
    print("Tempo de execução:", round(execution_time, 4), "segundos")
    print('')

    n+=1

LSTM 1
Layer 1 =  8
Layer 2 =  4
Accuracy: 0.4861
True Positive Rate: 0.4786
Tempo de execução: 0.4335 segundos

LSTM 2
Layer 1 =  16
Layer 2 =  8
Accuracy: 0.49
True Positive Rate: 0.4836
Tempo de execução: 0.4003 segundos

LSTM 3
Layer 1 =  24
Layer 2 =  12
Accuracy: 0.5259
True Positive Rate: 0.5194
Tempo de execução: 0.6627 segundos

LSTM 4
Layer 1 =  32
Layer 2 =  16
Accuracy: 0.5179
True Positive Rate: 0.5105
Tempo de execução: 0.4248 segundos

LSTM 5
Layer 1 =  64
Layer 2 =  32
Accuracy: 0.5458
True Positive Rate: 0.5357
Tempo de execução: 0.3793 segundos

LSTM 6
Layer 1 =  128
Layer 2 =  64
Accuracy: 0.51
True Positive Rate: 0.5039
Tempo de execução: 0.493 segundos



#GRU

In [128]:
# Implements the random forest model one more time, but with different parameters

from tensorflow.keras.layers import GRU

n = 1
layer_1 = [8, 16, 24, 32, 64, 128]
layer_2 = [4, 8, 12, 16, 32, 64]

for i,j in zip(layer_1, layer_2):

    print(f'GRU {n}')
    print('Layer 1 = ', i)
    print('Layer 2 = ', j)

    # Creates the MLP with 2 hidden layers
    GRU_model = Sequential([
        GRU(i, activation='relu', input_shape=(x_train_agg.shape[1], x_train_agg.shape[2]), return_sequences=True),
        GRU(j, activation='relu', return_sequences=False),
        Dropout(0.2),
        Dense(1)])

    # Compiles the model
    GRU_model.compile(optimizer='adam',
                  loss='mse',
                  metrics=['accuracy'])

    GRU_model.fit(x_train_agg, y_train_agg.values, validation_split=0.1, epochs=10, batch_size=16, verbose=0)

    start_time = time.time()
    y_pred = GRU_model.predict(x_test_agg)
    end_time = time.time()

    threshold = 0.5
    y_pred = (y_pred > threshold).astype(int)

    accuracy = accuracy_score(y_test_agg, y_pred)
    print('Accuracy: '+str(round(accuracy,4)))

    # Obtendo valores da matriz de confusão
    tn, fp, fn, tp = confusion_matrix(y_test_agg, y_pred).ravel()
    # Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
    tpr = round(tp / (tp + fp),4)
    print("True Positive Rate:", tpr)

    execution_time = end_time - start_time
    print("Tempo de execução:", round(execution_time, 4), "segundos")
    print('')

    n+=1

GRU 1
Layer 1 =  8
Layer 2 =  4
Accuracy: 0.4781
True Positive Rate: 0.4444
Tempo de execução: 0.4324 segundos

GRU 2
Layer 1 =  16
Layer 2 =  8
Accuracy: 0.5378
True Positive Rate: 0.5274
Tempo de execução: 0.4211 segundos

GRU 3
Layer 1 =  24
Layer 2 =  12
Accuracy: 0.502
True Positive Rate: 0.4961
Tempo de execução: 0.3988 segundos

GRU 4
Layer 1 =  32
Layer 2 =  16
Accuracy: 0.4861
True Positive Rate: 0.4834
Tempo de execução: 0.5963 segundos

GRU 5
Layer 1 =  64
Layer 2 =  32
Accuracy: 0.4861
True Positive Rate: 0.4818
Tempo de execução: 0.4159 segundos

GRU 6
Layer 1 =  128
Layer 2 =  64
Accuracy: 0.5219
True Positive Rate: 0.5132
Tempo de execução: 0.3994 segundos



#MLP

In [131]:
# Implements the random forest model one more time, but with different parameters

from sklearn.neural_network import MLPClassifier

params = {
    'hidden_layer_sizes': [(80, 160, 1), (80, 240, 1), (80, 160, 80, 1), (80, 240, 120, 1), (80, 240, 120, 40, 1)],
    'activation': ['relu', 'tanh', 'logistic', 'identity'],
    'learning_rate': ['constant', 'adaptive'],
    'alpha': [0.001, 0.01],
    'solver': ['adam', 'lbfgs', 'sgd']
}

# Crie o classificador MLP
MLP_model = MLPClassifier(max_iter=10000)

# Execute a pesquisa aleatória
grid_search = RandomizedSearchCV(estimator=MLP_model,
                                 param_distributions=params,
                                 cv=4,
                                 scoring="accuracy",
                                 n_jobs=-1,
                                 verbose=1)

grid_search.fit(x_train, y_train)

# Avalie o modelo com os melhores hiperparâmetros nos dados de teste
best_MLP = grid_search.best_estimator_

start_time = time.time()
y_pred = best_MLP.predict(x_test)
end_time = time.time()

test_accuracy = accuracy_score(y_test, y_pred)
print("Acurácia nos Dados de Teste:", round(test_accuracy,4))
print("")

# Obtendo valores da matriz de confusão
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Calculando a True Positive Rate (Taxa de Verdadeiros Positivos)
tpr = round(tp / (tp + fp),4)
print("True Positive Rate:", round(tpr,4))
print('')

execution_time = end_time - start_time
print("Tempo de execução:", round(execution_time, 4), "segundos")

Fitting 4 folds for each of 10 candidates, totalling 40 fits
Acurácia nos Dados de Teste: 0.4901

True Positive Rate: 0.4901

Tempo de execução: 0.005 segundos


# Benchmark

In [130]:
true_ratio = round(len(y_test[y_test == 1])/len(y_test),4)
true_ratio

0.4901