**IFPE Campus Paulista**

**Curso Análise e Desenvolvimento de Sistemas - Tópicos especiais em IA**

**Prof. Dr. Antônio Barreto**

**Discentes:** Adriel Leite, Hilda Miranda, Kamila Rocha e Murilo Alves.

**Objetivo:** Prever o valor das ações da Netflix baseado no seu histórico.

## Imports do projeto

In [None]:
!pip install -U skorch

Collecting skorch
  Downloading skorch-1.0.0-py3-none-any.whl.metadata (11 kB)
Downloading skorch-1.0.0-py3-none-any.whl (239 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.4/239.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: skorch
Successfully installed skorch-1.0.0


In [None]:
import pandas as pd
from plotly.subplots import make_subplots
import matplotlib.dates as dates
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_absolute_error, mean_squared_error
from skorch import NeuralNetRegressor
from torch.utils.data import DataLoader, TensorDataset


## Criação do DataFrame original


In [None]:
df = pd.read_csv('/content/NFLX.csv')

## Analisando e explorando a base de dados para obter informações

* **Date:** Dados temporais;
* **Open:** Preço da ação no momento de abertura do mercado;
* **High:** Maior preço da ação durante o dia;
* **Low** Menor preço da ação durante o dia;
* **Close:** Preço da ação no momento de fechamento do mercado;
* **Adj Close:** Preço ajustado da ação;
* **Volume:** Número de ações negociadas durante o dia.

In [None]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2002-05-23,1.156429,1.242857,1.145714,1.196429,1.196429,104790000
1,2002-05-24,1.214286,1.225,1.197143,1.21,1.21,11104800
2,2002-05-28,1.213571,1.232143,1.157143,1.157143,1.157143,6609400
3,2002-05-29,1.164286,1.164286,1.085714,1.103571,1.103571,6757800
4,2002-05-30,1.107857,1.107857,1.071429,1.071429,1.071429,10154200


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5044 entries, 0 to 5043
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       5044 non-null   object 
 1   Open       5044 non-null   float64
 2   High       5044 non-null   float64
 3   Low        5044 non-null   float64
 4   Close      5044 non-null   float64
 5   Adj Close  5044 non-null   float64
 6   Volume     5044 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 276.0+ KB


Com a informação acima vemos que não temos na nossa base de dados nenhum dado faltante


In [None]:
df.duplicated().sum()

0

Também sem dados duplicados

# Tratamento nas datas

In [None]:
df['Date'] = pd.to_datetime(df['Date'])
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month
df['day_of_week'] = df['Date'].dt.dayofweek

Gerando gráfico para visualização do preço da ação ao longo dos anos.

In [None]:
fig=make_subplots(specs=[[{"secondary_y":False}]])
fig.add_trace(go.Scatter(x=df['Date'],y=df['Close'].rolling(window=14).mean(),name="netflix"),secondary_y=False,)
fig.update_layout(autosize=False,width=700,height=500,title_text="NETFLIX")
fig.update_xaxes(title_text="Ano")
fig.update_yaxes(title_text="preço",secondary_y=False)
fig.show()

# Definição de X e y e Normalização

In [None]:
X = df.drop(labels=['Date','Close'], axis=1)
y = df['Close']
scaler = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))
y_scaled = scaler_y.fit_transform(y_scaled.reshape(-1, 1))

# Divisão dos dados : Treinamento e Teste
Como o banco de dados só possui 5.044 linhas foi esscolhido a proporção: 70% treinamento e 30% teste.


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled,y_scaled,test_size=0.3,random_state=42)

In [None]:
print('\033[1m' + 'X_train' + '\033[0m', np.shape(X_train),':', f'{(len(X_train) / len(df) * 100):.0f}%')
print('\033[1m' + 'y_train' + '\033[0m',np.shape(y_train),':', f'{(len(y_train) / len(df) * 100):.0f}%')
print('\033[1m' + 'X_test' + '\033[0m',np.shape(X_test),':', f'{(len(X_test) / len(df) * 100):.0f}%')
print('\033[1m' + 'y_test' + '\033[0m',np.shape(y_test),':', f'{(len(y_test) / len(df) * 100):.0f}%')

[1mX_train[0m (3530, 8) : 70%
[1my_train[0m (3530, 1) : 70%
[1mX_test[0m (1514, 8) : 30%
[1my_test[0m (1514, 1) : 30%


# RegressorNeuralNet model com 2 camadas ocultas 64 neurônios em 300 epochs

In [None]:
class RegressorNeuralNet(nn.Module):
  def __init__(self, input_dim, hidden_dim, output_dim):
    super(RegressorNeuralNet, self).__init__()
    self.dense0 = nn.Linear(input_dim, hidden_dim)
    self.relu = nn.ReLU()
    self.dense1 = nn.Linear(hidden_dim, hidden_dim)
    self.dropout = nn.Dropout(0.2)
    self.dense2 = nn.Linear(hidden_dim, output_dim)

  def forward(self, x):
    x = self.dense0(x)
    x = self.relu(x)
    x = self.dense1(x)
    x = self.relu(x)
    x = self.dropout(x)
    x = self.dense2(x)
    return x

In [None]:
net = NeuralNetRegressor(
    module=RegressorNeuralNet,
    module__input_dim=X.shape[1],
    module__hidden_dim=64,
    module__output_dim=1,
    criterion=nn.MSELoss,
    optimizer=torch.optim.Adam,
    optimizer__lr=0.001,
    max_epochs=300
)

In [None]:
net.fit(X_train.astype(np.float32), y_train.astype(np.float32).reshape(-1,1))

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5568[0m        [32m0.1009[0m  0.4065
      2        [36m0.0552[0m        [32m0.0094[0m  0.2105
      3        [36m0.0235[0m        [32m0.0031[0m  0.1827
      4        [36m0.0218[0m        [32m0.0027[0m  0.2377
      5        [36m0.0195[0m        [32m0.0026[0m  0.4262
      6        [36m0.0185[0m        [32m0.0017[0m  0.1537
      7        [36m0.0178[0m        [32m0.0014[0m  0.1743
      8        [36m0.0145[0m        0.0017  0.1703
      9        0.0149        0.0019  0.2197
     10        [36m0.0144[0m        [32m0.0013[0m  0.1741
     11        [36m0.0136[0m        [32m0.0008[0m  0.2250
     12        0.0137        0.0008  0.2220
     13        0.0136        [32m0.0006[0m  0.2203
     14        [36m0.0133[0m        0.0012  0.2176
     15        [36m0.0130[0m        0.0008  0.2199
     16        [36m0.0119[0m        [32m0.0006[0m 

<class 'skorch.regressor.NeuralNetRegressor'>[initialized](
  module_=RegressorNeuralNet(
    (dense0): Linear(in_features=8, out_features=64, bias=True)
    (relu): ReLU()
    (dense1): Linear(in_features=64, out_features=64, bias=True)
    (dropout): Dropout(p=0.2, inplace=False)
    (dense2): Linear(in_features=64, out_features=1, bias=True)
  ),
)

# Avaliação do RegressorNeuralNet model

In [None]:
train_y_pred_scaled = net.predict(X_train.astype(np.float32))
train_y_pred = scaler_y.inverse_transform(train_y_pred_scaled)
y_train_orig = scaler_y.inverse_transform(y_train.reshape(-1, 1))

mae_RegressorNeuralNet_train = mean_absolute_error(y_train_orig, train_y_pred)
mse_RegressorNeuralNet_train = mean_squared_error(y_train_orig, train_y_pred)

print(f"MAE (Treino): {mae_RegressorNeuralNet_train:.4f}")
print(f"MSE (Treino): {mse_RegressorNeuralNet_train:.4f}")

MAE (Treino): 0.0159
MSE (Treino): 0.0005


In [None]:
test_y_pred_scaled = net.predict(X_test.astype(np.float32))
test_y_pred = scaler_y.inverse_transform(test_y_pred_scaled)
y_test_orig = scaler_y.inverse_transform(y_test.reshape(-1, 1))

mae_RegressorNeuralNet_test = mean_absolute_error(y_test_orig, test_y_pred)
mse_RegressorNeuralNet_test = mean_squared_error(y_test_orig, test_y_pred)

print(f"MAE (Teste): {mae_RegressorNeuralNet_test:.4f}")
print(f"MSE (Teste): {mse_RegressorNeuralNet_test:.4f}")

MAE (Teste): 0.0156
MSE (Teste): 0.0005


# MultiLayerPerceptron model com 2 camadas ocultas e 64 neurônios em 1001 epochs com batch size de 32

# Load e tratamento do dataset para o novo modelo

In [None]:
df = pd.read_csv('/content/NFLX.csv')
df['Date'] = pd.to_datetime(df['Date'])
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month
df['day_of_week'] = df['Date'].dt.dayofweek
X = df.drop(labels=['Date','Close'], axis=1)
y = df['Close']
scaler = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))
y_scaled = scaler_y.fit_transform(y_scaled.reshape(-1, 1))
X_train, X_test, y_train, y_test = train_test_split(X_scaled,
                                                    y_scaled,
                                                    test_size=0.3,
                                                    random_state=42)

# Checagem para aproveitar GPU acceleration

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Conversão para PyTorch tensor e envio para o device disponível (CPU OU GPU)

In [None]:
X_train_t = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test_t = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train_t = torch.tensor(y_train, dtype=torch.float32).to(device)
y_test_t = torch.tensor(y_test, dtype=torch.float32).to(device)

In [None]:
dataset = TensorDataset(X_train_t, y_train_t)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
input_dim = X.shape[1]

model = nn.Sequential(
    nn.Linear(input_dim, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(64, 1)
).to(device)

In [None]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
n_epochs = 1001
train_losses = []
test_losses = []

In [None]:
for epoch in range(n_epochs):
  total_loss = 0
  for X_scaled, y_scaled in dataloader:
    optimizer.zero_grad()
    y_pred = model(X_scaled)
    loss = criterion(y_pred, y_scaled)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()

    if epoch % 100 == 0:
      y_pred_test = model(X_test_t)
      loss_test = criterion(y_pred_test, y_test_t)
      print(f"Epoch {epoch+1}/{n_epochs}, Loss: {total_loss/len(dataloader):.4f}, Test Loss: {loss_test:.4f}")

Epoch 1/1001, Loss: 0.0105, Test Loss: 0.8037
Epoch 1/1001, Loss: 0.0196, Test Loss: 0.7467
Epoch 1/1001, Loss: 0.0282, Test Loss: 0.6807
Epoch 1/1001, Loss: 0.0354, Test Loss: 0.6287
Epoch 1/1001, Loss: 0.0394, Test Loss: 0.5845
Epoch 1/1001, Loss: 0.0464, Test Loss: 0.5307
Epoch 1/1001, Loss: 0.0529, Test Loss: 0.4834
Epoch 1/1001, Loss: 0.0599, Test Loss: 0.4340
Epoch 1/1001, Loss: 0.0637, Test Loss: 0.3983
Epoch 1/1001, Loss: 0.0658, Test Loss: 0.3512
Epoch 1/1001, Loss: 0.0700, Test Loss: 0.3173
Epoch 1/1001, Loss: 0.0734, Test Loss: 0.2867
Epoch 1/1001, Loss: 0.0754, Test Loss: 0.2550
Epoch 1/1001, Loss: 0.0779, Test Loss: 0.2240
Epoch 1/1001, Loss: 0.0794, Test Loss: 0.1886
Epoch 1/1001, Loss: 0.0818, Test Loss: 0.1627
Epoch 1/1001, Loss: 0.0825, Test Loss: 0.1384
Epoch 1/1001, Loss: 0.0833, Test Loss: 0.1207
Epoch 1/1001, Loss: 0.0840, Test Loss: 0.1024
Epoch 1/1001, Loss: 0.0859, Test Loss: 0.0876
Epoch 1/1001, Loss: 0.0866, Test Loss: 0.0716
Epoch 1/1001, Loss: 0.0873, Test L

In [None]:
model.eval()

with torch.no_grad():
  train_pred = model(X_train_t).cpu().numpy()
  test_pred = model(X_test_t).cpu().numpy()

In [None]:
train_pred_orig = scaler_y.inverse_transform(train_pred)
test_pred_orig = scaler_y.inverse_transform(test_pred)
y_train_orig = scaler_y.inverse_transform(y_train_t.cpu().numpy())
y_test_orig = scaler_y.inverse_transform(y_test_t.cpu().numpy())

In [None]:
mae_train_mlp2 = mean_absolute_error(y_train_orig, train_pred_orig)
mse_train_mlp2 = mean_squared_error(y_train_orig, train_pred_orig)
mae_test_mlp2 = mean_absolute_error(y_test_orig, test_pred_orig)
mse_test_mlp2 = mean_squared_error(y_test_orig, test_pred_orig)

In [None]:
print(f"MAE (Treinamento): {mae_train_mlp2:.4f}")
print(f"MSE (Treinamento): {mse_train_mlp2:.4f}")
print(f"MAE (Teste): {mae_test_mlp2:.4f}")
print(f"MSE (Teste): {mse_test_mlp2:.4f}")

MAE (Treinamento): 0.0235
MSE (Treinamento): 0.0020
MAE (Teste): 0.0212
MSE (Teste): 0.0018


In [None]:
np.median(y_train_orig)

-0.53078544

In [None]:
train_quantiles = np.percentile(y_train_orig, [25, 75])
iqr_train = train_quantiles[1] - train_quantiles[0]
train_lower_bound = train_quantiles[0] - 1.5 * iqr_train
train_upper_bound = train_quantiles[1] + 1.5 * iqr_train

In [None]:
np.median(y_test_orig)

-0.5675222

In [None]:
test_quantiles = np.percentile(y_test_orig, [25, 75])
iqr_test = test_quantiles[1] - test_quantiles[0]
test_lower_bound = test_quantiles[0] - 1.5 * iqr_test
test_upper_bound = test_quantiles[1] + 1.5 * iqr_test

In [None]:
print(f"IQR do Treino: {iqr_train}")
print(f"IQR do Teste: {iqr_test}")
print(f"Treinamento Lower Bound: {train_lower_bound:.4f}, Upper Bound: {train_upper_bound:.4f}")
print(f"Teste Lower Bound: {test_lower_bound:.4f}, Upper Bound: {test_upper_bound:.4f}")

IQR do Treino: 0.977793961763382
IQR do Teste: 0.8228887058794498
Treinamento Lower Bound: -2.1334, Upper Bound: 1.7778
Teste Lower Bound: -1.9020, Upper Bound: 1.3896


# MultiLayerPerceptron model com 3 camadas ocultas e 32 neurônios em 600 epochs com batch size de 16

# Load e tratamento do dataset para o novo modelo

In [None]:
df = pd.read_csv('/content/NFLX.csv')
df['Date'] = pd.to_datetime(df['Date'])
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month
df['day_of_week'] = df['Date'].dt.dayofweek
X = df.drop(labels=['Date','Close'], axis=1)
y = df['Close']
scaler = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))
y_scaled = scaler_y.fit_transform(y_scaled.reshape(-1, 1))
X_train, X_test, y_train, y_test = train_test_split(X_scaled,
                                                    y_scaled,
                                                    test_size=0.3,
                                                    random_state=42)

 # Checagem para aproveitar GPU acceleration, Conversão para PyTorch tensor e envio para o device disponível (CPU OU GPU)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
X_train_t = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test_t = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train_t = torch.tensor(y_train, dtype=torch.float32).to(device)
y_test_t = torch.tensor(y_test, dtype=torch.float32).to(device)

In [None]:
dataset = TensorDataset(X_train_t, y_train_t)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [None]:
input_dim = X.shape[1]

model = nn.Sequential(
    nn.Linear(input_dim, 32),
    nn.ReLU(),
    nn.Linear(32, 32),
    nn.ReLU(),
    nn.Linear(32, 32),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(32, 1)
).to(device)


In [None]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
n_epochs = 600
train_losses = []
test_losses = []

In [None]:
for epoch in range(n_epochs):
  total_loss = 0
  for X_scaled, y_scaled in dataloader:
    optimizer.zero_grad()
    y_pred = model(X_scaled)
    loss = criterion(y_pred, y_scaled)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()

    if epoch % 100 == 0:
      y_pred_test = model(X_test_t)
      loss_test = criterion(y_pred_test, y_test_t)
      print(f"Epoch {epoch+1}/{n_epochs}, Loss: {total_loss/len(dataloader):.4f}, Test Loss: {loss_test:.4f}")

Epoch 1/600, Loss: 0.0050, Test Loss: 0.9641
Epoch 1/600, Loss: 0.0124, Test Loss: 0.9502
Epoch 1/600, Loss: 0.0176, Test Loss: 0.9407
Epoch 1/600, Loss: 0.0268, Test Loss: 0.9310
Epoch 1/600, Loss: 0.0310, Test Loss: 0.9230
Epoch 1/600, Loss: 0.0370, Test Loss: 0.9108
Epoch 1/600, Loss: 0.0397, Test Loss: 0.9017
Epoch 1/600, Loss: 0.0424, Test Loss: 0.8928
Epoch 1/600, Loss: 0.0456, Test Loss: 0.8793
Epoch 1/600, Loss: 0.0500, Test Loss: 0.8730
Epoch 1/600, Loss: 0.0555, Test Loss: 0.8639
Epoch 1/600, Loss: 0.0584, Test Loss: 0.8524
Epoch 1/600, Loss: 0.0621, Test Loss: 0.8432
Epoch 1/600, Loss: 0.0664, Test Loss: 0.8323
Epoch 1/600, Loss: 0.0686, Test Loss: 0.8230
Epoch 1/600, Loss: 0.0726, Test Loss: 0.8137
Epoch 1/600, Loss: 0.0762, Test Loss: 0.8053
Epoch 1/600, Loss: 0.0778, Test Loss: 0.7899
Epoch 1/600, Loss: 0.0804, Test Loss: 0.7791
Epoch 1/600, Loss: 0.0820, Test Loss: 0.7683
Epoch 1/600, Loss: 0.0840, Test Loss: 0.7554
Epoch 1/600, Loss: 0.0899, Test Loss: 0.7417
Epoch 1/60

In [None]:
model.eval()

with torch.no_grad():
  train_pred = model(X_train_t).cpu().numpy()
  test_pred = model(X_test_t).cpu().numpy()

In [None]:
train_pred_orig = scaler_y.inverse_transform(train_pred)
test_pred_orig = scaler_y.inverse_transform(test_pred)
y_train_orig = scaler_y.inverse_transform(y_train_t.cpu().numpy())
y_test_orig = scaler_y.inverse_transform(y_test_t.cpu().numpy())

In [None]:
mae_train_mlp3 = mean_absolute_error(y_train_orig, train_pred_orig)
mse_train_mlp3 = mean_squared_error(y_train_orig, train_pred_orig)
mae_test_mlp3 = mean_absolute_error(y_test_orig, test_pred_orig)
mse_test_mlp3 = mean_squared_error(y_test_orig, test_pred_orig)

In [None]:
print(f"MAE (Treinamento): {mae_train_mlp3:.4f}")
print(f"MSE (Treinamento): {mse_train_mlp3:.4f}")
print(f"MAE (Teste): {mae_test_mlp3:.4f}")
print(f"MSE (Teste): {mse_test_mlp3:.4f}")

MAE (Treinamento): 0.0229
MSE (Treinamento): 0.0007
MAE (Teste): 0.0230
MSE (Teste): 0.0007


In [None]:
train_quantiles = np.percentile(y_train_orig, [25, 75])
iqr_train = train_quantiles[1] - train_quantiles[0]
train_lower_bound = train_quantiles[0] - 1.5 * iqr_train
train_upper_bound = train_quantiles[1] + 1.5 * iqr_train

In [None]:
test_quantiles = np.percentile(y_test_orig, [25, 75])
iqr_test = test_quantiles[1] - test_quantiles[0]
test_lower_bound = test_quantiles[0] - 1.5 * iqr_test
test_upper_bound = test_quantiles[1] + 1.5 * iqr_test

In [None]:
print(f"IQR do Treino: {iqr_train}")
print(f"IQR do Teste: {iqr_test}")
print(f"Treinamento Lower Bound: {train_lower_bound:.4f}, Upper Bound: {train_upper_bound:.4f}")
print(f"Teste Lower Bound: {test_lower_bound:.4f}, Upper Bound: {test_upper_bound:.4f}")

IQR do Treino: 0.977793961763382
IQR do Teste: 0.8228887058794498
Treinamento Lower Bound: -2.1334, Upper Bound: 1.7778
Teste Lower Bound: -1.9020, Upper Bound: 1.3896


In [None]:
results = pd.DataFrame({
    'Modelo': ['Regressor NeuralNet - 2 Camadas', 'MLP - 2 Camadas', 'MLP - 3 Camadas'],
    'MAE Treino': [mae_RegressorNeuralNet_train, mae_train_mlp2, mae_train_mlp3],
    'MAE Teste': [mae_RegressorNeuralNet_test, mae_test_mlp2, mae_test_mlp3],
    'MSE Treino': [mse_RegressorNeuralNet_train, mse_train_mlp2, mse_train_mlp3],
    'MSE Teste': [mse_RegressorNeuralNet_test, mse_test_mlp2, mse_test_mlp3]
})

print(results)

                            Modelo  MAE Treino  MAE Teste  MSE Treino  \
0  Regressor NeuralNet - 2 Camadas    0.015942   0.015559    0.000504   
1                  MLP - 2 Camadas    0.023453   0.021242    0.002005   
2                  MLP - 3 Camadas    0.022945   0.023006    0.000676   

   MSE Teste  
0   0.000498  
1   0.001780  
2   0.000678  
