# Train Revenue Prediction Model

This model predic the revenue of the next quarter of all B3 listed companies.

## Import Packages

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

## Constants & Settings

In [2]:
DATA_SOURCE_FOLDER = "/workspaces/valuation/data/staging/numpy"

# device configuration
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Read Preprocessed Data

In [3]:
X_train = np.load(os.path.join(DATA_SOURCE_FOLDER, 'X_train.npy'))
y_train = np.load(os.path.join(DATA_SOURCE_FOLDER, 'y_train.npy'))
X_test = np.load(os.path.join(DATA_SOURCE_FOLDER, 'X_test.npy'))
y_test = np.load(os.path.join(DATA_SOURCE_FOLDER, 'y_test.npy'))

## Hiperparameter

In [4]:
INPUT_SIZE = X_train.shape[2]
OUTPUT_SIZE = 1
HIDDEN_SIZE = 50
NUM_LAYERS = 2
LEARNING_RATE = 0.001
NUM_EPOCHS = 20
BATCH_SIZE = 10
SHUFFLE_TRAIN_DATA = False
SHUFFLE_TEST_DATA = False

## Define the LSTM model

In [5]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.num_layers = num_layers
        self.hidden_size = hidden_size
    
    def forward(self, x):
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(DEVICE)
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(DEVICE)
        
        out, _ = self.lstm(x, (h_0, c_0))
        out = self.fc(out[:, -1, :])
        return out


## Create dataset and dataloaders

In [6]:
X_train_torch = torch.from_numpy(X_train)
y_train_torch = torch.from_numpy(y_train)
X_test_torch = torch.from_numpy(X_test)
y_test_torch = torch.from_numpy(y_test)

train_dataset = TensorDataset(X_train_torch, y_train_torch)
test_dataset = TensorDataset(X_test_torch, y_test_torch)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=SHUFFLE_TRAIN_DATA)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=SHUFFLE_TEST_DATA)


## Model, loss function, optimizer

In [7]:
model = LSTMModel(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE).to(DEVICE)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


## Trainig the Model

In [15]:
# Training the model
for epoch in range(NUM_EPOCHS):
    model.train()
    optimizer.zero_grad()
    for data, targets in train_loader:
        data, targets = data.to(DEVICE), targets.to(DEVICE)
        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}')


Epoch [1/20], Loss: 38078458626048.0000
Epoch [2/20], Loss: 38077900783616.0000
Epoch [3/20], Loss: 38077363912704.0000
Epoch [4/20], Loss: 38076839624704.0000
Epoch [5/20], Loss: 38076311142400.0000
Epoch [6/20], Loss: 38075786854400.0000
Epoch [7/20], Loss: 38075262566400.0000
Epoch [8/20], Loss: 38074738278400.0000
Epoch [9/20], Loss: 38074213990400.0000
Epoch [10/20], Loss: 38073689702400.0000
Epoch [11/20], Loss: 38073165414400.0000
Epoch [12/20], Loss: 38072632737792.0000
Epoch [13/20], Loss: 38072108449792.0000
Epoch [14/20], Loss: 38071584161792.0000
Epoch [15/20], Loss: 38071059873792.0000
Epoch [16/20], Loss: 38070535585792.0000
Epoch [17/20], Loss: 38070011297792.0000
Epoch [18/20], Loss: 38069487009792.0000
Epoch [19/20], Loss: 38068962721792.0000
Epoch [20/20], Loss: 38068438433792.0000
