# Train Revenue Prediction Model

This model predic the revenue of the next quarter of all B3 listed companies.

## Import Packages

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

## Constants & Settings

In [2]:
DATA_SOURCE_FOLDER = "/workspaces/valuation/data/staging/numpy"
TENSORBOARD_LOG_DIR = "/workspaces/valuation/data/staging/tensorboard"

# device configuration
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize TensorBoard writer
writer = SummaryWriter(log_dir=TENSORBOARD_LOG_DIR)

## Read Preprocessed Data

In [3]:
X_train = np.load(os.path.join(DATA_SOURCE_FOLDER, 'X_train.npy'))
y_train = np.load(os.path.join(DATA_SOURCE_FOLDER, 'y_train.npy'))
X_test = np.load(os.path.join(DATA_SOURCE_FOLDER, 'X_test.npy'))
y_test = np.load(os.path.join(DATA_SOURCE_FOLDER, 'y_test.npy'))

## Hiperparameter

In [4]:
INPUT_SIZE = X_train.shape[2]
OUTPUT_SIZE = 1
HIDDEN_SIZE = 50
NUM_LAYERS = 2
LEARNING_RATE = 1.9
NUM_EPOCHS = 20
BATCH_SIZE = 10
SHUFFLE_TRAIN_DATA = False
SHUFFLE_TEST_DATA = False

## Define the LSTM model

In [5]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.num_layers = num_layers
        self.hidden_size = hidden_size
    
    def forward(self, x):
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(DEVICE)
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(DEVICE)
        
        out, _ = self.lstm(x, (h_0, c_0))
        out = self.fc(out[:, -1, :])
        return out


## Create dataset and dataloaders

In [6]:
X_train_torch = torch.from_numpy(X_train)
y_train_torch = torch.from_numpy(y_train)
X_test_torch = torch.from_numpy(X_test)
y_test_torch = torch.from_numpy(y_test)

train_dataset = TensorDataset(X_train_torch, y_train_torch)
test_dataset = TensorDataset(X_test_torch, y_test_torch)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=SHUFFLE_TRAIN_DATA)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=SHUFFLE_TEST_DATA)


## Validation Function

In [7]:
def validation(model, criterion, test_loader):
    # Testing loop
    model.eval()
    with torch.no_grad():
        test_loss = 0
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
        
        test_loss /= len(test_loader)
        return test_loss

## Model, loss function, optimizer

In [8]:
model = LSTMModel(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE).to(DEVICE)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


## Trainig the Model

In [9]:
# Training the model
for epoch in range(NUM_EPOCHS):
    
    model.train()
    optimizer.zero_grad()
    avg_loss = 0

    for data, targets in train_loader:
        data, targets = data.to(DEVICE), targets.to(DEVICE)
        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        avg_loss += loss.item() / len(train_loader)

    avg_test_loss = validation(model, criterion, test_loader)

    # Log the losses to TensorBoard
    writer.add_scalar('Loss/Train', avg_loss, epoch)
    writer.add_scalar('Loss/Test', avg_test_loss, epoch)

    print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Avg Train Loss: {avg_loss:.4f}, Avg Test Loss: {avg_test_loss:.4f}')


Epoch [1/20], Avg Train Loss: 395823337363860.4375, Avg Test Loss: 757636355193600.0000
Epoch [2/20], Avg Train Loss: 395488356760247.3750, Avg Test Loss: 757250093628648.7500
Epoch [3/20], Avg Train Loss: 395221041404313.6875, Avg Test Loss: 756877034360226.8750
Epoch [4/20], Avg Train Loss: 394961671459639.3750, Avg Test Loss: 756509959471895.2500
Epoch [5/20], Avg Train Loss: 394706461028903.0000, Avg Test Loss: 756146595677882.1250
Epoch [6/20], Avg Train Loss: 394454206102159.9375, Avg Test Loss: 755786062887144.7500
Epoch [7/20], Avg Train Loss: 394204375575313.1250, Avg Test Loss: 755427844729530.1250
Epoch [8/20], Avg Train Loss: 393956646532031.6250, Avg Test Loss: 755071755956642.8750
Epoch [9/20], Avg Train Loss: 393710932178306.4375, Avg Test Loss: 754717591005742.5000
Epoch [10/20], Avg Train Loss: 393467055464805.1250, Avg Test Loss: 754365346862405.8750
Epoch [11/20], Avg Train Loss: 393225119783467.1250, Avg Test Loss: 754014971432029.1250
Epoch [12/20], Avg Train Loss: