In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import r2_score
import logging
from data_utils import load_info, create_dataloaders, load_preprocessed_data

logging.basicConfig(filename='model_output.log', level=logging.INFO, format='%(asctime)s - %(message)s')

In [19]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_layers, output_dim):
        super(NeuralNetwork, self).__init__()
        layers = []
        prev_dim = input_dim
        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(prev_dim, hidden_dim))
            layers.append(nn.LeakyReLU(negative_slope=0.01))
            prev_dim = hidden_dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

In [20]:
def l1_regularization(model, l1_lambda):
    l1_norm = sum(p.abs().sum() for p in model.parameters())
    return l1_lambda * l1_norm

In [21]:
def train(model, train_loader, valid_loader, criterion, optimizer, epochs, patience, l1_lambda, best_loss, patience_counter):
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0

        for data, target in train_loader:
            data = data.float().requires_grad_()
            target = target.float()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output.squeeze(), target) + l1_regularization(model, l1_lambda)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * data.size(0)
        
        train_loss /= len(train_loader.dataset)
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data, target in valid_loader:
                data = data.float()
                target = target.float()
                output = model(data)
                loss = criterion(output.squeeze(), target)
                val_loss += loss.item() * data.size(0)
        
        val_loss /= len(valid_loader.dataset)
        
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}')
        
        # Early stopping
        if val_loss < best_loss:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
        
        if patience_counter >= patience:
            print("Early stopping triggered")
            break

In [22]:
input_data, target_data = load_preprocessed_data()
print(input_data.shape, target_data.shape)
firm_info, _ = load_info()

train_loader, valid_loader, test_loader, _ = create_dataloaders(
    input_data, target_data, firm_info,
    train_date='2005-01-01', valid_date='2010-01-01', test_date='2015-11-01', batch_size=2000)

print(len(train_loader), len(valid_loader), len(test_loader))

(576574, 252) (576574, 3)
114 52 57


In [23]:
# Hyperparameters setting
input_dim = input_data.shape[1] - 2
output_dim = 1
learning_rate = 0.001
epochs = 100
patience = 5
l1_lambda = 1e-5

model = NeuralNetwork(input_dim, [64, 32], output_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

best_loss = float('inf')
patience_counter = 10

In [24]:
train(model, train_loader, valid_loader, criterion, optimizer, epochs, patience, l1_lambda, best_loss, patience_counter)

Epoch 1/100, Train Loss: 0.032803, Val Loss: 0.025236
Epoch 2/100, Train Loss: 0.030152, Val Loss: 0.025237
Epoch 3/100, Train Loss: 0.029292, Val Loss: 0.025196
Epoch 4/100, Train Loss: 0.028904, Val Loss: 0.025231
Epoch 5/100, Train Loss: 0.028697, Val Loss: 0.025231
Epoch 6/100, Train Loss: 0.028569, Val Loss: 0.025181
Epoch 7/100, Train Loss: 0.028477, Val Loss: 0.025154
Epoch 8/100, Train Loss: 0.028419, Val Loss: 0.025134
Epoch 9/100, Train Loss: 0.028383, Val Loss: 0.025116
Epoch 10/100, Train Loss: 0.028357, Val Loss: 0.025056
Epoch 11/100, Train Loss: 0.028332, Val Loss: 0.025068
Epoch 12/100, Train Loss: 0.028318, Val Loss: 0.025041
Epoch 13/100, Train Loss: 0.028304, Val Loss: 0.025036
Epoch 14/100, Train Loss: 0.028294, Val Loss: 0.025024
Epoch 15/100, Train Loss: 0.028286, Val Loss: 0.025012
Epoch 16/100, Train Loss: 0.028281, Val Loss: 0.025008
Epoch 17/100, Train Loss: 0.028275, Val Loss: 0.025016
Epoch 18/100, Train Loss: 0.028273, Val Loss: 0.025015
Epoch 19/100, Train

In [25]:
# model test
model.eval()
y_pred_list = []
y_true_list = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_pred = model(X_batch.float())
        y_pred_list.extend(y_pred.numpy())
        y_true_list.extend(y_batch.numpy())

y_pred_array = np.array(y_pred_list)
y_true_array = np.array(y_true_list)

r2 = r2_score(y_true_array, y_pred_array)
print(f'R² score: {r2}')

R² score: -0.016326279921723863
