In [None]:
import json
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split

In [None]:
class NetworkConfig:
    """Hyper-parameters shared by the dilated encoder and decoder.

    Args:
        base_channels: Output width of the dilated convolution inside each block.
        compression_channels: Output width of the 1x1 convolution that closes
            each block (and of the encoder's fused projection).
        progression: Channel widths of the successive pooled encoder stages;
            the decoder walks the same list in reverse.
        n_blocks: Number of dilated blocks in the encoder and in the decoder.
        seq_length: Length the decoder's last upsampling step restores.
            Defaults to 50, the value that used to be hard-coded.
        input_channels: Number of channels of the input (and reconstructed)
            signal. Defaults to 6, the value that used to be hard-coded.
    """

    def __init__(
            self,
            base_channels,
            compression_channels,
            progression,
            n_blocks,
            seq_length=50,
            input_channels=6
    ):
        self.seq_length = seq_length
        self.input_channels = input_channels
        self.base_channels = base_channels
        self.compression_channels = compression_channels
        self.n_blocks = n_blocks
        self.progression = progression


class DilatedBase(nn.Module):
    """Common parent of the encoder and the decoder.

    Keeps a reference to the shared configuration and provides the factory
    for the dilated building block both networks are assembled from.
    """

    def __init__(self, config: NetworkConfig):
        super().__init__()
        self.config = config

    def _create_conv_block(
            self,
            dilation: int,
            in_channels: int,
            dilated_channels: int,
            out_channels: int
    ) -> nn.Sequential:
        """Build one dilated block.

        Layer order: Conv1d(kernel 3, dilated, 'same' padding) -> BatchNorm1d
        -> ReLU -> Conv1d(kernel 1) -> BatchNorm1d -> ReLU.

        Args:
            dilation: Dilation of the first (kernel-size-3) convolution.
            in_channels: Channels entering the block.
            dilated_channels: Channels produced by the dilated convolution.
            out_channels: Channels produced by the closing 1x1 convolution.

        Returns:
            An ``nn.Sequential`` holding the six layers listed above.
        """
        # Dilated convolution; padding='same' keeps the temporal length.
        dilated_stage = [
            nn.Conv1d(
                in_channels,
                dilated_channels,
                kernel_size=3,
                dilation=dilation,
                padding='same'
            ),
            nn.BatchNorm1d(dilated_channels),
            nn.ReLU(),
        ]

        # Pointwise (kernel-size-1) convolution: compression / expansion of channels.
        pointwise_stage = [
            nn.Conv1d(dilated_channels, out_channels, kernel_size=1),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
        ]

        return nn.Sequential(*dilated_stage, *pointwise_stage)


class DilatedEncoder(DilatedBase):
    """Encoder half of the autoencoder.

    Processing order (see ``forward``):
      1. ``conv_blocks`` are applied one after another; every block output is kept.
      2. The kept outputs are concatenated on the channel axis and reduced to
         ``compression_channels`` by a 1x1 convolution.
      3. For each width in ``config.progression``: MaxPool1d(2, stride 2), then
         Conv1d(kernel 3) -> BatchNorm1d -> ReLU.
    """

    def __init__(self, config: NetworkConfig):
        super().__init__(config)

        # Dilated + compression blocks
        self.conv_blocks = self._create_encoder_blocks()

        # Final processing layers
        self.pooling = nn.MaxPool1d(kernel_size=2, stride=2)
        self.final_projection = self._create_final_projection()
        self.output_layers = self._create_output_layers()

    def _create_encoder_blocks(self) -> nn.ModuleList:
        """Create the dilated-convolution + compression blocks.

        Block ``k`` (0-based) uses dilation ``2 ** (k + 1)``. Only the first
        block reads ``input_channels``; the others read ``compression_channels``.
        """
        cfg = self.config
        blocks = [
            self._create_conv_block(
                dilation=2 ** (block_idx + 1),
                in_channels=(
                    cfg.input_channels if block_idx == 0 else cfg.compression_channels
                ),
                dilated_channels=cfg.base_channels,
                out_channels=cfg.compression_channels
            )
            for block_idx in range(cfg.n_blocks)
        ]
        return nn.ModuleList(blocks)

    def _create_final_projection(self) -> nn.Sequential:
        """Create the 1x1 projection that fuses the concatenated block outputs."""
        target_width = self.config.compression_channels
        fused_width = target_width * self.config.n_blocks
        return nn.Sequential(
            nn.Conv1d(fused_width, target_width, kernel_size=1),
            nn.BatchNorm1d(target_width),
            nn.ReLU()
        )

    def _create_output_layers(self) -> nn.ModuleList:
        """Create one Conv1d -> BatchNorm1d -> ReLU stage per entry of ``progression``."""
        # Consecutive pairs of this list are the (in, out) widths of each stage.
        widths = [self.config.compression_channels, *self.config.progression]
        stages = [
            nn.Sequential(
                nn.Conv1d(width_in, width_out, kernel_size=3, padding='same'),
                nn.BatchNorm1d(width_out),
                nn.ReLU()
            )
            for width_in, width_out in zip(widths[:-1], widths[1:])
        ]
        return nn.ModuleList(stages)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x``.

        ``x`` must be laid out the way ``nn.Conv1d`` expects, with
        ``config.input_channels`` channels.
        """
        block_outputs = []
        current = x
        for block in self.conv_blocks:
            current = block(current)
            block_outputs.append(current)

        # Fuse every block's output along the channel axis.
        fused = torch.cat(block_outputs, dim=1)
        encoded = self.final_projection(fused)

        # Each output stage is preceded by a pooling step.
        for stage in self.output_layers:
            encoded = stage(self.pooling(encoded))

        return encoded


class DilatedDecoder(DilatedBase):
    """Decoder half of the autoencoder.

    Processing order (see ``forward``):
      1. ``input_layers``: Upsample -> Conv1d(kernel 3) -> BatchNorm1d -> ReLU,
         walking ``config.progression`` backwards and ending at
         ``compression_channels`` channels and ``seq_length`` time steps.
      2. ``conv_blocks`` are applied one after another; every block output is kept.
      3. The kept outputs are concatenated on the channel axis and mapped to
         ``input_channels`` by a 1x1 convolution followed by ReLU.
    """

    def __init__(self, config: NetworkConfig):
        super().__init__(config)

        self.input_layers = self._create_input_layers()
        self.conv_blocks = self._create_decoder_blocks()
        self.final_layer = self._create_final_projection()

    def _create_decoder_blocks(self) -> nn.ModuleList:
        """Create the dilated-convolution + expansion blocks.

        Dilations run from ``2 ** n_blocks`` down to ``2``.
        """
        cfg = self.config
        blocks = [
            self._create_conv_block(
                dilation=2 ** exponent,
                in_channels=cfg.compression_channels,
                dilated_channels=cfg.base_channels,
                out_channels=cfg.compression_channels
            )
            for exponent in range(cfg.n_blocks, 0, -1)
        ]
        return nn.ModuleList(blocks)

    def _create_input_layers(self) -> nn.ModuleList:
        """Create the upsampling input stages.

        Every stage but the last doubles the length (``scale_factor=2``); the
        last one resizes directly to ``config.seq_length``.
        """
        reversed_widths = list(reversed(self.config.progression))
        # Output width of each stage, then the matching input widths.
        targets = reversed_widths[1:] + [self.config.compression_channels]
        sources = [reversed_widths[0]] + targets[:-1]
        last_position = len(targets) - 1

        stages = []
        for position, (width_in, width_out) in enumerate(zip(sources, targets)):
            if position == last_position:
                upsample = nn.Upsample(size=self.config.seq_length, scale_factor=None)
            else:
                upsample = nn.Upsample(size=None, scale_factor=2)
            stages.append(
                nn.Sequential(
                    upsample,
                    nn.Conv1d(width_in, width_out, kernel_size=3, padding='same'),
                    nn.BatchNorm1d(width_out),
                    nn.ReLU()
                )
            )

        return nn.ModuleList(stages)

    def _create_final_projection(self) -> nn.Sequential:
        """Create the 1x1 projection back to ``input_channels``."""
        fused_width = self.config.compression_channels * self.config.n_blocks
        # NOTE(review): the trailing ReLU restricts every reconstructed value to
        # >= 0 - confirm the reconstruction targets are never negative.
        return nn.Sequential(
            nn.Conv1d(fused_width, self.config.input_channels, kernel_size=1),
            nn.ReLU()
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Decode the latent tensor ``x`` produced by the encoder."""
        # Input stages with upsampling
        current = x
        for stage in self.input_layers:
            current = stage(current)

        # Dilated blocks, keeping every intermediate output
        block_outputs = []
        for block in self.conv_blocks:
            current = block(current)
            block_outputs.append(current)

        # Channel-wise concatenation followed by the final layer
        return self.final_layer(torch.cat(block_outputs, dim=1))
    
class CompletModel(nn.Module):
    """Full autoencoder: a DilatedEncoder followed by a DilatedDecoder.

    Both halves are built from the same ``config`` object.
    """

    def __init__(self, config):
        super().__init__()
        self.encoder = DilatedEncoder(config)
        self.decoder = DilatedDecoder(config)

    def forward(self, x):
        """Return the decoder's reconstruction of the encoded ``x``."""
        return self.decoder(self.encoder(x))

In [None]:
# All windows
# NOTE: this cell and the two dataset cells after it all bind `data`;
# whichever of them ran last decides what the windowing cell sees.
data = pd.read_csv(
    "./final_stocks_2.csv",
    usecols=[
        "log_return_DlyClose",
        "log_return_DlyLow",
        "log_return_DlyHigh",
        "log_return_DlyBid",
        "log_return_DlyAsk",
        "volume_normalized",
    ],
)

In [None]:
# Positive Future windows
# NOTE: rebinds `data`, replacing whatever an earlier dataset cell loaded.
data = pd.read_csv(
    "./final_stocks_4.csv",
    usecols=[
        "DlyClose",
        "DlyLow",
        "DlyHigh",
        "DlyBid",
        "DlyAsk",
        "DlyVol",
    ],
)

In [None]:
# Negative Future windows
# NOTE: rebinds `data`, replacing whatever an earlier dataset cell loaded.
data = pd.read_csv(
    "./final_stocks_negative.csv",
    usecols=[
        "DlyClose",
        "DlyLow",
        "DlyHigh",
        "DlyBid",
        "DlyAsk",
        "DlyVol",
    ],
)

In [None]:
# Number of rows (time steps) per window; used to slice `data` below and
# recorded in the saved run configuration. NetworkConfig carries its own
# seq_length (50) - the two values must agree.
seq_length = 50

In [None]:
# Cut `data` into consecutive windows of `seq_length` rows. With
# stride == seq_length the windows do not overlap.
stride = seq_length

# Convert once to a float32 array instead of slicing the DataFrame per window.
values = data.to_numpy(dtype=np.float32)

# A window starting at row s is complete iff s + seq_length <= len(values),
# so the last admissible start is len(values) - seq_length (inclusive).
# The previous upper bound (len - seq_length - 1) dropped the final full window.
window_starts = range(0, len(values) - seq_length + 1, stride)
if len(window_starts) == 0:
    raise ValueError(
        f"data has {len(values)} rows, fewer than one window of {seq_length} rows"
    )

windows = np.array([values[start:start + seq_length] for start in window_starts])
# (window, row, column) -> (window, column, row): channels-first layout for Conv1d.
windows = torch.FloatTensor(windows).transpose(1, 2)

# 90 % / 10 % split; the second part takes the remainder so the sizes always sum up.
train_size = int(0.9 * len(windows))
val_size = len(windows) - train_size

# Fixed seed so the train/test membership is the same on every run.
split_generator = torch.Generator().manual_seed(42)
train_data, test_data = random_split(
    windows, [train_size, val_size], generator=split_generator
)

# NOTE: the batch size is a literal here because the `batch_size` variable is
# only defined in a later cell; keep the two values in sync.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

In [None]:
# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Training settings ---
epochs = 100
batch_size = 32
learning_rate = 0.001

# --- Architecture settings ---
input_dim = 6
base_channels = 20
compression_channels = 6
n_blocks = 5
progression = [20, 40, 60]

# Root directory under which each run creates its own sub-folder.
model_folder = "models/autoencoder_negative/"

# Keyword arguments listed in the order of the NetworkConfig signature.
config = NetworkConfig(
    base_channels=base_channels,
    compression_channels=compression_channels,
    progression=progression,
    n_blocks=n_blocks,
)

model = CompletModel(config)
model = model.to(device)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# One directory per run, named after the launch time returned by time.time().
timer = time.time()
folder = model_folder + str(timer) + "/"
os.makedirs(folder + "checkpoints/", exist_ok=True)

# Snapshot of the settings used for this run.
# NOTE: this rebinds `config`, which until now held the NetworkConfig instance.
config = {
    # Training parameters
    'batch_size': batch_size,
    'epochs': epochs,
    'learning_rate': learning_rate,
    'device': str(device),

    # Model parameters
    'input_dim': input_dim,
    'seq_len': seq_length,
    'base_channels': base_channels,
    'compression_channels': compression_channels,
    # n_blocks was previously missing, so the architecture could not be
    # rebuilt from config.json alone.
    'n_blocks': n_blocks,
    'progression': progression,

    # Architecture information
    'optimizer': optimizer.__class__.__name__,

    # Timestamp and folder
    'timestamp': timer,
    'model_folder': model_folder
}

# Write the snapshot into the run directory.
with open(folder + 'config.json', 'w', encoding='utf-8') as f:
    json.dump(config, f, ensure_ascii=False, indent=4)

In [None]:
from src.utils.train import train_model

# Pass the `epochs` setting (the same value written to config.json) instead of
# repeating the literal 100, so the recorded and the actual run cannot diverge.
# NOTE(review): the second positional argument is presumably the epoch count -
# confirm against train_model's signature.
train_model(model, epochs, train_loader, test_loader, optimizer, device, folder)

In [None]:
# Identifier (sub-folder name) of the run to evaluate, and the checkpoint
# file to restore from it.
timestamp = "1740724678.7526445"
path_checkpoint = f"{model_folder}{timestamp}/checkpoints/model_epoch_7.pt"

In [None]:
# torch.load unpickles the file: only open checkpoints from a trusted source.
# map_location moves the stored tensors onto the current `device`, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
checkpoint = torch.load(path_checkpoint, map_location=device)

# NOTE(review): the 'transformer_state_dict' key must match what the training
# code wrote into the checkpoint - verify against the saving routine.
model.load_state_dict(checkpoint['transformer_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Switch to inference mode (BatchNorm uses its running statistics).
model.eval()

In [None]:
from src.types import dsf_dtype_dict

# Read three columns only; DlyCalDt is parsed as a date, dtypes come from
# the project's dsf_dtype_dict mapping.
dsf = pd.read_csv(
    "./dsf_v2_patched_small.csv",
    usecols=['DlyCalDt', 'PERMNO', 'DlyClose'],
    dtype=dsf_dtype_dict,
    parse_dates=['DlyCalDt'],
)

In [None]:
# ALL WINDOWS
# Every column of the "all windows" file is loaded (no usecols filter);
# the frame is handed to build_predictions in the next cell.
final_stocks = pd.read_csv("./final_stocks_2.csv")

In [None]:
from src.benchmark.benchmark import build_predictions

# The last argument is the directory of the run being evaluated.
predictions_results = build_predictions(
    model,
    final_stocks,
    device,
    f"{model_folder}{timestamp}/",
)

In [None]:
# Reload from the directory of the run being evaluated (`timestamp`), the same
# one passed to build_predictions and analyze_quantiles. The previous code
# read from `folder`, which is the directory of the freshly created training
# run and therefore a different path.
# NOTE(review): assumes build_predictions writes predictions_results.csv into
# the directory it is given - confirm.
predictions_results = pd.read_csv(model_folder + timestamp + "/predictions_results.csv")

In [None]:
from src.benchmark.benchmark import build_quantiles

# Quantile levels handed to build_quantiles.
quantiles = [
    0.98,
    0.99,
    0.994,
    0.995,
    0.996,
    0.997,
]
quantiles_results = build_quantiles(predictions_results, quantiles, dsf)

In [None]:
from src.benchmark.benchmark import analyze_quantiles

# Same analysis run twice on the same target path, with the third argument
# set to False and then True (its meaning is defined by analyze_quantiles).
stats_path = f"{model_folder}{timestamp}/stats"
analyze_quantiles(quantiles_results, stats_path, False)
analyze_quantiles(quantiles_results, stats_path, True)