# Library

In [None]:
# Native library
from os.path import join

# Data management
import numpy as np
import xarray as xr

# Data preprocessing
from src.data.preprocessing import Smoother
from src.data.process_data import (merge_satellite, compute_vi, add_observation, 
                                   add_weather, statedev_fill, features_modification,
                                   scale_data, create_id)

import torch
import torch.nn as nn
from src.models.trainer import Trainer
from src.models.model import CustomModel
from src.models.train import get_device
from src.models.dataloader import get_dataloaders, DataLoader

from src.constants import FOLDER, S_COLUMNS, TARGET
from utils import ROOT_DIR

TEST = False

# Load Data

In [None]:
# Directory of the processed data for the configured FOLDER;
# it is passed to scale_data in a later cell.
processed_dir = join(ROOT_DIR, "data", "processed", FOLDER)
# Name of the file handed to merge_satellite
file_name = "train.nc"

# Build the satellite dataset from the given file
xds = merge_satellite(file_name)
# Bare expression: shown as the cell output in the notebook
xds

# Process Data

In [None]:
# Process the EY data and merge it into the satellite dataset
# (TEST is False in this notebook; its exact effect is defined in add_observation)
xds = add_observation(xds, TEST)
xds

In [None]:
# Process the weather data and merge it into the satellite & EY dataset
xds = add_weather(xds)
xds

In [None]:
# Compute vegetation indices
xds = compute_vi(xds)
xds

In [None]:
# Fill missing vegetation indices and replace abnormal values
xds = statedev_fill(xds)
xds

In [None]:
# Smooth the variables
# NOTE(review): mode 'savgol' presumably selects a Savitzky-Golay filter — confirm in Smoother
xds = Smoother(mode='savgol').transform(xds)
xds

In [None]:
# Create new features
# (TEST is False in this notebook; its exact effect is defined in features_modification)
xds = features_modification(xds, TEST)
xds

In [None]:
# Scale the data
# NOTE(review): processed_dir is presumably where the scaler is stored/loaded — confirm in scale_data
xds = scale_data(xds, processed_dir, TEST)
xds

In [None]:
# Add an id for each line (row) of the dataset
xds = create_id(xds)
xds

# Initialize Model

In [None]:
def init_model() -> tuple[dict, DataLoader, DataLoader]:
    """ Build the model config from the hyperparameters hard-coded below
        + get the training and validation dataloaders.

    No W&B logger or sweep file is read here: every hyperparameter is a
    literal defined in this function. The input feature sizes stored in the
    config are taken from the first sample of the training dataset.

    :return: the model config and the training and validation dataloaders
    :rtype: (dict, DataLoader, DataLoader)
    """

    # Training hyperparameters
    epochs = 25
    batch_size = 16
    learning_rate = .001
    scheduler_patience = 4
    criterion = 'MSELoss'
    optimizer = 'AdamW'

    # Dropout rates
    lstm_dropout = .5
    cnn_dropout = .1
    fc_dropout = .4

    # Layer sizes
    s_hidden_size = 150
    s_num_layers = 2
    m_hidden_size = 150
    m_num_layers = 2
    c_out_in_features_1 = 150
    c_out_in_features_2 = 150

    # NOTE(review): 0.2 is presumably the validation split ratio — confirm
    # against get_dataloaders. The third returned value is not needed here.
    train_dataloader, val_dataloader, _ = get_dataloaders(batch_size, 0.2, get_device())

    # Read the input feature sizes once from the first training sample.
    first_row = train_dataloader.dataset[0]
    s_num_features = first_row['s_input'].shape[1]
    m_num_features = first_row['m_input'].shape[1]
    g_in_features = first_row['g_input'].shape[0]

    # NOTE(review): the `- 2` applied to each hidden size presumably mirrors a
    # size reduction inside CustomModel — confirm against the model definition.
    c_in_features = s_hidden_size - 2 + m_hidden_size - 2 + g_in_features

    config = {
        'batch_size': batch_size,
        's_hidden_size': s_hidden_size,
        's_num_layers': s_num_layers,
        'm_hidden_size': m_hidden_size,
        'm_num_layers': m_num_layers,
        'learning_rate': learning_rate,
        'scheduler_patience': scheduler_patience,
        'lstm_dropout': lstm_dropout,
        'cnn_dropout': cnn_dropout,
        'fc_dropout': fc_dropout,
        'epochs': epochs,
        'optimizer': optimizer,
        'criterion': criterion,
        's_num_features': s_num_features,
        'm_num_features': m_num_features,
        'g_in_features': g_in_features,
        'c_in_features': c_in_features,
        'c_out_in_features_1': c_out_in_features_1,
        'c_out_in_features_2': c_out_in_features_2,
        # len() of the dataloaders (for a torch DataLoader this is the number
        # of batches, not of samples — TODO confirm for the project's loader)
        'train_size': len(train_dataloader),
        'val_size': len(val_dataloader),
    }

    return config, train_dataloader, val_dataloader

# Train Model

Train using the training script instead of the cell below to avoid a tqdm bug.

In [None]:
# empty the GPU cache (releases memory still cached by PyTorch's allocator)
torch.cuda.empty_cache()

# get the device
device = get_device()

# build the model config (hyperparameters are hard-coded in init_model)
# + get the training and validation dataloaders
config, train_dataloader, val_dataloader = init_model()

# init the model and move it to the selected device
model = CustomModel(config)
model.to(device)

# init the loss, optimizer and learning rate scheduler.
# The loss and optimizer classes are looked up from the names stored in the
# config ('MSELoss' in torch.nn, 'AdamW' in torch.optim) so that changing the
# config is not silently ignored here.
criterion = getattr(nn, config['criterion'])()
optimizer = getattr(torch.optim, config['optimizer'])(model.parameters(),
                                                      lr=config['learning_rate'])
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — check the
# installed version before upgrading.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                        patience=config['scheduler_patience'],
                                                        verbose=True)

# keyword arguments handed to the Trainer
train_config = {
    'model': model,
    'train_dataloader': train_dataloader,
    'val_dataloader': val_dataloader,
    'epochs': config['epochs'],
    'criterion': criterion,
    'optimizer': optimizer,
    'scheduler': scheduler,
}

# init the trainer
trainer = Trainer(**train_config)

# train the model
trainer.train()