# *__Working on BTCUSD predictions with GRU model.__*

## *__Check first before starting__*

In [83]:
# WARNING: running this cell terminates the Python process right away via
# os._exit, without normal interpreter shutdown. Only run it on purpose
# (e.g. to force the kernel to restart).
import os
os._exit(00)  # Immediately kill the Python process (exit status 0)

: 

In [82]:
# Sanity check that the notebook kernel executes code.
# Run 'pipenv install ipykernel' if you get an error.
print("Kernel is working correctly!")

Kernel is working correctly!


In [70]:
import os

# Change the working directory to the project root.
# NOTE(review): this is an absolute, machine-specific path; it must be edited
# before the notebook is run on another machine.
Working_directory = os.path.normpath("C:/Users/gilda/OneDrive/Documents/_NYCU/MASTER_S_studies/Master\'s Thesis/LABORATORY/_Global_Pytorch/Continual_Learning")
os.chdir(Working_directory)
print(f"Working directory: {os.getcwd()}")  # Prints the current working directory

Working directory: C:\Users\gilda\OneDrive\Documents\_NYCU\MASTER_S_studies\Master's Thesis\LABORATORY\_Global_Pytorch\Continual_Learning


## **__All imports__**

In [71]:
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_interactions import zoom_factory, panhandler
from sklearn.model_selection import train_test_split
import pickle
from ta import trend, momentum, volatility, volume
import math
from scipy.ndimage import gaussian_filter1d
from typing import Callable, Tuple, Union
import shutil
import contextlib
import traceback
import gc
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

## __**All functions (For data processing)**__

In [72]:
def ensure_folder(folder_path):
    """
    Make sure a folder exists, creating it (and any missing parents) if needed.

    Parameters:
    - folder_path (str): Path of the folder that must exist after the call.

    Raises:
    - FileExistsError: If `folder_path` already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent and removes the window between a
    # separate "exists?" check and the creation in which another process could
    # create the folder and make os.makedirs fail.
    os.makedirs(folder_path, exist_ok=True)

def plot_with_matplotlib(data: pd.DataFrame, 
                         title: str, 
                         interactive: bool = False, 
                         save_path: str = None, 
                         show_plot: bool = True, 
                         save_matplotlib_object: str = None) -> None:
    """
    Plot the 'close' column with Matplotlib, optionally colored by trend.

    When the DataFrame has a 'trend' column, consecutive rows are joined by
    individual line segments whose color is chosen from the trend value of the
    segment's end row. Without a 'trend' column a single line with markers is
    drawn.

    Parameters:
    - data (pd.DataFrame): The data to plot, must contain 'close' column.
    - title (str): The title of the plot.
    - interactive (bool): If True, enables zoom and pan through mpl-interactions.
    - save_path (Optional[str]): If provided, saves the plot image to this path.
    - show_plot (bool): If True, displays the plot. If False, the figure is
      closed after any requested saving so it does not stay open in pyplot.
    - save_matplotlib_object (Optional[str]): If provided, pickles the Matplotlib
      figure object to this file path.

    Raises:
    - ValueError: If 'close' is not a column of `data`.
    """
    if 'close' not in data.columns:
        raise ValueError("The input DataFrame must contain 'close' column.")

    # Use the default Matplotlib color cycle for the line
    default_blue = plt.rcParams['axes.prop_cycle'].by_key()['color'][0]
    print(f"Default blue: {default_blue}")

    # Explicit colors for the known trend codes
    trend_colors = {
        0: 'black',
        1: 'yellow',
        2: 'red',
        3: 'green',
        4: default_blue,
    }
    # Trend codes outside 0-4 are drawn with this color instead of raising a
    # KeyError, so frames labelled with other trend encodings can be plotted.
    fallback_color = 'gray'

    fig, ax = plt.subplots(figsize=(12, 6))

    if 'trend' in data.columns:
        legend_added = set()  # Trend codes that already own a legend entry
        x_values = data.index
        closes = data['close'].to_numpy()
        trends = data['trend'].to_numpy()

        # Iterate by position so empty frames and duplicate index labels are
        # handled; each segment joins row pos-1 to row pos.
        for pos in range(1, len(data)):
            trend_key = int(trends[pos])  # Convert trend value to int for lookup
            label = f'Trend {trend_key}' if trend_key not in legend_added else None
            ax.plot([x_values[pos - 1], x_values[pos]],
                    [closes[pos - 1], closes[pos]],
                    color=trend_colors.get(trend_key, fallback_color),
                    linestyle='-',
                    linewidth=1,
                    label=label)  # Label only the first segment of each trend
            legend_added.add(trend_key)

        ax.set_title(f"{title} (Connected, Colored by Trend)")
    else:
        # Default plot if no trend column exists
        ax.plot(data.index, data['close'], label='Closing Price', linestyle='-', marker='o', 
                markersize=2, linewidth=1, color=default_blue, markerfacecolor='green', markeredgecolor='black')
        ax.set_title(title)

    ax.set_xlabel('Date')
    ax.set_ylabel('Closing Price (USD)')

    # Only build a legend when something is labelled; otherwise Matplotlib
    # emits a "no artists with labels" warning.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend()
    ax.grid()

    # Enable interactivity if requested
    if interactive:
        zoom_factory(ax)  # Enable zoom with mouse wheel
        panhandler(fig)   # Enable panning with left-click
        print("Interactive mode enabled. Use mouse wheel to zoom and left click to pan.")

    # Save the plot if a path is provided
    if save_path:
        fig.tight_layout()  # Ensures the layout is clean
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Plot saved to: {save_path}")

    # Save the Matplotlib figure object
    if save_matplotlib_object:
        with open(save_matplotlib_object, 'wb') as f:
            pickle.dump(fig, f)
        print(f"Matplotlib figure object saved to: {save_matplotlib_object}")

    if show_plot:
        plt.show()
    else:
        print("Plot display skipped.")
        # Release the figure: otherwise it stays registered with pyplot and
        # accumulates across calls.
        plt.close(fig)

def load_and_show_pickle(pickle_file_path: str):
    """
    Unpickle a Matplotlib figure from disk and show it in a blocking window.

    Problems are reported on stdout instead of being raised: a missing file
    and any other failure each print their own message.

    Parameters:
    - pickle_file_path (str): Location of the pickled Matplotlib figure.

    Returns:
    - None
    """
    try:
        # NOTE: unpickling executes code stored in the file; only open
        # pickles that come from a trusted source.
        with open(pickle_file_path, "rb") as handle:
            # Keep a reference so the figure stays alive until it is shown.
            figure = pickle.load(handle)

        print(f"Figure successfully loaded and displayed from: {pickle_file_path}")

        # Blocking show keeps the window open and interactive.
        plt.show(block=True)

    except FileNotFoundError:
        print(f"Error: File not found at {pickle_file_path}. Please check the path.")
    except Exception as err:
        print(f"Error loading the pickled figure: {err}")

# Save data to CSV
def save_to_csv(df: pd.DataFrame, file_path: str):
    """
    Write a DataFrame (including its index) to a CSV file and report the path.

    Parameters:
        df (pd.DataFrame): Data to write.
        file_path (str): Destination path, including the file name.

    Returns:
        None
    """
    df.to_csv(file_path)

    confirmation = f"\nSuccessfully saved data with moving average to CSV: \n\t{file_path}\n"
    print(confirmation)

def read_csv_file(file_path: str, preview_rows: int = 5, 
                  days_towards_end: int = None, 
                  days_from_start: int = None, description: str = ""):
    """
    Load a CSV indexed by its 'date' column and optionally keep a date window.

    The two day filters are applied in sequence: first the trailing window
    (`days_towards_end`, measured back from the newest date), then the leading
    window (`days_from_start`, measured forward from the oldest date that
    survived the first filter). Using both selects a slice from the middle,
    e.g. `days_towards_end=100, days_from_start=50` keeps the first 50 days of
    the last 100 days.

    Args:
        file_path (str): Path of the CSV file; it must have a 'date' column.
        preview_rows (int): Rows shown from the head and tail; a falsy value
            disables the preview.
        days_towards_end (int, optional): Size in days of the trailing window.
        days_from_start (int, optional): Size in days of the leading window
            taken from the already filtered data.
        description (str): Optional text printed before loading.

    Returns:
        DataFrame: The loaded and filtered data. If reading fails, a message is
        printed and None is returned.
    """
    try:
        if description:
            print(f"\nDescription: {description}")
        print(f"\nFile path: {file_path}")

        frame = pd.read_csv(file_path, parse_dates=['date'], index_col='date')

        # Trailing window: keep rows no older than `days_towards_end` days
        # before the newest timestamp.
        if days_towards_end is not None:
            newest = frame.index.max()
            end_cutoff_date = newest - pd.Timedelta(days=days_towards_end)
            frame = frame.loc[frame.index >= end_cutoff_date]
            print(f"\nRetrieving data from the past {days_towards_end} days (from {end_cutoff_date.date()} onwards):")

        # Leading window: keep rows no newer than `days_from_start` days after
        # the oldest remaining timestamp.
        if days_from_start is not None:
            oldest = frame.index.min()
            start_cutoff_date = oldest + pd.Timedelta(days=days_from_start)
            frame = frame.loc[frame.index <= start_cutoff_date]
            print(f"\nRetrieving the first {days_from_start} days from the filtered data (up to {start_cutoff_date.date()}):")

        if preview_rows:
            print(f"\nPreview of the first {preview_rows} rows:")
            display(frame.head(preview_rows))
            print()

            print(f"\nPreview of the last {preview_rows} rows:")
            display(frame.tail(preview_rows))
            print()

        return frame
    except FileNotFoundError:
        print("Error: File not found. Please check the file path.")
    except pd.errors.EmptyDataError:
        print("Error: The file is empty.")
    except pd.errors.ParserError:
        print("Error: The file could not be parsed. Please check the file format.")
    except Exception as err:
        print(f"An unexpected error occurred: {err}")

def downsample_minute_data(data: pd.DataFrame, n: int) -> pd.DataFrame:
    """
    Keep only the rows whose timestamp minute is a multiple of `n`.

    The input is copied first; if its index is not already a DatetimeIndex the
    copy's index is converted with `pd.to_datetime`.

    Parameters:
        data (pd.DataFrame): Frame indexed by timestamps (or values convertible
            to timestamps).
        n (int): Minute step; rows with `minute % n == 0` are retained.

    Returns:
        pd.DataFrame: The retained rows.

    Raises:
        ValueError: If the index cannot be converted to a DatetimeIndex.
    """
    print("\n========---> Downsampling the data! \n")
    frame = data.copy()

    needs_conversion = not isinstance(frame.index, pd.DatetimeIndex)
    if needs_conversion:
        try:
            frame.index = pd.to_datetime(frame.index)
        except Exception as err:
            raise ValueError("The DataFrame index could not be converted to DatetimeIndex.") from err

    # Boolean mask of rows that fall on the n-minute grid
    on_grid = (frame.index.minute % n) == 0
    return frame[on_grid]

def add_indicators(data: pd.DataFrame, output_path: str, number_days: int = None, preview_rows: int = 5, freq: str = '1min') -> None:
    """
    Add technical indicators to a financial dataset and save the result as CSV.

    Processing steps, in order:
      1. Optionally trim the data to the last `number_days` days.
      2. Sort ascending by index and check the required columns.
      3. Reindex onto a continuous `freq` grid between the first and last
         timestamp and forward-fill the rows this introduces.
      4. Drop the 'volume weighted average' and 'barCount' columns if present.
      5. Compute trend, momentum, volume and volatility indicators with `ta`.
      6. Round to 9 decimals, replace zeros by missing values, forward-fill,
         and drop rows that still contain missing values.
      7. Sort descending by index and write the frame to `output_path`.

    Any exception raised along the way is caught, reported on stdout together
    with its traceback, and not re-raised.

    Args:
        data (pd.DataFrame): The input financial dataset. Must contain at least the following columns:
                             'open', 'high', 'low', 'close', and 'volume'. The index should be a datetime index.
        output_path (str): The file path where the enriched dataset will be saved as a CSV file.
        number_days (int, optional): The number of days to retain from the most recent date in the dataset.
                                     If None, no trimming is done.
        preview_rows (int, optional): Number of rows to preview during various stages of processing. Default is 5.
        freq (str): Frequency for the continuous time index. Default is '1min'.

    Returns:
        None: The function saves the processed dataset to a file and does not return a value.
    """
    try:
        print("\n========---> Adding indicators to the data!")
        if number_days is not None:
            # Trim data to the last `number_days` days
            last_date = data.index.max()  # Get the most recent date in the dataset
            cutoff_date = last_date - pd.Timedelta(days=number_days)  # Calculate the cutoff date
            data = data[data.index >= cutoff_date]  # Keep only rows on or after the cutoff date
            print(f"\nData trimmed to the last {number_days} days from {cutoff_date} to {last_date}.\n")

        # Sort data in ascending order by date for proper indicator calculations
        data = data.sort_index(ascending=True)
        print(f"\nPreview of the first {preview_rows} rows after reversing: \n")
        display(data.head(preview_rows))

        # Verify the presence of required columns
        required_columns = ['open', 'high', 'low', 'close', 'volume']
        if not all(col in data.columns for col in required_columns):
            raise ValueError("Dataset is missing required columns: 'open', 'high', 'low', 'close', 'volume'.")

        #-----------------------------------------------------------------
        # Timestamps of the continuous `freq` grid that are absent from the data
        # (computed before reindexing, so they can be reported below)
        missing_timestamps = pd.date_range(
            start=data.index.min(), # Returns smallest/earliest/oldest date
            end=data.index.max(),
            freq=freq,  # Use 'min' for a frequency of 1 minute, '30s' for a frequency of 30 seconds
            tz=data.index.tz,
        ).difference(data.index)

        # Generate a continuous time index at the requested `freq`
        full_time_index = pd.date_range(
            start=data.index.min(),
            end=data.index.max(),
            freq=freq, 
            tz=data.index.tz,
        )
        index_name = data.index.name
        
        # Reindex the DataFrame to include all timestamps, introducing NaNs for missing timestamps
        data = data.reindex(full_time_index)
        data.index.name = index_name  # Restore index name after reindexing

        # Fill missing data with the previous available value
        data = data.ffill()  # Forward-fill missing values

        # Report every timestamp that had to be filled in
        if missing_timestamps.empty:
            print("\nNo missing timestamps.\n")
        else:
            for timestamp in missing_timestamps:
                print(f"\nMissing timestamp: {timestamp}")
                try:
                    # Safely access the row after reindexing and forward-filling
                    print(f"Data at missing timestamp ({timestamp}):")
                    print(data.loc[timestamp], '\n')
                except KeyError:
                    print(f"No data available for timestamp {timestamp}\n")

        # Recompute the missing timestamps on the reindexed data as a check;
        # since the index now is the full grid, this is expected to be empty.
        missing_timestamps = pd.date_range(
            start=data.index.min(), # Returns smallest/earliest/oldest date
            end=data.index.max(),
            freq=freq,  # Use 'min' for a frequency of 1 minute, '30s' for a frequency of 30 seconds
            tz=data.index.tz,
        ).difference(data.index)
        print(f"\nMissing timestamps time: \n{missing_timestamps}\n")
        #-----------------------------------------------------------------


        # Drop unnecessary columns if present
        columns_to_drop = ['volume weighted average', 'barCount']  # Adjust if needed
        data = data.drop(columns=[col for col in columns_to_drop if col in data.columns], errors='ignore')

        # Add Technical Indicators

        ## Trend Indicators
        # Simple Moving Averages (SMA)
        data['SMA_5'] = trend.sma_indicator(data['close'], window=5)
        data['SMA_10'] = trend.sma_indicator(data['close'], window=10)
        data['SMA_50'] = trend.sma_indicator(data['close'], window=50)

        # Exponential Moving Average (EMA)
        data['EMA_10'] = trend.ema_indicator(data['close'], window=10)

        # Moving Average Convergence Divergence (MACD)
        data['MACD'] = trend.macd(data['close'])
        data['MACD_signal'] = trend.macd_signal(data['close'])

        # Bollinger Bands
        data['BB_upper'] = volatility.bollinger_hband(data['close'], window=20, window_dev=2)
        data['BB_middle'] = volatility.bollinger_mavg(data['close'], window=20)
        data['BB_lower'] = volatility.bollinger_lband(data['close'], window=20)

        ## Momentum Indicators
        # Relative Strength Index (RSI)
        data['RSI_14'] = momentum.rsi(data['close'], window=14)

        # Rate of Change (ROC)
        data['ROC_10'] = momentum.roc(data['close'], window=10)

        ## Volume Indicators
        # On-Balance Volume (OBV)
        data['OBV'] = volume.on_balance_volume(data['close'], data['volume'])

        # Accumulation/Distribution Line (A/D Line)
        data['AD_Line'] = volume.acc_dist_index(data['high'], data['low'], data['close'], data['volume'])

        ## Volatility Indicators
        # Average True Range (ATR)
        data['ATR_14'] = volatility.average_true_range(data['high'], data['low'], data['close'], window=14)

        # Bollinger Band Width (BBW)
        data['BBW'] = (data['BB_upper'] - data['BB_lower']) / data['BB_middle']

        # Adjust decimal precision as needed
        data = data.round(decimals=9) 

        # Replace 0 values with NaN to allow forward-fill
        # NOTE(review): this applies to every column, so genuine zeros
        # (e.g. zero volume or an indicator that is exactly 0) are also
        # overwritten by the previous row's value - confirm this is intended.
        data = data.replace(0, pd.NA)
        
        # Forward-fill to replace NaN (originally 0) with the previous row value
        data = data.ffill()

        # Drop rows with NaN values (technical indicators often result in NaN for the initial (oldest dates) rows)
        data = data.dropna()

        # Sort the dataset back to descending order for final output
        data = data.sort_index(ascending=False)

        print(f"\nPreview of the first {preview_rows} rows with indicators: \n")
        display(data.head(preview_rows))

        print(f"\nPreview of the last {preview_rows} rows with indicators: \n")
        display(data.tail(preview_rows))

        # Save the enriched dataset
        data.to_csv(output_path)
        print(f"\nEnriched dataset saved to \n\t{output_path}\n")

    except Exception as e:
        # Errors are reported, not propagated: the caller gets None either way.
        print(f"\nError adding indicators: {e}\n")
        print("\nDetailed traceback:")
        traceback.print_exc()

# Function to calculate percentage changes
def calculate_percentage_changes(data: pd.DataFrame) -> pd.DataFrame:
    """
    Turn a price frame into row-over-row percentage changes, oldest row first.

    The frame is sorted by index in ascending order, every column is replaced by
    its percentage change, and the untouched 'close' values are attached as
    'real_close'. Rows containing NaN (such as the first row, which has no
    predecessor) are removed.

    Parameters:
        data (pd.DataFrame): Numeric data with a datetime index and a 'close'
                             column.

    Returns:
        pd.DataFrame: Percentage changes in ascending index order plus the
                      'real_close' column holding the original close prices.
    """
    chronological = data.sort_index(ascending=True).copy()
    changes = chronological.pct_change()
    # Keep the raw closing price next to its percentage change
    changes = changes.assign(real_close=chronological['close'])
    return changes.dropna()

def calculate_log_returns_all_columns(data: pd.DataFrame, exclude_columns: list = None, dropna: bool = True) -> pd.DataFrame:
    """
    Calculate log returns for all numeric columns in a pandas DataFrame,
    excluding specified columns, and removing excluded columns from the returned DataFrame.

    The input frame is not modified. Non-numeric columns that are not excluded
    are carried over unchanged.

    Args:
        data (pd.DataFrame): Input DataFrame containing numeric data.
        exclude_columns (list, optional): Columns to drop before the calculation; they are
                                          absent from the result. Defaults to no exclusions.
        dropna (bool): Whether to drop rows with NaN values resulting from the calculation.

    Returns:
        pd.DataFrame: DataFrame with log returns for numeric columns,
                      excluding specified columns.

    Raises:
        ValueError: If a numeric column contains a zero or negative value.
    """
    # None instead of a mutable default list, which would be shared across calls.
    if exclude_columns is None:
        exclude_columns = []

    data = data.copy().drop(columns=exclude_columns)
    columns_to_transform = data.select_dtypes(include=[np.number]).columns
    print(f"columns_to_transform = \n{columns_to_transform}, \nlen(columns_to_transform) = {len(columns_to_transform)}")

    # Validate everything first so a failure never leaves a half-transformed frame.
    for col in columns_to_transform:
        if (data[col] <= 0).any():
            raise ValueError(f"Column '{col}' contains non-positive values. Log returns require strictly positive values.")

    for col in columns_to_transform:
        # log(p_t / p_{t-1}); the first row has no predecessor and becomes NaN
        data[col] = np.log(data[col] / data[col].shift(1))

    # Optionally drop rows with NaN values
    return data.dropna() if dropna else data

# Prepare the data for the model by creating sequences of input features and output targets.
def create_sequences(data: pd.DataFrame, 
                     input_length: int, 
                     output_length: int, 
                     sliding_interval: int = 60) -> tuple[np.ndarray, np.ndarray, pd.Index, np.ndarray]:
    """
    Generate input-output sequences with a sliding window.

    Each window consists of `input_length` consecutive rows used as model input,
    immediately followed by `output_length` rows used as target. Windows start
    every `sliding_interval` rows. The 'real_close' column is removed from the
    inputs but kept in the targets.

    Parameters:
        data (pd.DataFrame): DataFrame containing features for time-series modeling, indexed by 'date'.
                             Must contain the 'close' and 'real_close' columns.
        input_length (int): Number of timesteps for input sequences (e.g., 2880 for 2 days).
        output_length (int): Number of timesteps for output sequences (e.g., 1440 for 1 day).
        sliding_interval (int): Interval to slide the window (e.g., 60 for 1 hour).

    Returns:
        tuple[np.ndarray, np.ndarray, pd.Index, np.ndarray]:
            - X: Input sequences of shape (num_sequences, input_length, num_columns - 1),
                 i.e. all columns except 'real_close'.
            - y: Output sequences of shape (num_sequences, output_length, num_columns),
                 including 'real_close'.
            - indices: A pandas Index with the original index value at the start of each input sequence.
            - real_close_X: 'real_close' values over each input sequence, of shape
                            (num_sequences, input_length).

    Raises:
        KeyError: If the 'close' or 'real_close' column is missing.
    """
    # Preserve the date index for later reference
    original_index = data.index

    # Reset index to allow slicing by integer position
    data = data.reset_index(drop=True)  # Removes the 'date' column which was the index
    print("\nColumns after resetting index with drop=True: \n", data.columns)
    close_column_index = data.columns.get_loc('close')
    print(f"'close' column index: {close_column_index}")
    num_columns = data.shape[1]
    print(f"Number of columns: {num_columns}")
    print('')

    # Convert once up front instead of dropping/slicing a DataFrame per window.
    input_values = data.drop(columns=['real_close']).values
    target_values = data.values
    real_close_values = data['real_close'].values

    X, y, indices = [], [], []
    real_close_X = []
    total_length = len(data)

    for start in range(0, total_length - input_length - output_length + 1, sliding_interval):
        end_input = start + input_length
        end_output = end_input + output_length

        X.append(input_values[start:end_input])
        y.append(target_values[end_input:end_output])

        # Save the start date of the sequence
        indices.append(original_index[start])

        # Real close prices covering the input window
        real_close_X.append(real_close_values[start:end_input])

    return np.array(X), np.array(y), pd.Index(indices), np.array(real_close_X)

def created_sequences_2(data: pd.DataFrame, sequence_length: int = 60, sliding_interval: int = 60) -> list:
    """
    Cut the dataset into fixed-length windows taken every `sliding_interval` rows.

    Only complete windows are produced: a window that would run past the end of
    the data is not emitted.

    Args:
    - data (pd.DataFrame): The input DataFrame.
    - sequence_length (int): Number of rows in each window.
    - sliding_interval (int): Number of rows between consecutive window starts.

    Returns:
    - sequences (list): Independent DataFrame copies, one per window.
    """
    last_start = len(data) - sequence_length
    window_starts = range(0, last_start + 1, sliding_interval)
    return [data.iloc[begin:begin + sequence_length].copy() for begin in window_starts]

def prepare_percentage_change_data(df: pd.DataFrame, 
                                   features: list, 
                                   target: str = 'close', 
                                   window_size: int = 50, 
                                   test_size: float = 0.25, 
                                   shuffle: bool = False) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Prepares percentage change stock data for training a sequence-based model.

    Every window of `window_size` consecutive rows of the feature columns forms
    one input sample; its label is the `target` value of the row right after the
    window. The samples are then divided with `train_test_split`.

    Parameters:
        df (pd.DataFrame): The percentage change data.
        features (list): List of column names to use as input features.
        target (str): The column name to predict.
        window_size (int): The size of the sliding window.
        test_size (float): The proportion of the dataset to include in the test split.
        shuffle (bool): Whether to shuffle the data before splitting.

    Returns:
        X_train, X_test, y_train, y_test: Prepared train and test datasets.
    """
    # Select the columns once; doing it inside the loop rebuilds a DataFrame
    # for every window.
    feature_values = df[features].values
    target_series = df[target]

    sequences = []
    targets = []
    for i in range(len(df) - window_size):
        sequences.append(feature_values[i:i + window_size])
        targets.append(target_series.iloc[i + window_size])  # Predict the target at the next step

    sequences = np.array(sequences)
    targets = np.array(targets)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(sequences, targets, test_size=test_size, shuffle=shuffle)

    return X_train, X_test, y_train, y_test

def moving_average(data, window_size=5):
    """
    Return the rolling mean of `data` after sorting it by index (ascending).

    Args:
        data: pandas Series or DataFrame to smooth.
        window_size (int): Number of consecutive rows averaged. Defaults to 5.

    Returns:
        Object of the same kind as `data`; the first `window_size - 1` rows are NaN.
    """
    chronological = data.sort_index(ascending=True).copy()
    window = chronological.rolling(window=window_size)
    return window.mean()

def exponential_moving_average(data, span=5):
    """
    Return the exponentially weighted mean of `data` after sorting it by index (ascending).

    The recursive form (`adjust=False`) is used.

    Args:
        data: pandas Series or DataFrame to smooth.
        span (int): Span of the exponential window. Defaults to 5.

    Returns:
        Object of the same kind and length as `data`.
    """
    chronological = data.sort_index(ascending=True).copy()
    weighted = chronological.ewm(span=span, adjust=False)
    return weighted.mean()

def gaussian_smoothing(data: pd.DataFrame, sigma=2) -> pd.DataFrame:
    """
    Applies Gaussian smoothing to numeric columns in a DataFrame and ensures the index is sorted in ascending order.

    Integer columns are converted to float before filtering, so their smoothed
    values are not truncated back to whole numbers. Non-numeric columns are left
    unchanged. The input frame is not modified.

    Args:
        data (pd.DataFrame): Input DataFrame.
        sigma (float): Standard deviation for the Gaussian kernel. Defaults to 2.

    Returns:
        pd.DataFrame: A DataFrame with smoothed numeric columns and sorted index.
    """
    # Sort the DataFrame by index in ascending order
    data = data.sort_index(ascending=True).copy()
    for column in data.columns:
        if not pd.api.types.is_numeric_dtype(data[column]):
            continue  # Only apply to numeric columns
        values = data[column].values
        # gaussian_filter1d writes its result in the input's dtype, which
        # would drop the fractional part for integer input.
        if isinstance(values, np.ndarray) and np.issubdtype(values.dtype, np.integer):
            values = values.astype(float)
        data[column] = gaussian_filter1d(values, sigma=sigma)
    return data

def detect_trends(
    dataframe: pd.DataFrame, 
    column: str = 'close', 
    lower_threshold: float = 0.10, 
    upper_threshold: float = 0.20
) -> pd.DataFrame:
    """
    Add log returns of a price column and a trend label derived from them.

    Args:
        dataframe (pd.DataFrame): Input DataFrame containing the price data.
        column (str): Column name for price data. Defaults to 'close'.
        lower_threshold (float): Log-return magnitude separating "no trend" from a
            moderate trend. Defaults to 0.10.
        upper_threshold (float): Log-return magnitude separating a moderate from a
            very strong trend. Defaults to 0.20.

    Returns:
        pd.DataFrame: A copy of the input, without rows containing NaN, with two
                      extra columns:
                      - 'log_return': log(price_t / price_{t-1}).
                      - 'trend':
                          - -25 when log_return < -upper_threshold
                          - -15 when -upper_threshold <= log_return < -lower_threshold
                          - 0 when -lower_threshold <= log_return <= lower_threshold
                          - 15 when lower_threshold < log_return <= upper_threshold
                          - 25 otherwise
    """
    def label_for(value):
        # Guard clauses evaluated in the same order as the interval definitions above.
        if value < -upper_threshold:
            return -25
        if -upper_threshold <= value < -lower_threshold:
            return -15
        if -lower_threshold <= value <= lower_threshold:
            return 0
        if lower_threshold < value <= upper_threshold:
            return 15
        return 25

    # Work on a copy so the caller's frame stays untouched
    labelled = dataframe.copy()

    prices = labelled[column]
    labelled['log_return'] = np.log(prices / prices.shift(1))
    labelled['trend'] = labelled['log_return'].apply(label_for)

    # The first row has no previous price, so its log return is NaN and it is dropped here
    return labelled.dropna()

def detect_trends_2(
    dataframe: pd.DataFrame, 
    column: str = 'close', 
    lower_threshold: float = 0.10, 
    upper_threshold: float = 0.20
) -> pd.DataFrame:
    """
    Add log returns of a price column and bin them into trend labels with `pd.cut`.

    The bins are right-closed: (-inf, -upper], (-upper, -lower], (-lower, lower],
    (lower, upper], (upper, inf).

    Args:
        dataframe (pd.DataFrame): Input DataFrame containing the price data.
        column (str): Column name for price data. Defaults to 'close'.
        lower_threshold (float): Log-return magnitude separating "no trend" from a
            moderate trend. Defaults to 0.10.
        upper_threshold (float): Log-return magnitude separating a moderate from a
            very strong trend. Defaults to 0.20.

    Returns:
        pd.DataFrame: A copy of the input, without rows containing NaN, with two
                      extra columns:
                      - 'log_return': log(price_t / price_{t-1}).
                      - 'trend': one of
                          - -25 for very strong negative trend
                          - -15 for moderate negative trend
                          - 0 for no trend
                          - 15 for moderate positive trend
                          - 25 for very strong positive trend
    """
    # Work on a copy so the caller's frame stays untouched
    labelled = dataframe.copy()

    prices = labelled[column]
    labelled['log_return'] = np.log(prices / prices.shift(1))

    # Bin edges from most negative to most positive, with one label per bin
    edges = [-np.inf, -upper_threshold, -lower_threshold, lower_threshold, upper_threshold, np.inf]
    trend_labels = [-25, -15, 0, 15, 25]
    binned = pd.cut(labelled['log_return'], bins=edges, labels=trend_labels, right=True)

    # Values that fall in no bin (e.g. NaN) get the "no trend" label before the integer cast
    labelled['trend'] = binned.fillna(0).astype(int)

    # The first row has no previous price, so its log return is NaN and it is dropped here
    return labelled.dropna()

def detect_trends_3(
    dataframe: pd.DataFrame, 
    column: str = 'close', 
    lower_threshold: float = 0.001, 
    upper_threshold: float = 0.02,
    reverse_steps: int = 7
) -> pd.DataFrame:
    """
    Segment a series of log returns into trend phases and label every row with the strength of its phase.

    The values in `column` are walked once, in order. Up to three consecutive
    phases of alternating direction are tracked at a time (an established
    phase, a counter-move, and a move against that counter-move). Each phase
    remembers the row positions it covers and the running product of
    `(1 + value)` over those rows. When a phase is finalised, all of its rows
    receive the same label, chosen from that running product.

    Args:
        dataframe (pd.DataFrame): Input DataFrame; it is not modified.
        column (str): Column holding the log return values. Defaults to 'close'.
        lower_threshold (float): Distance from 1 beyond which a phase counts as a moderate trend. Defaults to 0.001.
        upper_threshold (float): Distance from 1 beyond which a phase counts as a very strong trend. Defaults to 0.02.
        reverse_steps (int): Length a counter-move must reach for it to be confirmed as the new established phase. Defaults to 7.

    Returns:
        pd.DataFrame: A copy of the input with a 'trend' column:
            - 0: No trend
            - 1: Moderate negative trend  (1 - upper_threshold < product <= 1 - lower_threshold)
            - 2: Very strong negative trend (product <= 1 - upper_threshold)
            - 3: Moderate positive trend  (1 + lower_threshold < product <= 1 + upper_threshold)
            - 4: Very strong positive trend (product > 1 + upper_threshold)

    Function Details:
    1. **Direction**:
    - A value greater than 0 counts as upward; anything else (including 0) counts as downward.

    2. **First reversal**:
    - A value against the established phase opens a counter-move. Once the counter-move
      reaches `reverse_steps` rows, the established phase is labelled and the counter-move
      takes its place.

    3. **Second reversal**:
    - A value against the counter-move opens a third phase. After each further value in
      that direction, if the combined product of the counter-move and the third phase has
      moved in the third phase's direction (above 1 when rising, below 1 when falling),
      all three phases are merged into the established phase.
    - Otherwise, when the third phase reaches `reverse_steps` rows, the two older phases
      are labelled separately and the third phase becomes the established one.

    4. **Third reversal**:
    - A value against the third phase labels the established phase, shifts the two
      remaining phases up by one slot and opens a fresh third phase.

    5. **End of data**:
    - Every phase still being tracked is labelled, so each row ends up with a label.
    """
    # Work on a copy so the caller's DataFrame is left as it was.
    df = dataframe.copy()
    df['trend'] = None  # Placeholder until each phase is labelled

    def classify(cumulative):
        """Map a phase's running product onto its trend label."""
        if cumulative > (1 + upper_threshold):
            return 4  # Very strong positive
        if (1 + lower_threshold) < cumulative <= (1 + upper_threshold):
            return 3  # Moderate positive
        if (1 - upper_threshold) < cumulative <= (1 - lower_threshold):
            return 1  # Moderate negative
        if cumulative <= (1 - upper_threshold):
            return 2  # Very strong negative
        return 0  # No trend

    def flush(phase):
        """Write the phase's label onto every row position it covers."""
        df.iloc[phase['ids'], df.columns.get_loc('trend')] = classify(phase['cumulative'])

    def open_phase(position, direction, value):
        """Start a new phase from a single observation."""
        return {'ids': [position], 'last_sign': direction, 'cumulative': (1 + value)}

    def grow(phase, position, direction, value):
        """Append one observation to an existing phase."""
        phase['ids'].append(position)
        phase['last_sign'] = direction
        phase['cumulative'] *= (1 + value)

    # established phase, counter-move, and move against the counter-move
    primary = pullback = rebound = None

    for position, value in enumerate(df[column]):
        direction = 1 if value > 0 else -1

        if primary is None:
            # Very first observation
            primary = open_phase(position, direction, value)

        elif pullback is None:
            if direction == primary['last_sign']:
                grow(primary, position, direction, value)
            else:
                # 1st reversal
                pullback = open_phase(position, direction, value)

        elif rebound is None:
            if direction == pullback['last_sign']:
                grow(pullback, position, direction, value)
                if len(pullback['ids']) == reverse_steps:
                    # The reversal persisted: close the old phase and promote the counter-move.
                    flush(primary)
                    primary, pullback = pullback, None
            else:
                # 2nd reversal
                rebound = open_phase(position, direction, value)

        elif direction == rebound['last_sign']:
            grow(rebound, position, direction, value)
            combined = pullback['cumulative'] * rebound['cumulative']
            if (direction == 1 and combined > 1) or (direction == -1 and combined < 1):
                # The counter-move has been fully retraced: fold everything into the established phase.
                primary['ids'] += pullback['ids'] + rebound['ids']
                primary['last_sign'] = rebound['last_sign']
                primary['cumulative'] *= combined
                pullback = rebound = None
            elif len(rebound['ids']) == reverse_steps:
                # The third phase persisted without retracing: close both older phases.
                flush(primary)
                flush(pullback)
                primary, pullback, rebound = rebound, None, None

        else:
            # 3rd reversal: close the oldest phase and shift the window forward.
            flush(primary)
            primary, pullback, rebound = pullback, rebound, open_phase(position, direction, value)

    # Label whatever is still being tracked once the data runs out.
    for phase in (primary, pullback, rebound):
        if phase:
            flush(phase)

    return df

def detect_trends_4(
    dataframe: pd.DataFrame, 
    column: str = 'close', 
    lower_threshold: float = 0.001, 
    upper_threshold: float = 0.02,
    reverse_steps: int = 7,
    trends_to_keep: set = {0, 1, 2, 3, 4}  # Default keeps all trends
) -> pd.DataFrame:
    """
    Detects trends based on log return data provided in a specified column, categorizes them
    into strength levels, and keeps only the requested categories.

    The values in `column` are walked once, in order. Three dictionaries (`dic1`, `dic2`, `dic3`)
    track up to three consecutive phases of alternating direction. Each one stores the row
    positions it covers ('ids'), its direction ('last_sign') and the running product of
    `(1 + value)` over its rows ('cumulative'). When a phase is finalised, all of its rows
    receive the same label, chosen from that running product.

    Args:
        dataframe (pd.DataFrame): Input DataFrame containing log return data; it is not modified.
        column (str): Column name containing log return values. Defaults to 'close'.
        lower_threshold (float): Threshold for categorizing moderate trends. Defaults to 0.001.
        upper_threshold (float): Threshold for categorizing strong trends. Defaults to 0.02.
        reverse_steps (int): Length a reversal must reach to be confirmed as the new trend. Defaults to 7.
        trends_to_keep (set): Trend categories to retain; others are set to 0 (No Trend).
            Defaults to keeping all trends {0, 1, 2, 3, 4}. The container is only read, never mutated.

    Returns:
        pd.DataFrame: A copy of the input with a 'trend' column:
            - 0: No trend
            - 1: Moderate negative trend  (1 - upper_threshold < product <= 1 - lower_threshold)
            - 2: Very strong negative trend (product <= 1 - upper_threshold)
            - 3: Moderate positive trend  (1 + lower_threshold < product <= 1 + upper_threshold)
            - 4: Very strong positive trend (product > 1 + upper_threshold)
        Any category not included in `trends_to_keep` is reset to 0.

    Function Details:
    1. **Direction**:
    - A value greater than 0 counts as upward; anything else (including 0) counts as downward.

    2. **First reversal**:
    - A value against `dic1` opens `dic2`. Once `dic2` reaches `reverse_steps` rows, `dic1` is
      labelled and `dic2` takes its place.

    3. **Second reversal**:
    - A value against `dic2` opens `dic3`. After each further value in that direction, if the
      combined product of `dic2` and `dic3` has moved in `dic3`'s direction (above 1 when
      rising, below 1 when falling), all three phases are merged into `dic1`.
    - Otherwise, when `dic3` reaches `reverse_steps` rows, `dic1` and `dic2` are labelled
      separately and `dic3` becomes the new `dic1`.

    4. **Third reversal**:
    - A value against `dic3` labels `dic1`, shifts `dic2`/`dic3` up by one slot and opens a
      fresh `dic3`.

    5. **End of data**:
    - Every phase still being tracked is labelled, so each row ends up with a label.

    6. **Trend Filtering**:
    - The `trends_to_keep` filter is applied when a phase is labelled, which gives the same
      result as filtering the finished column.
    """
    # Copy to avoid modifying the original DataFrame
    df = dataframe.copy()

    # Labels are collected in a plain list and written to the frame once at the end,
    # instead of scattering them into the DataFrame every time a phase is closed.
    labels = [0] * len(df)

    # Classification boundaries never change during the walk, so compute them once.
    strong_up = 1 + upper_threshold
    mild_up = 1 + lower_threshold
    mild_down = 1 - lower_threshold
    strong_down = 1 - upper_threshold

    def assign_label(phase):
        """Label every row of `phase` from its cumulative product, honouring `trends_to_keep`."""
        cumulative = phase['cumulative']
        if cumulative > strong_up:
            label = 4  # Very strong positive
        elif mild_up < cumulative <= strong_up:
            label = 3  # Moderate positive
        elif strong_down < cumulative <= mild_down:
            label = 1  # Moderate negative
        elif cumulative <= strong_down:
            label = 2  # Very strong negative
        else:
            label = 0  # No trend
        if label not in trends_to_keep:
            label = 0  # Filtered-out categories collapse to "No trend"
        for row in phase['ids']:
            labels[row] = label

    def start_phase(idx, sign, log_ret):
        """Open a new phase from a single observation."""
        return {'ids': [idx], 'last_sign': sign, 'cumulative': (1 + log_ret)}

    def extend_phase(phase, idx, log_ret):
        """Add one observation to a phase; its direction is unchanged by construction."""
        phase['ids'].append(idx)
        phase['cumulative'] *= (1 + log_ret)

    dic1, dic2, dic3 = None, None, None  # Initialize trend tracking dictionaries

    #----------------------- For Loop -----------------------#
    for idx, log_ret in enumerate(df[column]):
        sign = 1 if log_ret > 0 else -1

        if dic1 is None:  # Very first observation
            dic1 = start_phase(idx, sign, log_ret)
            continue

        if dic2 is None:
            if sign == dic1['last_sign']:  # Continue same trend
                extend_phase(dic1, idx, log_ret)
            else:  # 1st reversal occurring
                dic2 = start_phase(idx, sign, log_ret)
            continue

        if dic3 is None:
            if sign == dic2['last_sign']:  # The reversal continues
                extend_phase(dic2, idx, log_ret)
                if len(dic2['ids']) == reverse_steps:
                    # The reversal persisted: close dic1 and promote dic2.
                    assign_label(dic1)
                    dic1, dic2 = dic2, None
            else:  # 2nd reversal occurring
                dic3 = start_phase(idx, sign, log_ret)
            continue

        if sign == dic3['last_sign']:  # The 2nd reversal continues
            extend_phase(dic3, idx, log_ret)
            dic_prod = dic2['cumulative'] * dic3['cumulative']
            if (sign == 1 and dic_prod > 1) or (sign == -1 and dic_prod < 1):
                # dic3 has fully retraced dic2: merge dic1, dic2, and dic3
                dic1['ids'] += dic2['ids'] + dic3['ids']
                dic1['last_sign'] = dic3['last_sign']
                dic1['cumulative'] *= dic_prod
                dic2, dic3 = None, None
            elif len(dic3['ids']) == reverse_steps:
                # dic3 persisted without retracing dic2: close both older phases.
                assign_label(dic1)  # Labels for the ids of dic1
                assign_label(dic2)  # Labels for the ids of dic2
                dic1, dic2, dic3 = dic3, None, None
            continue

        # 3rd reversal occurring: close the oldest phase and shift the window forward.
        assign_label(dic1)
        dic1, dic2, dic3 = dic2, dic3, start_phase(idx, sign, log_ret)

    # Assign remaining labels
    for phase in (dic1, dic2, dic3):
        if phase is not None:
            assign_label(phase)

    df['trend'] = labels

    return df

def split_X_y(sequences: list[pd.DataFrame], 
              target_column: str = 'trend',
              detect_trends_function: Callable[[pd.DataFrame, str, float, float, int, set], pd.DataFrame] = detect_trends_4, 
              column: str = 'close', 
              lower_threshold: float = 0.0009, 
              upper_threshold: float = 0.015,
              reverse_steps: int = 7,
              trends_to_keep: set = {0, 1, 2, 3, 4}) -> Tuple[np.ndarray, np.ndarray]:
    """
    Run trend detection on every sequence and separate the result into features (X) and labels (y).

    Args:
    - sequences (list of pd.DataFrame): List of DataFrame sequences.
    - target_column (str): Column of the labelled sequence to use as the label (default: 'trend').
    - detect_trends_function (Callable): Function for detecting trends, defaults to `detect_trends_4`.
      It is called with the sequence plus `column`, `lower_threshold`, `upper_threshold`,
      `reverse_steps` and `trends_to_keep` as keyword arguments.
    - column (str): Column name handed to the trend detection function (default: 'close').
    - lower_threshold (float): Lower threshold for trend detection.
    - upper_threshold (float): Upper threshold for trend detection.
    - reverse_steps (int): Reversal length handed to the trend detection function.
    - trends_to_keep (set): A set of trend categories to retain; others will be set to 0 (No Trend).

    Returns:
    - X (np.ndarray): Stacked values of every column except `target_column`, one entry per
      sequence; presumably (num_sequences, sequence_length, num_features) when all sequences
      have the same shape.
    - y (np.ndarray): Stacked values of `target_column`, one entry per sequence (one label
      per row of each sequence).
    """
    detector_options = {
        'column': column,
        'lower_threshold': lower_threshold,
        'upper_threshold': upper_threshold,
        'reverse_steps': reverse_steps,
        'trends_to_keep': trends_to_keep,
    }

    def _features_and_labels(frame):
        """Label one sequence, then split it into (feature values, label values)."""
        labelled = detect_trends_function(frame, **detector_options)
        return labelled.drop(columns=[target_column]).values, labelled[target_column].values

    pairs = [_features_and_labels(frame) for frame in sequences]

    feature_blocks = [features for features, _ in pairs]
    label_blocks = [targets for _, targets in pairs]

    return np.array(feature_blocks), np.array(label_blocks)

def relabel_split_data(
    data: pd.DataFrame, 
    detect_trends_function: Callable[[pd.DataFrame, str, float, float, int], pd.DataFrame], 
    column: str = 'close', 
    lower_threshold: float = 0.0009, 
    upper_threshold: float = 0.015,
    reverse_steps: int = 7
) -> pd.DataFrame:
    """
    Relabels the leading and trailing runs of a DataFrame's 'trend' column.

    The run of rows at the start that share the first row's label, and the run
    at the end that share the last row's label, are each re-run through
    `detect_trends_function` in isolation (only `column` is passed on) and the
    resulting labels are written back. A run whose label is 0 is left alone.
    Progress messages are printed and the affected rows are shown with
    `display` before and after relabelling.

    Args:
        data (pd.DataFrame): The DataFrame to relabel; must contain a 'trend' column. It is not modified.
        detect_trends_function (Callable): Called positionally as
            `(segment, column, lower_threshold, upper_threshold, reverse_steps)`;
            must return a DataFrame with a 'trend' column.
        column (str): Column name for trend detection. Defaults to 'close'.
        lower_threshold (float): Lower threshold for trend detection.
        upper_threshold (float): Upper threshold for trend detection.
        reverse_steps (int): Number of steps for reversal detection.

    Returns:
        pd.DataFrame: The relabeled copy of `data`. An empty input is returned unchanged (as a copy).
    """
    data = data.copy()

    # With no rows there is no first/last label to inspect; `.iloc[0]` would raise IndexError.
    if len(data) == 0:
        print("The DataFrame is empty, so no operation needed. ")
        return data

    # Both edge labels are captured before anything is rewritten.
    start_label = data['trend'].iloc[0]
    end_label = data['trend'].iloc[-1]

    def relabel_edge(positions, edge_name, edge_label):
        """Re-detect trends on the rows at `positions` and write the new labels back into `data`."""
        segment = data.iloc[positions][[column]].copy()
        print(f"{edge_name} segment's label is not 0, it is {edge_label} and data: ")
        display(data.loc[segment.index])
        # The segment is empty when the edge label never equals itself (e.g. NaN).
        if not segment.empty:
            relabelled = detect_trends_function(segment, column, lower_threshold, upper_threshold, reverse_steps)
            # Index-aligned assignment: rows missing from the result become NaN.
            data.loc[segment.index, 'trend'] = relabelled['trend']
        print(f"{edge_name} segment after new label added: ")
        display(data.loc[segment.index])

    # Identify start segment
    if start_label != 0:
        leading = []
        for idx, label in enumerate(data['trend']):
            if label == start_label:
                leading.append(idx)
            else:
                break
        relabel_edge(leading, "Start", start_label)
    else:
        print(f"The start segment has a label of '{start_label}', so no operation needed. ")

    # Identify end segment (scanned after the start segment may have been rewritten)
    if end_label != 0:
        current_trend = data['trend']
        trailing = []
        for idx in range(len(data) - 1, -1, -1):
            if current_trend.iloc[idx] == end_label:
                trailing.append(idx)
            else:
                break
        relabel_edge(sorted(trailing), "End", end_label)
    else:
        print(f"The end segment has a label of '{end_label}', so no operation needed. ")

    return data

def process_and_return_splits(
    with_indicators_file_path: str,
    downsampled_data_minutes: int,
    exclude_columns: list[str],
    lower_threshold: float,
    upper_threshold: float,
    reverse_steps: int,
    sequence_length: int,
    sliding_interval: int,
    trends_to_keep: set = {0, 1, 2, 3, 4}  # Default keeps all trends
) -> tuple[
    list[list[float]],  # X_train: List of sequences, each containing a list of features
    list[list[int]],    # y_train: List of sequences, each containing a list of labels
    list[list[float]],  # X_val: List of sequences, each containing a list of features
    list[list[int]],    # y_val: List of sequences, each containing a list of labels
    list[list[float]],  # X_test: List of sequences, each containing a list of features
    list[list[int]]     # y_test: List of sequences, each containing a list of labels
]:
    """
    Processes time-series data from a CSV file and prepares it for machine learning.

    This function performs the following steps:
        1. Reads data from the specified CSV file and sorts it by date in descending order.
        2. Optionally downsamples the data to a lower frequency (e.g., 5-minute intervals).
        3. Applies Gaussian smoothing to reduce noise in the data.
        4. Calculates log returns for all numeric columns, excluding specified columns.
        5. Detects trends based on defined thresholds (`lower_threshold`, `upper_threshold`, and `reverse_steps`).
        6. Filters trends to keep only those specified in `trends_to_keep`, setting others to 0 (No Trend).
        7. Converts the processed data into sequences of a fixed length (`sequence_length`) with a sliding interval.
        8. Splits the sequences into training (80%), validation (10%), and test (10%) sets.
        9. Further splits the sequences into features (`X`) and labels (`y`) for supervised learning.

    Args:
        with_indicators_file_path (str): Path to the CSV file containing the time-series data.
        downsampled_data_minutes (int): Frequency for downsampling the data (e.g., 1 for no downsampling).
        exclude_columns (list[str]): List of column names to exclude from log return calculations.
        lower_threshold (float): Lower threshold for trend detection.
        upper_threshold (float): Upper threshold for trend detection.
        reverse_steps (int): Number of steps for reversing trends in trend detection.
        sequence_length (int): Length of sequences to create from the data.
        sliding_interval (int): Interval for sliding the window when creating sequences.
        trends_to_keep (set): A set of trend categories to retain; others will be set to 0 (No Trend). Defaults to keeping all trends {0, 1, 2, 3, 4}.

    Returns:
        tuple[list[list[float]], list[list[int]], list[list[float]], list[list[int]], list[list[float]], list[list[int]]]:
            A tuple containing:
            - X_train (list[list[float]]): Input sequences for training.
            - y_train (list[list[int]]): Target sequences for training.
            - X_val (list[list[float]]): Input sequences for validation.
            - y_val (list[list[int]]): Target sequences for validation.
            - X_test (list[list[float]]): Input sequences for testing.
            - y_test (list[list[int]]): Target sequences for testing.

    Example:
        X_train, y_train, X_val, y_val, X_test, y_test = process_and_return_splits(
            with_indicators_file_path="data.csv",
            downsampled_data_minutes=5,
            exclude_columns=["volume"],
            lower_threshold=-0.05,
            upper_threshold=0.05,
            reverse_steps=3,
            sequence_length=50,
            sliding_interval=5,
            trends_to_keep={1, 2, 3, 4}  # Only keep categorized trends, set others to 0
        )
    """

    data_retrieved = read_csv_file(with_indicators_file_path, preview_rows=0) # 190 days of data
    data_retrieved = data_retrieved.sort_index(ascending=False)

    #---------------------------------------------------------------------------------------
    if downsampled_data_minutes != 1:
        print("Downsampling the data! \n")
        data_retrieved = downsample_minute_data(data_retrieved, downsampled_data_minutes)
    #---------------------------------------------------------------------------------------

    # Get missing timestamps
    missing_timestamps = pd.date_range(
        start=data_retrieved.index.min(), # Returns smallest/earliest/oldest date
        end=data_retrieved.index.max(),
        freq='1min',  # Use 'min' for a frequency of 1 minute, '30s' for a frequency of 30 seconds
        tz=data_retrieved.index.tz,
    ).difference(data_retrieved.index)
    print(f"\ndata_retrieved - Missing timestamps time: \n{missing_timestamps}") 

    data_gaussian = gaussian_smoothing(data_retrieved, sigma=7)

    # Get missing timestamps
    missing_timestamps = pd.date_range(
        start=data_gaussian.index.min(), # Returns smallest/earliest/oldest date
        end=data_gaussian.index.max(),
        freq='1min',  # Use 'min' for a frequency of 1 minute, '30s' for a frequency of 30 seconds
        tz=data_gaussian.index.tz,
    ).difference(data_gaussian.index)
    print(f"\ndata_gaussian - Missing timestamps time: \n{missing_timestamps}\n")

    data_log_return = calculate_log_returns_all_columns(data_gaussian, exclude_columns=exclude_columns)

    # Get missing timestamps
    missing_timestamps = pd.date_range(
        start=data_log_return.index.min(), # Returns smallest/earliest/oldest date
        end=data_log_return.index.max(),
        freq='1min',  # Use 'min' for a frequency of 1 minute, '30s' for a frequency of 30 seconds
        tz=data_log_return.index.tz,
    ).difference(data_log_return.index)
    print(f"\ndata_log_return - Missing timestamps time: \n{missing_timestamps}\n") 

    # Check if there are missing timestamps
    if missing_timestamps.empty:
        print("No missing timestamps.")
    else:
        for timestamp in missing_timestamps:
            print(f"\nMissing timestamp: {timestamp}")
            
            # Create a placeholder for the missing timestamp
            if timestamp not in data_log_return.index:
                print('Missing')
            
            # Get data before and after the missing timestamp
            before_data = data_log_return[data_log_return.index < timestamp].tail(5)  # 5 data points before
            after_data = data_log_return[data_log_return.index > timestamp].head(5)  # 5 data points after
            
            # Display surrounding data
            if not before_data.empty:
                print("\nData before:")
                print(before_data)
            else:
                print("\nNo data available before the missing timestamp.")
            
            if not after_data.empty:
                print("\nData after:")
                print(after_data)
            else:
                print("\nNo data available after the missing timestamp.")

    sequences = created_sequences_2(data_log_return, sequence_length, sliding_interval)

    # Split sequences into training, validation, and test sets
    train_size = int(len(sequences) * 0.8)
    val_size = int(len(sequences) * 0.1)

    train_sequences = sequences[:train_size]
    val_sequences = sequences[train_size:train_size + val_size]
    test_sequences = sequences[train_size + val_size:]

    print(f"""
    Number of sequences:
        - sequences[0].shape: {sequences[0].shape}
        - Total sequences: {len(sequences)}
        - Train sequences: {len(train_sequences)}
        - Validation sequences: {len(val_sequences)}
        - Test sequences: {len(test_sequences)}
    """)

    # Process train, validation, and test sets
    X_train, y_train = split_X_y(train_sequences, 
                                target_column='trend',
                                detect_trends_function = detect_trends_4,
                                column= 'close',
                                lower_threshold=lower_threshold, 
                                upper_threshold=upper_threshold, 
                                reverse_steps=reverse_steps,
                                trends_to_keep=trends_to_keep)

    X_val, y_val = split_X_y(val_sequences, 
                            target_column='trend',
                            detect_trends_function = detect_trends_4,
                            column= 'close',
                            lower_threshold=lower_threshold, 
                            upper_threshold=upper_threshold, 
                            reverse_steps=reverse_steps,
                            trends_to_keep=trends_to_keep)

    X_test, y_test = split_X_y(test_sequences, 
                            target_column='trend',
                            detect_trends_function = detect_trends_4,
                            column= 'close',
                            lower_threshold=lower_threshold, 
                            upper_threshold=upper_threshold, 
                            reverse_steps=reverse_steps,
                            trends_to_keep=trends_to_keep)

    # Checking X arrays
    for idx, seq in enumerate(X_train):  # Loop through sequences
        for sub_idx, feature_set in enumerate(seq):  # Loop through data points
            for feature_idx, feature in enumerate(feature_set):  # Loop through features
                if not isinstance(feature, (float, np.float32)):  # Check each feature
                    print(f"Unexpected type in X_train at sequence {idx}, data point {sub_idx}, feature {feature_idx}: {type(feature)}")

    # Checking y arrays
    for idx, seq in enumerate(y_train):  # Loop through sequences
        for sub_idx, label in enumerate(seq):  # Loop through data points (labels)
            if not isinstance(label, (int, np.int64)):  # Check each label
                print(f"Unexpected type in y_train at sequence {idx}, data point {sub_idx}: {type(label)}")

    # Checking X arrays
    for idx, seq in enumerate(X_val):  # Loop through sequences
        for sub_idx, feature_set in enumerate(seq):  # Loop through data points
            for feature_idx, feature in enumerate(feature_set):  # Loop through features
                if not isinstance(feature, (float, np.float32)):  # Check each feature
                    print(f"Unexpected type in X_val at sequence {idx}, data point {sub_idx}, feature {feature_idx}: {type(feature)}")
    # Checking y arrays
    for idx, seq in enumerate(y_val):  # Loop through sequences
        for sub_idx, label in enumerate(seq):  # Loop through data points (labels)
            if not isinstance(label, (int, np.int64)):  # Check each label
                print(f"Unexpected type in y_val at sequence {idx}, data point {sub_idx}: {type(label)}")

    # Checking X arrays
    for idx, seq in enumerate(X_test):  # Loop through sequences
        for sub_idx, feature_set in enumerate(seq):  # Loop through data points
            for feature_idx, feature in enumerate(feature_set):  # Loop through features
                if not isinstance(feature, (float, np.float32)):  # Check each feature
                    print(f"Unexpected type in X_test at sequence {idx}, data point {sub_idx}, feature {feature_idx}: {type(feature)}")
    # Checking y arrays
    for idx, seq in enumerate(y_test):  # Loop through sequences
        for sub_idx, label in enumerate(seq):  # Loop through data points (labels)
            if not isinstance(label, (int, np.int64)):  # Check each label
                print(f"Unexpected type in y_test at sequence {idx}, data point {sub_idx}: {type(label)}")

    if isinstance(y_train, np.ndarray) and y_train.dtype == np.object_:
        # Convert to numeric if needed
        y_train = np.array(y_train, dtype=np.int64)

    if isinstance(y_val, np.ndarray) and y_val.dtype == np.object_:
        # Convert to numeric if needed
        y_val = np.array(y_val, dtype=np.int64)

    if isinstance(y_test, np.ndarray) and y_test.dtype == np.object_:
        # Convert to numeric if needed
        y_test = np.array(y_test, dtype=np.int64)

    close_col_index = data_log_return.columns.get_loc('close') # 'date' is set as index so doesnt count as a column
    Number_features = X_train.shape[-1]
    print(f"close_col_index = {close_col_index}")
    print(f"Number_features = {Number_features}")

    return X_train, y_train, X_val, y_val, X_test, y_test, Number_features


## All (initial) parameters here

In [73]:
# --- Data source -------------------------------------------------------------
ticker = 'BTC-USD'
downsampled_data_minutes = 1

# --- Trend settings ----------------------------------------------------------
# Step 0 (again): trend parameters identified on the loaded data with 1,000 data points.
lower_threshold, upper_threshold = 0.0009, 0.015
reverse_steps = 13

# --- Correlation buckets -----------------------------------------------------
# Step 3 (Correlation Analysis): columns grouped by their correlation with the
# 'trend' column, i.e. the result of
#   corr = data_trends.corr()
#   trend_corr = corr['trend'].sort_values(ascending=False)
strongly_correlated = ['close', 'open', 'SMA_5', 'high', 'low', 'EMA_10', 'SMA_10']  # correlation > 0.6
moderately_correlated = ['BB_middle', 'BB_lower', 'BB_upper', 'RSI_14']  # correlation between 0.3 and 0.6
weakly_correlated = ['SMA_50', 'volume', 'BBW', 'ATR_14']  # weak or negligible, correlation <~ 0.3

# --- Columns to exclude ------------------------------------------------------
# Fixed exclusions first, then every weakly and moderately correlated column.
# Previously tried alternative for the fixed part:
#   ['open', 'high', 'low', 'MACD', 'MACD_signal', 'BB_middle', 'ROC_10', 'OBV', 'AD_Line']
exclude_columns = ['MACD', 'MACD_signal', 'ROC_10', 'OBV', 'AD_Line']
exclude_columns.extend(weakly_correlated)
exclude_columns.extend(moderately_correlated)

# --- Sequence windowing ------------------------------------------------------
sequence_length = 1000
sliding_interval = 60

## **_Build the GRU Model_**

### GPU details

In [74]:
# If not in Jupyter Notebook
# import subprocess
# result = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
# print(result.stdout)

!nvidia-smi

Mon Mar 10 02:35:31 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.14                 Driver Version: 566.14         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   46C    P8              3W /  104W |    3341MiB /   8188MiB |      3%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

### Torch and CUDA check

In [75]:
# Ask PyTorch whether a CUDA device can be used and report the answer.
gpu_available = torch.cuda.is_available()

print("torch.cuda.is_available()", gpu_available)

if not gpu_available:
    print("No GPU detected.")
else:
    # Describe device 0 only.
    print("\nGPU Device Name:", torch.cuda.get_device_name(0))
    print("Number of GPUs:", torch.cuda.device_count())
    # Estimate only: multiplies the SM count by 128 cores per SM, which holds
    # for many, but not all, NVIDIA GPUs.
    sm_count = torch.cuda.get_device_properties(0).multi_processor_count
    print("Total CUDA Cores:", sm_count * 128)
    print("Current GPU Device:", torch.cuda.current_device())


torch.cuda.is_available() True

GPU Device Name: NVIDIA GeForce RTX 4060 Laptop GPU
Number of GPUs: 1
Total CUDA Cores: 3072
Current GPU Device: 0


### Model building

#### Bi-Directional GRU with Attention

In [76]:
class BiGRUWithAttention(nn.Module):
    """Bi-directional GRU with an element-wise tanh gate and a per-time-step classifier.

    The GRU is built with ``batch_first=True``, so ``forward`` expects ``x`` of
    shape (batch_size, seq_length, input_size) and returns logits of shape
    (batch_size, seq_length, output_size).

    Parameters:
    - input_size: Number of features per time step.
    - hidden_size: GRU hidden size per direction (features after the GRU are ``hidden_size * 2``).
    - output_size: Number of classes produced at every time step.
    - num_layers: Number of stacked GRU layers.
    - dropout: Dropout probability passed to the GRU and applied before the final linear layer.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int, num_layers: int, dropout: float = 0.0):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Bi-Directional GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True, dropout=dropout)
        # "Attention" layer: produces a tanh gate with the same width as the GRU output
        self.attention_fc = nn.Linear(hidden_size * 2, hidden_size * 2)  # Hidden size * 2 for bi-directional
        # Fully connected layer for classification
        self.fc = nn.Linear(hidden_size * 2, output_size)
        self.dropout = nn.Dropout(dropout)  # Apply dropout before the fully connected layer
        self.init_weights()

    def init_weights(self):
        """Xavier-initialize every parameter whose name contains 'weight'; zero every 'bias'."""
        for name, param in self.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)  # Xavier initialization for weights
            elif 'bias' in name:
                nn.init.constant_(param, 0)  # Zero initialization for biases

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Zero initial hidden state, shape (num_layers * 2, batch_size, hidden_size).
        # It is created directly on x's device AND with x's dtype: a float32 state
        # would make the GRU fail once the model/input are cast to double or half.
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)
        # Bi-Directional GRU forward pass
        out, _ = self.gru(x, h0)  # Shape: (batch_size, seq_length, hidden_size * 2)

        # Gating ("attention") mechanism: tanh scores multiplied element-wise into the features
        attn_weights = torch.tanh(self.attention_fc(out))  # Shape: (batch_size, seq_length, hidden_size * 2)
        out = attn_weights * out
        out = self.dropout(out)

        # Fully connected layer (applied at each time step)
        out = self.fc(out)  # Shape: (batch_size, seq_length, output_size)
        return out


#### New Architecture with LoRA

In [77]:
# Pure LoRA layer: freezes the base linear layer and updates only low-rank matrices.
class LoRALinear(nn.Module):
    """Linear layer made of a frozen base projection plus a trainable low-rank correction.

    The layer computes ``linear(x) + x @ (B @ A).T``. The wrapped ``nn.Linear``
    has ``requires_grad`` switched off at construction, so only ``A`` and ``B``
    receive gradients.

    Parameters:
    - in_features: Input features.
    - out_features: Output features (number of classes for classification).
    - r: Rank of the low-rank update matrices.
    """

    def __init__(self, in_features: int, out_features: int, r: int = 4):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.r = r

        # Base/original projection: frozen right away.
        base_layer = nn.Linear(in_features, out_features)
        base_layer.requires_grad_(False)
        self.linear = base_layer

        # Trainable adapter factors, drawn from N(0, 1) and scaled by 0.01.
        self.A = nn.Parameter(0.01 * torch.randn(r, in_features))
        self.B = nn.Parameter(0.01 * torch.randn(out_features, r))

    def forward(self, x):
        # Low-rank weight delta, shape (out_features, in_features): W' = W + (B @ A)
        delta_weight = self.B @ self.A
        adapter_out = F.linear(x, delta_weight)
        frozen_out = self.linear(x)
        return frozen_out + adapter_out

# New model architecture for Period 2 and beyond.
class BiGRUWithAttentionLoRA(nn.Module):
    """Bi-directional GRU with tanh gating whose classification head is a ``LoRALinear``.

    ``forward`` expects ``x`` of shape (batch, seq_len, input_size) because the
    GRU uses ``batch_first=True`` and returns logits of shape
    (batch, seq_len, output_size).

    Unlike ``BiGRUWithAttention`` there is deliberately no ``init_weights`` call:
    for Period 2 and beyond the base GRU/attention weights are transferred from
    the previous period's model (already well-initialized) and frozen rather
    than re-initialized.

    Parameters:
    - input_size: Number of features per time step.
    - hidden_size: GRU hidden size per direction.
    - output_size: Number of classes produced at every time step.
    - num_layers: Number of stacked GRU layers.
    - dropout: Dropout probability for the GRU and before the head.
    - lora_r: Rank handed to the ``LoRALinear`` head.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int, num_layers: int, dropout: float = 0.0, lora_r: int = 4):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Base GRU and attention layers (will be copied and frozen)
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True, dropout=dropout)
        self.attention_fc = nn.Linear(hidden_size * 2, hidden_size * 2)
        self.dropout = nn.Dropout(dropout)
        # Final classification layer is replaced with our pure LoRA layer.
        self.fc = LoRALinear(hidden_size * 2, output_size, r=lora_r)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Zero initial hidden state for the bi-directional GRU: (num_layers * 2, batch, hidden_size).
        # Built on x's device and in x's dtype so half/double models do not hit a
        # hidden-state dtype mismatch.
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)
        out, _ = self.gru(x, h0)  # Shape: [batch, seq_len, hidden_size*2]
        # Gating ("attention") mechanism: tanh scores multiplied element-wise into the features
        attn_weights = torch.tanh(self.attention_fc(out))
        out = attn_weights * out
        out = self.dropout(out)
        # Final prediction for each time step
        out = self.fc(out)  # Shape: [batch, seq_len, output_size]
        return out




## Step 6: Train the Models

### Training and validation function

In [78]:
def compute_classwise_accuracy(student_logits_flat, y_batch, class_correct, class_total):
    """
    Accumulate per-class correct/total counts for one batch (in place).

    Parameters:
    - student_logits_flat: Model predictions (logits) in shape [batch_size * seq_len, output_size].
    - y_batch: True labels in shape [batch_size * seq_len].
    - class_correct: Dictionary {class label: number of correct predictions}; updated in place.
    - class_total: Dictionary {class label: number of samples}; updated in place.

    Returns:
    - None. Every label present in ``y_batch`` ends up in BOTH dictionaries,
      with a ``class_correct`` entry of 0 if none of its samples were predicted correctly.
    """
    # Convert logits to predicted class indices
    predictions = torch.argmax(student_logits_flat, dim=-1).reshape(-1)  # Shape: [batch_size * seq_len]
    labels = y_batch.reshape(-1).to(predictions.device)
    hits = predictions == labels

    # Count on the tensor side instead of looping over every element in Python:
    # one pass for totals per class, one pass for correct predictions per class.
    classes, totals = torch.unique(labels, return_counts=True)
    hit_classes, hit_counts = torch.unique(labels[hits], return_counts=True)
    correct_by_class = dict(zip(hit_classes.tolist(), hit_counts.tolist()))

    for cls, count in zip(classes.tolist(), totals.tolist()):
        class_total[cls] = class_total.get(cls, 0) + count
        class_correct[cls] = class_correct.get(cls, 0) + correct_by_class.get(cls, 0)

###################
# Period 1 Training
###################

# Training and validation function for Period 1.
def _format_classwise_accuracy(class_correct, class_total):
    """Return ``{class_id: "NN.NN%"}`` for every class in ``class_total``, ordered by class id."""
    return {
        int(c): f"{(class_correct[c] / class_total[c]) * 100:.2f}%" if class_total[c] > 0 else "0.00%"
        for c in sorted(class_total.keys())
    }


def _save_training_checkpoint(path, epoch, train_loss, val_loss, model, optimizer):
    """Save model weights, optimizer state and epoch metadata to ``path`` with ``torch.save``."""
    torch.save({
        'epoch': epoch,
        'train_loss': train_loss,
        'val_loss': val_loss,
        'model_state_dict': model.state_dict(),  # Model weights
        'optimizer_state_dict': optimizer.state_dict(),  # Optimizer state
        'learning_rate': optimizer.param_groups[0]['lr'],
    }, path)


def train_and_validate(model, output_size, criterion, optimizer, 
                       X_train, y_train, X_val, y_val, scheduler, 
                       use_scheduler=None, num_epochs=10, batch_size=64, 
                       model_saving_folder=None, model_name=None, stop_signal_file=None):
    """
    Train ``model`` for Period 1, validating after every epoch and keeping the 5 best checkpoints.

    Parameters:
    - model: Network returning logits that can be viewed as (-1, output_size).
    - output_size: Number of classes (size of the last logits dimension).
    - criterion: Loss called as ``criterion(logits_flat, labels_flat)``.
    - optimizer: Optimizer that updates ``model``.
    - X_train, y_train, X_val, y_val: Training/validation data; copied into tensors
      (float32 features, long labels) on the selected device.
    - scheduler: Stepped as ``scheduler.step(val_loss)`` after each epoch, only when
      ``use_scheduler == True``.
    - use_scheduler: Enables the scheduler step.
    - num_epochs: Maximum number of epochs.
    - batch_size: Batch size of both DataLoaders.
    - model_saving_folder: Checkpoint folder. If given and it already exists, it is
      DELETED and recreated. If omitted, './saved_models' is used and only created.
    - model_name: Checkpoint file prefix (default 'model').
    - stop_signal_file: If this path exists at the start of an epoch, training stops.

    Returns:
    - None. Side effects: writes '<model_name>_epoch_<n>.pth' files for the best epochs
      and '<model_name>_final.pth' (if at least one epoch completed), and rebinds the
      module-level global ``best_results`` to the list of best-epoch records sorted by
      validation accuracy (descending).

    Checkpoints hold 'epoch', 'train_loss', 'val_loss', 'model_state_dict',
    'optimizer_state_dict' and 'learning_rate'; resume by loading the two state dicts.
    The scheduler state is not saved.
    """
    global best_results  # Ensure we can modify the external variable if defined outside.

    print("'train_and_validate' function started. \n")

    # An explicitly requested folder is wiped so each run starts clean
    # (shutil.rmtree because os.rmdir only works on empty folders).
    # The implicit default folder is never wiped, only created: resolving the
    # default BEFORE makedirs is what guarantees torch.save has a directory to write to.
    if model_saving_folder:
        if os.path.exists(model_saving_folder):
            shutil.rmtree(model_saving_folder)
            if not os.path.exists(model_saving_folder):
                print(f"Existing folder has been removed : {model_saving_folder}\n")
    else:
        model_saving_folder = './saved_models'
    os.makedirs(model_saving_folder, exist_ok=True)
    if not model_name:
        model_name = 'model'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    # Convert data to tensors (torch.tensor returns a copy, the caller's data is safe)
    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)  # (seqs, seq_len, features)
    y_train = torch.tensor(y_train, dtype=torch.long).to(device)    # (seqs, seq_len)
    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val = torch.tensor(y_val, dtype=torch.long).to(device)

    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Diagnostics: type, dtype and shape of every tensor
    for header, tensor in (("y_train:", y_train), ("X_train:", X_train),
                           ("\ny_val:", y_val), ("X_val:", X_val)):
        print(header)
        print(type(tensor))
        print(tensor.dtype)
        print(tensor.shape)

    # Debug prints for TensorDataset and DataLoader
    print("\nDataset Lengths:")
    print(f"Train Dataset Length: {len(train_dataset)}")
    print(f"Validation Dataset Length: {len(val_dataset)}")

    print("\nDataLoader Batch Sizes:")
    print(f"Number of Batches in Train DataLoader: {len(train_loader)}")
    print(f"Number of Batches in Validation DataLoader: {len(val_loader)}")

    print("\ny_train Unique Values and Stats:")
    print(f"Unique values in y_train: {y_train.unique()}")
    print(f"y_train Min: {y_train.min()}, Max: {y_train.max()}")

    print("\ny_val Unique Values and Stats:")
    print(f"Unique values in y_val: {y_val.unique()}")
    print(f"y_val Min: {y_val.min()}, Max: {y_val.max()}")

    print("\nDevice Info:")
    print(f"X_train Device: {X_train.device}")
    print(f"y_train Device: {y_train.device}")
    print(f"X_val Device: {X_val.device}")
    print(f"y_val Device: {y_val.device}\n")

    best_results = []       # Start empty each training run
    last_epoch_info = None  # Record of the most recent COMPLETED epoch (None if there is none)
    model.train()

    for epoch in range(num_epochs):
        if stop_signal_file and os.path.exists(stop_signal_file):
            print("\nStop signal detected. Exiting training loop safely.\n")
            break

        epoch_loss = 0
        class_correct = {}  # Correct predictions per class
        class_total = {}    # Total samples per class
        model.train()
        debug_batches_printed = 0
        for X_batch, y_batch in train_loader:
            # Reset gradients before forward pass
            optimizer.zero_grad()

            # Forward pass, flattened to one row per time step
            outputs = model(X_batch)
            outputs = outputs.view(-1, output_size)
            y_batch = y_batch.view(-1)

            # Target sanity dump for the first 3 batches of the second epoch only
            if epoch == 1 and debug_batches_printed < 3:
                debug_batches_printed += 1
                print(f"\nUnique target values: {y_batch.unique()}")
                print(f"Target dtype: {y_batch.dtype}")
                print(f"Min target: {y_batch.min()}, Max target: {y_batch.max()}")
                print("Unique classes in y_train:", y_train.unique())
                print(f"Unique classes in y_val: {y_val.unique()}\n")
            
            loss = criterion(outputs, y_batch)

            # Compute class-wise accuracy (accumulates values in the dicts)
            compute_classwise_accuracy(outputs, y_batch, class_correct, class_total)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item() * X_batch.size(0)  # Scale back to total loss
            
        train_loss = epoch_loss / len(train_loader.dataset)
        train_classwise_accuracy = _format_classwise_accuracy(class_correct, class_total)
        
        # Perform validation at the end of each epoch
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        val_class_correct = {}
        val_class_total = {}
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                val_outputs = model(X_val_batch).view(-1, output_size)
                val_labels = y_val_batch.view(-1)
                val_loss += criterion(val_outputs, val_labels).item() * X_val_batch.size(0)  # Scale to total loss
                val_predictions = torch.argmax(val_outputs, dim=-1)
                val_correct += (val_predictions == val_labels).sum().item()
                val_total += val_labels.size(0)
                compute_classwise_accuracy(val_outputs, val_labels, val_class_correct, val_class_total)
        val_loss /= len(val_loader.dataset)
        val_accuracy = val_correct / val_total
        val_classwise_accuracy = _format_classwise_accuracy(val_class_correct, val_class_total)

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Train Loss: {train_loss:.9f}, "
              f"Train-Class-Acc: {train_classwise_accuracy}, "
              f"Val Loss: {val_loss:.9f}, "
              f"Val Accuracy: {val_accuracy * 100:.2f}%, "
              f"Val-Class-Acc: {val_classwise_accuracy}, "
              f"LR: {optimizer.param_groups[0]['lr']:.9f}")

        current_epoch_info = {
            "epoch": epoch+1,
            "train_loss": train_loss,
            "train_classwise_accuracy": train_classwise_accuracy,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy,
            "val_classwise_accuracy": val_classwise_accuracy,
            'learning_rate': optimizer.param_groups[0]['lr'],
            "model_path": os.path.join(model_saving_folder, f"{model_name}_epoch_{epoch+1}.pth")
        }
        last_epoch_info = current_epoch_info

        # Keep this epoch if we have fewer than 5 results
        # or if it beats the lowest of the top 5
        if len(best_results) < 5 or val_accuracy > best_results[-1]["val_accuracy"]:
            if len(best_results) == 5:
                # Remove the worst model from the list, the last (lowest accuracy)
                worst = best_results.pop() 
                if os.path.exists(worst["model_path"]):
                    os.remove(worst["model_path"])
                    print(f"Removed old model with accuracy {worst['val_accuracy']*100:.2f}%, and file was at {worst['model_path']}")
            # Insert and keep the list sorted by val_accuracy, descending
            best_results.append(current_epoch_info) 
            best_results.sort(key=lambda x: x["val_accuracy"], reverse=True)
            _save_training_checkpoint(current_epoch_info["model_path"], epoch+1,
                                      train_loss, val_loss, model, optimizer)
            print(f"Model saved after epoch {epoch+1} to {current_epoch_info['model_path']} \n")

        # `== True` (not plain truthiness) is kept on purpose so that the set of
        # use_scheduler values enabling the step is exactly what callers rely on today.
        if use_scheduler == True:  # noqa: E712
            # Scheduler step comes last, after this epoch's results have been recorded
            scheduler.step(val_loss)

    # Save the final model, labelled with the last epoch that actually completed.
    # Nothing is saved if the loop never ran (num_epochs == 0 or immediate stop signal).
    if last_epoch_info is not None:
        final_model_path = os.path.join(model_saving_folder, f"{model_name}_final.pth")
        _save_training_checkpoint(final_model_path, last_epoch_info["epoch"],
                                  last_epoch_info["train_loss"], last_epoch_info["val_loss"],
                                  model, optimizer)
        print(f"\nFinal model saved to {final_model_path}")
    else:
        print("\nNo epoch was completed; no final model was saved.")

    print("\nTraining complete. \n\nTop 5 Models Sorted by Validation Accuracy: ")
    for res in best_results:        
        # Each line reports the class-wise accuracies recorded for THAT epoch
        print(f"Epoch {res['epoch']}/{num_epochs}, "
              f"Train Loss: {res['train_loss']:.9f}, "
              f"Train-Class-Acc: {res['train_classwise_accuracy']}, " 
              f"Val Loss: {res['val_loss']:.9f}, "
              f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
              f"Val-Class-Acc: {res['val_classwise_accuracy']}, "
              f"Model Path: {res['model_path']}")
    print('\n')
    
    # Drop the device copies of the data and let CUDA release cached memory
    del X_train, y_train, X_val, y_val, train_dataset, val_dataset, train_loader, val_loader
    torch.cuda.empty_cache()

###########################################################
# Period 2 and beyond: Pure LoRA–Based Incremental Training
###########################################################

# Transfer the base classification layer weights from the Previous Period model to the Current Period model for stable classes.
def expand_and_transfer_fc(prev_period_model: "Union[BiGRUWithAttention, BiGRUWithAttentionLoRA]", 
                           current_period_model: "BiGRUWithAttentionLoRA") -> None:
    """
    Automatically transfer stable class parameters from the previous period model to the current period model.
    Assumes that index 0 is shifting, and stable classes are indices 1 to (old_output_size-1).
    For example, if in Period 1 (old_output_size=2), class index 1 is stable and should map to Period 2 new index 1:
        stable_class_map = {1: 1}

    Parameters:
    - prev_period_model: Previous model. Its ``fc`` is either a plain ``nn.Linear``
      or a LoRA head exposing ``linear``, ``A`` and ``B``.
    - current_period_model: Model whose frozen base layer ``fc.linear`` is written in place.

    Row 0 and any rows at or beyond ``old_output_size`` of the current base layer are left untouched.

    Raises:
    - ValueError: If the current head has fewer outputs than the previous one.

    NOTE(review): for a LoRA previous model the MERGED classifier
    ``linear.weight + B @ A`` is transferred (bias taken from ``linear.bias``), so the
    new frozen base reproduces the previous model's logits for the stable classes.
    Confirm this is the intended policy rather than copying the bare base weights.
    """
    prev_fc = prev_period_model.fc
    new_linear = current_period_model.fc.linear

    with torch.no_grad():
        if isinstance(prev_fc, nn.Linear):
            # Period 1 style head: plain linear classifier.
            old_weight = prev_fc.weight.data   # shape: (old_output_size, feature_dim)
            old_bias = prev_fc.bias.data       # shape: (old_output_size,)
        else:
            # LoRA head: it has no .weight/.bias of its own, so fold the adapter into the base weights.
            old_weight = prev_fc.linear.weight.data + (prev_fc.B @ prev_fc.A).data
            old_bias = prev_fc.linear.bias.data

        old_output_size = old_weight.shape[0]
        new_output_size = new_linear.weight.shape[0]
        if new_output_size < old_output_size:
            raise ValueError(
                f"Current model has {new_output_size} outputs but the previous model has "
                f"{old_output_size}; the classifier can only be expanded."
            )

        # Copy stable rows 1..old_output_size-1 onto the same indices. Writing through
        # .data modifies the parameters in place, so no re-assignment is needed.
        new_linear.weight.data[1:old_output_size] = old_weight[1:old_output_size]
        new_linear.bias.data[1:old_output_size] = old_bias[1:old_output_size]

# Freeze all parameters in the Period 2 and beyond models except the LoRA parameters (A and B).
def freeze_base_parameters(model: BiGRUWithAttentionLoRA):
    """Leave only the LoRA factors trainable.

    Every parameter whose qualified name contains "fc.A" or "fc.B" gets
    ``requires_grad = True``; all other parameters get ``requires_grad = False``.
    """
    trainable_markers = ("fc.A", "fc.B")
    for param_name, parameter in model.named_parameters():
        parameter.requires_grad = any(marker in param_name for marker in trainable_markers)

# Reinitialize the LoRA adapter row for a given class index (e.g., Class 0, which is shifting): only row `class_index` of B is redrawn; A is left unchanged.
def reinitialize_lora_for_class(model: BiGRUWithAttentionLoRA, class_index: int):
    """Redraw row ``class_index`` of the head's LoRA matrix ``B`` from N(0, 0.01).

    Only ``model.fc.B`` is touched, and it is modified in place.
    """
    with torch.no_grad():
        # Indexing yields a view, so the in-place draw writes straight into B.
        adapter_row = model.fc.B[class_index].data
        adapter_row.normal_(mean=0, std=0.01)

# Training and validation function for Period 2 and beyond 
def train_and_validate_lora(model, output_size, criterion, optimizer, 
                            X_train, y_train, X_val, y_val, scheduler, 
                            use_scheduler=None, num_epochs=10, batch_size=64,
                            model_saving_folder=None, model_name=None, stop_signal_file=None):
    """
    Train `model` with validation after every epoch, keeping checkpoints of the
    five epochs with the highest validation accuracy plus a final checkpoint.

    Parameters:
        model: Network to train; its output is reshaped to (-1, output_size).
        output_size: Size of the last output dimension (number of classes).
        criterion: Loss function (e.g. CrossEntropyLoss) applied to the flattened
            outputs and flattened labels.
        optimizer: Optimizer that updates the trainable parameters of `model`.
        X_train, y_train, X_val, y_val: Training/validation data (NumPy arrays or
            similar). They are copied into float32 (X) / long (y) tensors on the
            selected device; the caller's objects are not modified.
        scheduler: LR scheduler; `scheduler.step(val_loss)` is called once per epoch
            when `use_scheduler` is truthy.
        use_scheduler: Enables the scheduler step. Falsy (default) disables it.
        num_epochs: Maximum number of epochs to train.
        batch_size: Batch size for the training and validation DataLoaders.
        model_saving_folder: Folder for the checkpoints. If given and it already
            exists, it is deleted and recreated. If omitted, './saved_models' is
            used; that default folder is created when missing but never wiped.
        model_name: File-name prefix of the checkpoints (default 'model').
        stop_signal_file: Optional path. If this file exists at the start of an
            epoch, training stops before that epoch.

    Side effects:
        - Rebinds the module-level `best_results` to a list (at most 5 entries,
          sorted by validation accuracy, best first) describing the kept epochs.
        - Writes '<model_name>_epoch_<n>.pth' files and '<model_name>_final.pth'
          into the saving folder. The final file is only written when at least
          one epoch completed.

    Returns:
        None
    """
    def _save_checkpoint(path, epoch_number, train_loss_value, val_loss_value):
        # Single definition of the checkpoint layout, shared by the per-epoch
        # "top 5" files and the final model file.
        torch.save({
            'epoch': epoch_number,
            'train_loss': train_loss_value,
            'val_loss': val_loss_value,
            'model_state_dict': model.state_dict(),  # Model weights
            'optimizer_state_dict': optimizer.state_dict(),  # Optimizer state
            'learning_rate': optimizer.param_groups[0]['lr']
        }, path)

    def _format_classwise(correct, total):
        # Per-class accuracy as "xx.xx%" strings, keyed by integer class id.
        return {int(c): f"{(correct[c] / total[c]) * 100:.2f}%" if total[c] > 0 else "0.00%"
                for c in sorted(total.keys())}

    print("'train_and_validate_lora' function started. \n")

    # A folder supplied by the caller is reset (removed, then recreated) so that it
    # only holds checkpoints of this run. The fallback folder is not wiped because
    # the caller never asked for it explicitly.
    if model_saving_folder:
        if os.path.exists(model_saving_folder):
            shutil.rmtree(model_saving_folder)  # Removes the folder with all its contents
            if not os.path.exists(model_saving_folder):
                print(f"Existing folder has been removed : {model_saving_folder}\n")
    else:
        model_saving_folder = './saved_models'
    # Always make sure the target exists, including the default folder.
    os.makedirs(model_saving_folder, exist_ok=True)

    if not model_name:
        model_name = 'model'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.to(device)

    # Convert data to tensors (torch.tensor copies, the caller's data is untouched)
    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)  # (seqs, seq_len, features)
    y_train = torch.tensor(y_train, dtype=torch.long).to(device)    # (seqs, seq_len)
    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val = torch.tensor(y_val, dtype=torch.long).to(device)

    # Create TensorDatasets and DataLoaders
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Debug prints: type, dtype and shape of every tensor
    for label, tensor in (("y_train:", y_train), ("X_train:", X_train),
                          ("\ny_val:", y_val), ("X_val:", X_val)):
        print(label)
        print(type(tensor))
        print(tensor.dtype)
        print(tensor.shape)

    # Debug prints for TensorDataset and DataLoader
    print("\nDataset Lengths:")
    print(f"Train Dataset Length: {len(train_dataset)}")
    print(f"Validation Dataset Length: {len(val_dataset)}")

    print("\nDataLoader Batch Sizes:")
    print(f"Number of Batches in Train DataLoader: {len(train_loader)}")
    print(f"Number of Batches in Validation DataLoader: {len(val_loader)}")

    # Label statistics
    print("\ny_train Unique Values and Stats:")
    print(f"Unique values in y_train: {y_train.unique()}")
    print(f"y_train Min: {y_train.min()}, Max: {y_train.max()}")

    print("\ny_val Unique Values and Stats:")
    print(f"Unique values in y_val: {y_val.unique()}")
    print(f"y_val Min: {y_val.min()}, Max: {y_val.max()}")

    # Device check
    print("\nDevice Info:")
    print(f"X_train Device: {X_train.device}")
    print(f"y_train Device: {y_train.device}")
    print(f"X_val Device: {X_val.device}")
    print(f"y_val Device: {y_val.device}\n")

    global best_results  # The caller reads the results from this module-level name.
    best_results = []    # Start empty each training run

    # Stays None until one epoch has been trained and validated completely.
    current_epoch_info = None

    for epoch in range(num_epochs):
        if stop_signal_file and os.path.exists(stop_signal_file):
            print("\nStop signal detected. Exiting training loop safely.\n")
            break

        epoch_loss = 0.0
        class_correct = {}  # Correct predictions per class
        class_total = {}    # Total samples per class
        debug_batches_printed = 0

        model.train()
        for X_batch, y_batch in train_loader:
            # Reset gradients before the forward pass
            optimizer.zero_grad()
            # Forward pass, flattened so every time step is one classification sample
            outputs = model(X_batch)  # Shape: [batch, seq_len, output_size]
            outputs = outputs.view(-1, output_size)
            y_batch = y_batch.view(-1)
            loss = criterion(outputs, y_batch)
            # Accumulates per-class counts in the two dicts
            compute_classwise_accuracy(outputs, y_batch, class_correct, class_total)
            # Extra label diagnostics for the first three batches of the second epoch
            if epoch == 1 and debug_batches_printed < 3:
                debug_batches_printed += 1
                print(f"\nUnique target values: {y_batch.unique()}")
                print(f"Target dtype: {y_batch.dtype}")
                print(f"Min target: {y_batch.min()}, Max target: {y_batch.max()}")
                print("Unique classes in y_train:", y_train.unique())
                print(f"Unique classes in y_val: {y_val.unique()}\n")
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            # Weight the batch loss by the batch size so that the division below
            # yields a sample-weighted average (assumes the criterion averages
            # over the batch).
            epoch_loss += loss.item() * X_batch.size(0)
        train_loss = epoch_loss / len(train_loader.dataset)
        train_classwise_accuracy = _format_classwise(class_correct, class_total)

        # Validation at the end of each epoch (loss and accuracy only)
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        val_class_correct = {}
        val_class_total = {}
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                val_outputs = model(X_val_batch).view(-1, output_size)
                val_labels = y_val_batch.view(-1)
                val_loss += criterion(val_outputs, val_labels).item() * X_val_batch.size(0)
                val_predictions = torch.argmax(val_outputs, dim=-1)
                val_correct += (val_predictions == val_labels).sum().item()
                val_total += val_labels.size(0)
                compute_classwise_accuracy(val_outputs, val_labels, val_class_correct, val_class_total)
        val_loss /= len(val_loader.dataset)
        val_accuracy = val_correct / val_total
        val_classwise_accuracy = _format_classwise(val_class_correct, val_class_total)

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Train Loss: {train_loss:.9f}, "
              f"Train-Class-Acc: {train_classwise_accuracy}, "
              f"Val Loss: {val_loss:.9f}, "
              f"Val Accuracy: {val_accuracy * 100:.2f}%, "
              f"Val-Class-Acc: {val_classwise_accuracy}, "
              f"LR: {optimizer.param_groups[0]['lr']:.9f}")

        current_epoch_info = {
            "epoch": epoch+1,
            "train_loss": train_loss,
            "train_classwise_accuracy": train_classwise_accuracy,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy,
            "val_classwise_accuracy": val_classwise_accuracy,
            'learning_rate': optimizer.param_groups[0]['lr'],
            "model_path": os.path.join(model_saving_folder, f"{model_name}_epoch_{epoch+1}.pth")
        }

        # Keep this epoch if fewer than 5 results are stored, or if it beats the
        # worst stored one (the list is sorted best-first, so the worst is last).
        if len(best_results) < 5 or val_accuracy > best_results[-1]["val_accuracy"]:
            if len(best_results) == 5:
                worst = best_results.pop()
                if os.path.exists(worst["model_path"]):
                    os.remove(worst["model_path"])
                    print(f"Removed old model with accuracy {worst['val_accuracy']*100:.2f}%, and file was at {worst['model_path']}")
            best_results.append(current_epoch_info)
            best_results.sort(key=lambda x: x["val_accuracy"], reverse=True)
            _save_checkpoint(current_epoch_info["model_path"], epoch+1, train_loss, val_loss)
            print(f"Model saved after epoch {epoch+1} to {current_epoch_info['model_path']} \n")

        if use_scheduler:
            # The scheduler is stepped last, once the epoch's validation loss is known.
            scheduler.step(val_loss)

    # Save the final model. Values come from the last COMPLETED epoch, so a stop
    # signal (or num_epochs == 0) can neither crash here nor record an epoch
    # number that was never trained.
    if current_epoch_info is not None:
        final_model_path = os.path.join(model_saving_folder, f"{model_name}_final.pth")
        _save_checkpoint(final_model_path,
                         current_epoch_info["epoch"],
                         current_epoch_info["train_loss"],
                         current_epoch_info["val_loss"])
        print(f"\nFinal model saved to {final_model_path}")

    print("\nTraining complete. \n\nTop 5 Models Sorted by Validation Accuracy: ")
    for res in best_results:
        # Per-class accuracies are taken from each stored result, not from the
        # variables of the last epoch.
        print(f"Epoch {res['epoch']}/{num_epochs}, "
              f"Train Loss: {res['train_loss']:.9f}, "
              f"Train-Class-Acc: {res['train_classwise_accuracy']}, "
              f"Val Loss: {res['val_loss']:.9f}, "
              f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
              f"Val-Class-Acc: {res['val_classwise_accuracy']}, "
              f"Model Path: {res['model_path']}")
    print('\n')

    # Drop the device tensors before asking the CUDA allocator to release its cache.
    del X_train, y_train, X_val, y_val, train_dataset, val_dataset, train_loader, val_loader
    torch.cuda.empty_cache()


### Set parameters and initialize model

In [79]:
# Show the CUDA version reported by torch.version.cuda for this PyTorch build.
print("PyTorch compiled CUDA version:", torch.version.cuda)

PyTorch compiled CUDA version: 12.4


In [80]:
# Report the PyTorch version and the CUDA devices visible to it.
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"Num GPUs: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    # Name of the first device (index 0) only.
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

# Seed PyTorch's random number generator with a fixed value.
torch.manual_seed(42)
print("Seeding successful!")


PyTorch Version: 2.5.1+cu124
CUDA Available: True
Num GPUs: 1
GPU Name: NVIDIA GeForce RTX 4060 Laptop GPU


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


### list_period_files_full_path

In [32]:
# Location of the raw data and of the per-period indicator files.
pair = 'BTCUSD'
file_name = f'Polygon_{pair}_4Y_1min'  # File name for saving data
# Alternative (absolute) base folder: f"{Working_directory}/Data/{file_name}"
BASE_FOLDER_PATH = f"Data/{file_name}"
# Alternative sub-folder: f'{BASE_FOLDER_PATH}/polygon_io/12_USD_Crypto_Pairs/{file_name}'
folder_path = f'{BASE_FOLDER_PATH}'
if not os.path.isdir(folder_path):
    raise FileNotFoundError(f"Directory '{folder_path}' does not exist.")
file_path = f'{folder_path}/{file_name}.csv'
number_days = 190
with_indicators_file_path = os.path.normpath(f'{folder_path}/_{file_name}_{number_days}_days_with_indicators.csv')

# (start, end) date stamps of Periods 2 to 5, exactly as they appear in the file names.
period_date_ranges = [
    ("2020-11-11", "2021-05-20"),  # Period 2
    ("2021-05-20", "2021-11-26"),  # Period 3
    ("2021-11-26", "2022-06-04"),  # Period 4
    ("2022-06-04", "2022-12-11"),  # Period 5
]

# Period 1 first, then one file per date range. The names are derived from
# `file_name` and `number_days` so they follow `pair` / `number_days` instead of
# repeating a hard-coded "Polygon_BTCUSD_4Y_1min_190_days" prefix.
list_period_files_full_path = [with_indicators_file_path] + [
    os.path.normpath(f"{folder_path}/{file_name}_{number_days}_days__{start}__{end}__with_indicators.csv")
    for start, end in period_date_ranges
]

with_indicators_file_path

'Data\\Polygon_BTCUSD_4Y_1min\\_Polygon_BTCUSD_4Y_1min_190_days_with_indicators.csv'

In [33]:
# Print the name of every entry found in `folder_path`.
for file in os.listdir(folder_path):
    print("Found file: " + file)

# Polygon_BTCUSD_4Y_1min_190_days_with_indicators.csv

Found file: Polygon_BTCUSD_4Y_1min.png
Found file: Polygon_BTCUSD_4Y_1min_190_days__2020-11-11__2021-05-20.png
Found file: Polygon_BTCUSD_4Y_1min_190_days__2020-11-11__2021-05-20__with_indicators.csv
Found file: Polygon_BTCUSD_4Y_1min_190_days__2021-05-20__2021-11-26.png
Found file: Polygon_BTCUSD_4Y_1min_190_days__2021-05-20__2021-11-26__with_indicators.csv
Found file: Polygon_BTCUSD_4Y_1min_190_days__2021-11-26__2022-06-04.png
Found file: Polygon_BTCUSD_4Y_1min_190_days__2021-11-26__2022-06-04__with_indicators.csv
Found file: Polygon_BTCUSD_4Y_1min_190_days__2022-06-04__2022-12-11.png
Found file: Polygon_BTCUSD_4Y_1min_190_days__2022-06-04__2022-12-11__with_indicators.csv
Found file: Polygon_BTCUSD_4Y_1min_525_days.png
Found file: _Polygon_BTCUSD_4Y_1min_190_days.png
Found file: _Polygon_BTCUSD_4Y_1min_190_days_with_indicators.csv


In [34]:
# Build the splits of the first period file with all five trend classes kept.
(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 Number_features) = process_and_return_splits(
    with_indicators_file_path=list_period_files_full_path[0],
    downsampled_data_minutes=downsampled_data_minutes,
    exclude_columns=exclude_columns,
    lower_threshold=lower_threshold,
    upper_threshold=upper_threshold,
    reverse_steps=reverse_steps,
    sequence_length=sequence_length,
    sliding_interval=sliding_interval,
    trends_to_keep={0, 1, 2, 3, 4},  # Default keeps all trends
)

# The class count is taken from the labels of the validation split.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print("unique_classes = {}".format(unique_classes))
print("num_classes = {}".format(num_classes))

# This cell is only a check: remove everything it created from the namespace.
del X_train, y_train, X_val, y_val, X_test, y_test
del Number_features, unique_classes, num_classes



File path: Data\Polygon_BTCUSD_4Y_1min\_Polygon_BTCUSD_4Y_1min_190_days_with_indicators.csv

data_retrieved - Missing timestamps time: 
DatetimeIndex([], dtype='datetime64[ns, UTC]', freq='min')

data_gaussian - Missing timestamps time: 
DatetimeIndex([], dtype='datetime64[ns, UTC]', freq='min')

columns_to_transform = 
Index(['open', 'high', 'low', 'close', 'SMA_5', 'SMA_10', 'EMA_10'], dtype='object'), 
len(columns_to_transform) = 7

data_log_return - Missing timestamps time: 
DatetimeIndex([], dtype='datetime64[ns, UTC]', freq='min')

No missing timestamps.

    Number of sequences:
        - sequences[0].shape: (1000, 7)
        - Total sequences: 4543
        - Train sequences: 3634
        - Validation sequences: 454
        - Test sequences: 455
    
close_col_index = 3
Number_features = 7
unique_classes = [0 1 2 3 4]
num_classes = 5


### __*All periods data*__

In [36]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""

# Initialize empty lists to combine the train/validation/test elements of all periods
# X_train_all = []
# y_train_all = []
# X_val_all = []
# y_val_all = []
# X_test_all = []
# y_test_all = []

# for path_ in list_period_files_full_path:
#     with contextlib.redirect_stdout(open(os.devnull, 'w')):
#         X_train_, y_train_, X_val_, y_val_, X_test_, y_test_, Number_features = process_and_return_splits(
#             with_indicators_file_path = path_,
#             downsampled_data_minutes = downsampled_data_minutes,
#             exclude_columns = exclude_columns,
#             lower_threshold = lower_threshold,
#             upper_threshold = upper_threshold,
#             reverse_steps = reverse_steps,
#             sequence_length = sequence_length,
#             sliding_interval = sliding_interval,
#             trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends
#         )
#     # Combine validation elements
#     X_train_all.extend(X_train_)
#     y_train_all.extend(y_train_)
#     X_val_all.extend(X_val_)
#     y_val_all.extend(y_val_)
#     X_test_all.extend(X_test_)
#     y_test_all.extend(y_test_)
#     # Delete unused variables to save memory
#     del X_train_, y_train_, X_val_, y_val_, X_test_, y_test_

# X_train_all = np.array(X_train_all)
# y_train_all = np.array(y_train_all)
# X_val_all = np.array(X_val_all)
# y_val_all = np.array(y_val_all)
# X_test_all = np.array(X_test_all)
# y_test_all = np.array(y_test_all)


# with contextlib.redirect_stdout(open(os.devnull, 'w')):
#     X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
#         with_indicators_file_path = list_period_files_full_path[0],
#         downsampled_data_minutes = downsampled_data_minutes,
#         exclude_columns = exclude_columns,
#         lower_threshold = lower_threshold,
#         upper_threshold = upper_threshold,
#         reverse_steps = reverse_steps,
#         sequence_length = sequence_length,
#         sliding_interval = sliding_interval,
#         trends_to_keep = {0, 1}  # Default keeps all trends : {0, 1, 2, 3, 4}
#         # trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends
#     )
    
# print(f"\nNumber_features = {Number_features}")

# unique_classes = np.unique(y_val)
# num_classes = len(unique_classes)
# print(f"unique_classes = {unique_classes}")
# print(f"num_classes = {num_classes}")


"\n- 'trend': Categorized trend values based on the detected phases:\n    - 0: No trend\n    - 1: Moderate negative trend\n    - 2: Very strong negative trend\n    - 3: Moderate positive trend\n    - 4: Very strong positive trend\n"

### Period 1 --> Training and saving in __*'1st_try'*__ (BiGRUWithAttention, num_layers = 4) ---> Val acc = 98.35 %
### Val-Class-Acc: {0: '98.10%', 1: '97.81%'}

In [58]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Silence the printed output of process_and_return_splits. The devnull handle is
# opened by the same `with` statement so it is closed when the block exits
# (passing a bare open(...) to redirect_stdout leaves the file open).
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[0], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1}  # Default keeps all trends : {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The class count is taken from the labels of the validation split.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1]
num_classes = 2


In [61]:
#----------------------------------------------------------------------
# Initialize the list to store results across runs
track_across_runs = []
#----------------------------------------------------------------------

# Model parameters
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttention' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Baseline/Period_1/1st_try"))
ensure_folder(model_saving_folder)

# Instantiate the model
class_gru_model = BiGRUWithAttention(input_size, hidden_size, output_size, num_layers).to(device)

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(class_gru_model.parameters(), lr=0.0001) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train_all, y_train_all, X_val_all, y_val_all, scheduler, 
train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train, y_train, X_val, y_val, scheduler, 
                   False, num_epochs, batch_size, model_saving_folder, model_name, stop_signal_file)

#----------------------------------------------------------------------
# Append only the best result (already at index 0). Nothing is recorded when
# the run produced no result (e.g. it was stopped before the first epoch).
if best_results:
    track_across_runs.append(best_results[0])
#----------------------------------------------------------------------

for res in best_results:
    print(f"Epoch {res['epoch']}/{num_epochs}, "
          f"Train Loss: {res['train_loss']:.4f}, "
          f"Val Loss: {res['val_loss']:.4f}, "
          f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
          f"Model Path: {res['model_path']}")
print(f"\nclass_gru_model: \n{class_gru_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# Remove the data splits and class info from the notebook namespace. globals() is
# addressed explicitly: mutating locals() only has an effect at module scope.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    globals().pop(var, None)


'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number of Batches in Train DataLoader: 57
Number of Batches in Validation DataLoader: 8

y_train Unique Values and Stats:
Unique values in y_train: tensor([0, 1], device='cuda:0')
y_train Min: 0, Max: 1

y_val Unique Values and Stats:
Unique values in y_val: tensor([0, 1], device='cuda:0')
y_val Min: 0, Max: 1

Device Info:
X_train Device: cuda:0
y_train Device: cuda:0
X_val Device: cuda:0
y_val Device: cuda:0

Epoch 1/2000, Train Loss: 0.681195149, 

### Period 1 --> Training and saving in __*'2nd_try'*__ (BiGRUWithAttention, num_layers = 3) ---> Val acc = 98.31 %

In [None]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Silence the printed output of process_and_return_splits. The devnull handle is
# opened by the same `with` statement so it is closed when the block exits
# (passing a bare open(...) to redirect_stdout leaves the file open).
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[0], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1}  # Default keeps all trends : {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The class count is taken from the labels of the validation split.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")


In [None]:
# Model parameters
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 3  # Number of GRU layers
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttention' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Baseline/Period_1/2nd_try"))
ensure_folder(model_saving_folder)

# Instantiate the model
class_gru_model = BiGRUWithAttention(input_size, hidden_size, output_size, num_layers).to(device)

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(class_gru_model.parameters(), lr=0.0001) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train_all, y_train_all, X_val_all, y_val_all, scheduler, 
train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train, y_train, X_val, y_val, scheduler, 
                   False, num_epochs, batch_size, model_saving_folder, model_name, stop_signal_file)

for res in best_results:
    print(f"Epoch {res['epoch']}/{num_epochs}, "
          f"Train Loss: {res['train_loss']:.4f}, "
          f"Val Loss: {res['val_loss']:.4f}, "
          f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
          f"Model Path: {res['model_path']}")
print(f"\nclass_gru_model: \n{class_gru_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# Remove the data splits and class info from the notebook namespace. globals() is
# addressed explicitly: mutating locals() only has an effect at module scope.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    globals().pop(var, None)


'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\2nd_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number of Batches in Train DataLoader: 57
Number of Batches in Validation DataLoader: 8

y_train Unique Values and Stats:
Unique values in y_train: tensor([0, 1], device='cuda:0')
y_train Min: 0, Max: 1

y_val Unique Values and Stats:
Unique values in y_val: tensor([0, 1], device='cuda:0')
y_val Min: 0, Max: 1

Device Info:
X_train Device: cuda:0
y_train Device: cuda:0
X_val Device: cuda:0
y_val Device: cuda:0

Epoch 1/2000, Train Loss: 0.010724814, Val Loss: 0.011819

### Period 1 --> Training and saving in __*'3rd_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4) ---> Val acc = 98.50 %

In [33]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Silence the printed output of process_and_return_splits. The devnull handle is
# opened by the same `with` statement so it is closed when the block exits
# (passing a bare open(...) to redirect_stdout leaves the file open).
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[0], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1}  # Default keeps all trends : {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The class count is taken from the labels of the validation split.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1]
num_classes = 2


In [None]:
# Model parameters
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Baseline/Period_1/3rd_try"))
ensure_folder(model_saving_folder)

# Instantiate the model
class_gru_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(class_gru_model.parameters(), lr=0.0001) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train_all, y_train_all, X_val_all, y_val_all, scheduler, 
train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train, y_train, X_val, y_val, scheduler, 
                   False, num_epochs, batch_size, model_saving_folder, model_name, stop_signal_file)

for res in best_results:
    print(f"Epoch {res['epoch']}/{num_epochs}, "
          f"Train Loss: {res['train_loss']:.4f}, "
          f"Val Loss: {res['val_loss']:.4f}, "
          f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
          f"Model Path: {res['model_path']}")
print(f"\nclass_gru_model: \n{class_gru_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# Remove the data splits and class info from the notebook namespace. globals() is
# addressed explicitly: mutating locals() only has an effect at module scope.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    globals().pop(var, None)


'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Period_1\3rd_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number of Batches in Train DataLoader: 57
Number of Batches in Validation DataLoader: 8

y_train Unique Values and Stats:
Unique values in y_train: tensor([0, 1], device='cuda:0')
y_train Min: 0, Max: 1

y_val Unique Values and Stats:
Unique values in y_val: tensor([0, 1], device='cuda:0')
y_val Min: 0, Max: 1

Device Info:
X_train Device: cuda:0
y_train Device: cuda:0
X_val Device: cuda:0
y_val Device: cuda:0

Epoch 1/2000, Train Loss: 0.010701247, Val Loss:

### Period 1 --> Training and saving in __*'4th_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=8) ---> Val acc = 98.50 %

In [42]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the first entry of list_period_files_full_path,
# keeping only trend classes 0 and 1. Everything process_and_return_splits prints is
# discarded. The devnull handle is opened in the same `with` so it is closed on exit
# (the previous inline open() was never closed).
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[0], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1}  # Default keeps all trends : {0, 1, 2, 3, 4}
        # trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the labels present in the validation split;
# num_classes later becomes the model's output width.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1]
num_classes = 2


In [None]:
# Period 1 baseline run saved under '4th_try': same setup as the other Period 1
# runs but with LoRA rank 8. Requires Number_features, num_classes, unique_classes
# and the X_/y_ splits that the data-loading cell leaves in the kernel namespace.

# Model parameters
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 8 # Rank of the low-rank update matrices
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
# Reset before every run; presumably filled in by train_and_validate through the
# global name -- TODO confirm against the function definition.
best_results = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Baseline/Period_1/4th_try"))
ensure_folder(model_saving_folder)

# Instantiate the model
class_gru_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(class_gru_model.parameters(), lr=0.0001) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
# 'min' mode: the learning rate is multiplied by 0.9 once the monitored value has
# stopped decreasing for 10 scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train_all, y_train_all, X_val_all, y_val_all, scheduler, 
train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train, y_train, X_val, y_val, scheduler, 
                   False, num_epochs, batch_size, model_saving_folder, model_name, stop_signal_file)

# Summary of the checkpoints recorded during the run.
for res in best_results:
    print(f"Epoch {res['epoch']}/{num_epochs}, "
          f"Train Loss: {res['train_loss']:.4f}, "
          f"Val Loss: {res['val_loss']:.4f}, "
          f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
          f"Model Path: {res['model_path']}")
print(f"\nclass_gru_model: \n{class_gru_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")

# Drop the per-period data so a later period cannot silently reuse it.
# globals().pop() is used instead of `del locals()[var]`: writing to the mapping
# returned by locals() only has an effect where it happens to alias globals()
# (module scope) and would silently do nothing if this code moved into a function.
for var in ("X_train", "y_train", "X_val", "y_val", "X_test", "y_test",
            "Number_features", "unique_classes", "num_classes"):
    globals().pop(var, None)


'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Period_1\4th_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number of Batches in Train DataLoader: 57
Number of Batches in Validation DataLoader: 8

y_train Unique Values and Stats:
Unique values in y_train: tensor([0, 1], device='cuda:0')
y_train Min: 0, Max: 1

y_val Unique Values and Stats:
Unique values in y_val: tensor([0, 1], device='cuda:0')
y_val Min: 0, Max: 1

Device Info:
X_train Device: cuda:0
y_train Device: cuda:0
X_val Device: cuda:0
y_val Device: cuda:0

Epoch 1/2000, Train Loss: 0.684206644, Val Loss:

### Period 2 --> Training and saving in __*'1st_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4) ---> Val acc = not recorded
### Val-Class-Acc: not recorded

In [64]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the second entry of list_period_files_full_path,
# keeping trend classes 0, 1 and 2. Everything process_and_return_splits prints is
# discarded. The devnull handle is opened in the same `with` so it is closed on exit
# (the previous inline open() was never closed).
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[1], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2}  # Default keeps all trends : {0, 1, 2, 3, 4}
        # trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the labels present in the validation split;
# num_classes later becomes the model's output width.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2]
num_classes = 3


In [68]:
# Manual cleanup cell: forget the per-period data, then hand cached GPU memory back.
# The references are dropped BEFORE empty_cache(), because memory that is still
# referenced cannot be released by it.
# globals().pop() replaces `del locals()[var]`, which only worked because locals()
# aliases globals() at module scope.
# NOTE(review): the recorded output of this cell is a CUDA device-side assert. That
# error is reported asynchronously, so it was presumably triggered by an earlier
# kernel launch; a kernel restart is likely required once it has occurred.
for var in ("X_train", "y_train", "X_val", "y_val", "X_test", "y_test",
            "Number_features", "unique_classes", "num_classes"):
    globals().pop(var, None)
gc.collect()
torch.cuda.empty_cache()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [65]:
# Sanity check: score the Period 1 model (output_size - 1 logits) on the Period 2
# validation split and report per-class accuracy.
# Requires X_train/y_train/X_val/y_val, Number_features and num_classes from the
# Period 2 data-loading cell.
# NOTE(review): the Period 2 labels include class 2 while previous_model only emits
# output_size - 1 logits. Confirm that compute_classwise_accuracy tolerates labels
# without a matching logit; the recorded output of this cell is a CUDA device-side assert.

# (Disabled) in-cell reload of the Period 2 splits; the preceding cell provides them.
# with contextlib.redirect_stdout(open(os.devnull, 'w')):
#     X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
#         with_indicators_file_path = list_period_files_full_path[1], # Change 
#         downsampled_data_minutes = downsampled_data_minutes,
#         exclude_columns = exclude_columns,
#         lower_threshold = lower_threshold,
#         upper_threshold = upper_threshold,
#         reverse_steps = reverse_steps,
#         sequence_length = sequence_length,
#         sliding_interval = sliding_interval,
#         trends_to_keep = {0, 1, 2}  # Default keeps all trends : {0, 1, 2, 3, 4}
#         # trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends
#     )
# print(f"\nNumber_features = {Number_features}")
# unique_classes = np.unique(y_val)
# num_classes = len(unique_classes)
# print(f"unique_classes = {unique_classes}")
# print(f"num_classes = {num_classes}")

# Model parameters
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
num_epochs= 500 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate the previous model (one class fewer than the current period) and
# restore its weights from the Period 1 checkpoint.
previous_model = BiGRUWithAttention(input_size, hidden_size, output_size - 1, num_layers)#.to(device)
best_model_path = r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try\BiGRUWithAttention_epoch_1987.pth"
previous_model_checkpoint_path = os.path.normpath(best_model_path)
previous_model_checkpoint = torch.load(previous_model_checkpoint_path, map_location=device, weights_only=True)
previous_model.load_state_dict(previous_model_checkpoint['model_state_dict'])
# The checkpoint dict is no longer needed once the weights are loaded.
del previous_model_checkpoint
gc.collect()
print(f"Loaded previous model from: \n\t{previous_model_checkpoint_path}")
print(f"\n{previous_model}\n")

# Convert data to tensors # Returns a copy, original is safe
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)  # (seqs, seq_len, features)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)    # (seqs, seq_len)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

previous_model.to(device)
previous_model.eval()
# Accumulators for the overall loss/accuracy path, which is currently disabled below.
val_loss = 0.0
val_correct = 0
val_total = 0
# Per-class counters, updated in place by compute_classwise_accuracy.
val_class_correct = {}
val_class_total = {}
with torch.no_grad():
    for X_val_batch, y_val_batch in val_loader:
        # Flatten to one row of logits per time step.
        val_outputs = previous_model(X_val_batch).view(-1, output_size-1)

        # # Get raw model predictions (originally only 2 logits)
        # val_outputs = previous_model(X_val_batch)  # Shape: [batch_size, seq_len, 2]
        # # Add a 3rd logit with value 0.0 for all samples
        # third_logit = torch.zeros((*val_outputs.shape[:-1], 1), device=val_outputs.device)  # Shape: [batch_size, seq_len, 1]
        # val_outputs = torch.cat((val_outputs, third_logit), dim=-1)  # Shape: [batch_size, seq_len, 3]
        # # Now apply view operation to match loss function's expected shape
        # val_outputs = val_outputs.view(-1, output_size)  # Correct shape: [batch_size * seq_len, 3]

        val_labels = y_val_batch.view(-1)
        # val_loss += criterion(val_outputs, val_labels).item() * X_val_batch.size(0)  # Scale to total loss
        # val_predictions = torch.argmax(val_outputs, dim=-1)
        # val_correct += (val_predictions == val_labels).sum().item()
        # val_total += val_labels.size(0)
        # Compute per-class validation accuracy
        compute_classwise_accuracy(val_outputs, val_labels, val_class_correct, val_class_total)
# val_loss /= len(val_loader.dataset)
# val_accuracy = val_correct / val_total
# print(f"val_accuracy = {val_accuracy* 100:.2f}%")

# Compute per-class validation accuracy
# val_classwise_accuracy = {int(c): (val_class_correct[c] / val_class_total[c]) * 100 if val_class_total[c] > 0 else 0 
#                          for c in sorted(val_class_total.keys())}
val_classwise_accuracy = {int(c): f"{(val_class_correct[c] / val_class_total[c]) * 100:.2f}%" if val_class_total[c] > 0 else "0.00%" 
                            for c in sorted(val_class_total.keys())}

print(f"\nval_classwise_accuracy: {val_classwise_accuracy}")
print(f"val_class_correct: {val_class_correct}")
print(f"val_class_total: {val_class_total}")


# Release everything this check created. All references are dropped first so that
# empty_cache() can actually return the memory they occupied.
# globals().pop() replaces the earlier `del ...` + `del locals()[var]` pair; the
# latter only worked because locals() aliases globals() at module scope.
for var in ("X_train", "y_train", "X_val", "y_val",
            "train_dataset", "val_dataset", "train_loader", "val_loader",
            "val_classwise_accuracy",
            "X_test", "y_test", "Number_features", "unique_classes", "num_classes"):
    globals().pop(var, None)
torch.cuda.empty_cache()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Period 2 LoRA run saved under '1st_try': load the Period 1 model, build a wider
# BiGRUWithAttentionLoRA, transfer/freeze its base weights and train the remainder.
# Requires Number_features, num_classes, unique_classes and the X_/y_ splits that
# the Period 2 data-loading cell leaves in the kernel namespace.

# Model parameters
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
num_epochs= 500 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
# Reset before every run; presumably filled in by train_and_validate_lora through
# the global name -- TODO confirm against the function definition.
best_results = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_2/1st_try"))
ensure_folder(model_saving_folder)

# Instantiate the previous model (one class fewer than the current period)
previous_model = BiGRUWithAttention(input_size, hidden_size, output_size - 1, num_layers)#.to(device)
# Checkpoint of the Period 1 model; later cells reuse best_model_path from the kernel.
best_model_path = r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try\BiGRUWithAttention_epoch_1987.pth"
# Initialize the list to store results across runs (later cells append to it)
track_across_runs = []
previous_model_checkpoint_path = os.path.normpath(best_model_path)
previous_model_checkpoint = torch.load(previous_model_checkpoint_path, map_location=device, weights_only=True)
previous_model.load_state_dict(previous_model_checkpoint['model_state_dict'])
# The checkpoint dict is no longer needed once the weights are loaded.
del previous_model_checkpoint
gc.collect()
print(f"Loaded previous model from: \n\t{previous_model_checkpoint_path}")
print(f"\n{previous_model}\n")

# Instantiate the current model
# NOTE(review): neither model is moved to `device` in this cell; presumably
# train_and_validate_lora does it -- confirm.
current_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r)#.to(device)

# Transfer stable class parameters:
# Here, for example for period 2 training, we assume that in Period 1, Class 1 is stable. In Period 2, we want new index 1 to inherit from old index 1.
expand_and_transfer_fc(previous_model, current_model)

# Freeze all base parameters in the Current Period model so that only LoRA parameters update.
freeze_base_parameters(current_model)

# Reinitialize the LoRA adapter for Class 0 (which has a shifting definition)
reinitialize_lora_for_class(current_model, class_index=0)

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# Only parameters with requires_grad=True (the LoRA adapters) will be updated.
# optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
optimizer = optim.Adam(filter(lambda p: p.requires_grad, current_model.parameters()), lr=learning_rate)
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(current_model, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        model_saving_folder, model_name, stop_signal_file)

# Append only the best result (already at index 0). Guarded so that a run which
# recorded nothing (e.g. stopped immediately) does not raise IndexError and skip
# the summary and cleanup below.
if best_results:
    track_across_runs.append(best_results[0])
else:
    print("No best result recorded for this run; track_across_runs left unchanged.")

for res in best_results:
    print(f"Epoch {res['epoch']}/{num_epochs}, "
          f"Train Loss: {res['train_loss']:.4f}, "
          f"Val Loss: {res['val_loss']:.4f}, "
          f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
          f"Model Path: {res['model_path']}")
print(f"\ncurrent_model: \n{current_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")

# Drop the per-period data, then release cached GPU memory.
# globals().pop() replaces `del locals()[var]`, which only worked because locals()
# aliases globals() at module scope.
for var in ("X_train", "y_train", "X_val", "y_val", "X_test", "y_test",
            "Number_features", "unique_classes", "num_classes"):
    globals().pop(var, None)
torch.cuda.empty_cache()


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try\BiGRUWithAttention_epoch_1987.pth

BiGRUWithAttention(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\1st_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number of Batches in Train DataLoader: 

In [65]:
# Show which checkpoint path is currently bound to best_model_path in the kernel;
# the cells that load the teacher model read this name.
print(best_model_path)

Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try\BiGRUWithAttention_epoch_1987.pth


### Period 2 --> Training and saving in __*'2nd_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4, alpha = 0.4) ---> Val acc = 97.21 %
### Val-Class-Acc: {0: '99.68%', 1: '96.17%', 2: '85.94%'}

In [64]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the second entry of list_period_files_full_path,
# keeping trend classes 0, 1 and 2. Everything process_and_return_splits prints is
# discarded. The devnull handle is opened in the same `with` so it is closed on exit
# (the previous inline open() was never closed).
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[1], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2}  # Default keeps all trends : {0, 1, 2, 3, 4}
        # trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the labels present in the validation split;
# num_classes later becomes the model's output width.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2]
num_classes = 3


In [66]:
# Period 2 run saved under '2nd_try': train a BiGRUWithAttentionLoRA student against
# the Period 1 BiGRUWithAttention teacher, with distillation weight alpha = 0.4.
# Requires Number_features, num_classes, unique_classes and the X_/y_ splits from the
# Period 2 data-loading cell. best_model_path and track_across_runs are NOT defined
# here; they must already exist in the kernel from an earlier cell.

# Model parameters
stable_classes = [1] # <----------------------------<<< From period 1: I exclude class 0 because it will change in period 2
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
alpha = 0.4          # Weight for distillation loss
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
# Reset before every run; presumably filled in by train_and_validate_lora through
# the global name -- TODO confirm against the function definition.
best_results = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_2/2nd_try"))
ensure_folder(model_saving_folder)

# Instantiate the student model
student_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Instantiate and load the teacher model (one class fewer than the student)
# teacher_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size - 1, num_layers, dropout, lora_r).to(device)
teacher_model = BiGRUWithAttention(input_size, hidden_size, output_size - 1, num_layers).to(device)
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\3rd_try\BiGRUWithAttentionLoRA_epoch_1943.pth")
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try\BiGRUWithAttention_epoch_1623.pth")
teacher_checkpoint_path = os.path.normpath(best_model_path)
# teacher_checkpoint = torch.load("teacher_model.pth", map_location=device)
teacher_checkpoint = torch.load(teacher_checkpoint_path, map_location=device, weights_only=True)
# print(f"\n{teacher_checkpoint}\n")
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
# The checkpoint dict is no longer needed once the weights are loaded.
del teacher_checkpoint
gc.collect()
print(f"Loaded teacher model from: \n\t{teacher_checkpoint_path}")
print(f"\n{teacher_model}\n")

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# All student parameters are handed to Adam; this cell does not freeze any of them.
# NOTE(review): confirm whether the base weights are frozen elsewhere (model class or
# train_and_validate_lora) if only the LoRA adapters are meant to train.
optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(student_model, teacher_model, stable_classes, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        alpha, model_saving_folder, model_name, stop_signal_file)

# Append only the best result (already at index 0). Guarded so that a run which
# recorded nothing (e.g. stopped immediately) does not raise IndexError and skip
# the summary and cleanup below.
if best_results:
    track_across_runs.append(best_results[0])
else:
    print("No best result recorded for this run; track_across_runs left unchanged.")

for res in best_results:
    print(f"Epoch {res['epoch']}/{num_epochs}, "
          f"Train Loss: {res['train_loss']:.4f}, "
          f"Val Loss: {res['val_loss']:.4f}, "
          f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
          f"Model Path: {res['model_path']}")
print(f"\nclass_gru_model (student_model): \n{student_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")

# Drop the per-period data so a later run cannot silently reuse it.
# globals().pop() replaces `del locals()[var]`, which only worked because locals()
# aliases globals() at module scope.
for var in ("X_train", "y_train", "X_val", "y_val", "X_test", "y_test",
            "Number_features", "unique_classes", "num_classes"):
    globals().pop(var, None)


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try\BiGRUWithAttention_epoch_1987.pth

BiGRUWithAttention(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\2nd_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number of Batches in Train DataLoader: 

### Period 2 --> Training and saving in __*'3rd_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4, alpha = 0.3) ---> Val acc = 97.16 %
### Val-Class-Acc: {0: '99.39%', 1: '97.03%', 2: '80.55%'}

In [67]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the second entry of list_period_files_full_path,
# keeping trend classes 0, 1 and 2. Everything process_and_return_splits prints is
# discarded. The devnull handle is opened in the same `with` so it is closed on exit
# (the previous inline open() was never closed).
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[1], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2}  # Default keeps all trends : {0, 1, 2, 3, 4}
        # trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the labels present in the validation split;
# num_classes later becomes the model's output width.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2]
num_classes = 3


In [68]:
# Period 2 run saved under '3rd_try': train a BiGRUWithAttentionLoRA student against
# the Period 1 BiGRUWithAttention teacher, with distillation weight alpha = 0.3.
# Requires Number_features, num_classes, unique_classes and the X_/y_ splits from the
# Period 2 data-loading cell. best_model_path and track_across_runs are NOT defined
# here; they must already exist in the kernel from an earlier cell.

# Model parameters
stable_classes = [1] # <----------------------------<<< From period 1: I exclude class 0 because it will change in period 2
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
alpha = 0.3          # Weight for distillation loss
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
# Reset before every run; presumably filled in by train_and_validate_lora through
# the global name -- TODO confirm against the function definition.
best_results = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_2/3rd_try"))
ensure_folder(model_saving_folder)

# Instantiate the student model
student_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Instantiate and load the teacher model (one class fewer than the student)
# teacher_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size - 1, num_layers, dropout, lora_r).to(device)
teacher_model = BiGRUWithAttention(input_size, hidden_size, output_size - 1, num_layers).to(device)
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\3rd_try\BiGRUWithAttentionLoRA_epoch_1943.pth")
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try\BiGRUWithAttention_epoch_1623.pth")
teacher_checkpoint_path = os.path.normpath(best_model_path)
# teacher_checkpoint = torch.load("teacher_model.pth", map_location=device)
teacher_checkpoint = torch.load(teacher_checkpoint_path, map_location=device, weights_only=True)
# print(f"\n{teacher_checkpoint}\n")
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
# The checkpoint dict is no longer needed once the weights are loaded.
del teacher_checkpoint
gc.collect()
print(f"Loaded teacher model from: \n\t{teacher_checkpoint_path}")
print(f"\n{teacher_model}\n")

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# All student parameters are handed to Adam; this cell does not freeze any of them.
# NOTE(review): confirm whether the base weights are frozen elsewhere (model class or
# train_and_validate_lora) if only the LoRA adapters are meant to train.
optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(student_model, teacher_model, stable_classes, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        alpha, model_saving_folder, model_name, stop_signal_file)

# Append only the best result (already at index 0). Guarded so that a run which
# recorded nothing (e.g. stopped immediately) does not raise IndexError and skip
# the summary and cleanup below.
if best_results:
    track_across_runs.append(best_results[0])
else:
    print("No best result recorded for this run; track_across_runs left unchanged.")

for res in best_results:
    print(f"Epoch {res['epoch']}/{num_epochs}, "
          f"Train Loss: {res['train_loss']:.4f}, "
          f"Val Loss: {res['val_loss']:.4f}, "
          f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
          f"Model Path: {res['model_path']}")
print(f"\nclass_gru_model (student_model): \n{student_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")

# Drop the per-period data so a later run cannot silently reuse it.
# globals().pop() replaces `del locals()[var]`, which only worked because locals()
# aliases globals() at module scope.
for var in ("X_train", "y_train", "X_val", "y_val", "X_test", "y_test",
            "Number_features", "unique_classes", "num_classes"):
    globals().pop(var, None)


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Baseline\Period_1\1st_try\BiGRUWithAttention_epoch_1987.pth

BiGRUWithAttention(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\3rd_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number of Batches in Train DataLoader: 

### Period 3 --> Training and saving in __*'1st_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4, alpha = 0.5) ---> Val acc = 97.53 %
### Val-Class-Acc: {0: '88.15%', 1: '98.31%', 2: '93.32%', 3: '99.11%'}

In [69]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the third period file (index 2).
# The helper's console output is silenced. The devnull handle is opened in the
# same `with` statement so that it is closed on exit instead of being leaked.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[2], # Change
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2, 3}  # Keeping all trends would be {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the validation labels only.
# NOTE(review): a class that is absent from y_val would not be counted - confirm this is acceptable.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2 3]
num_classes = 4


In [70]:
# Period 3, '1st_try': train a BiGRUWithAttentionLoRA student with `output_size` classes,
# using as teacher the best checkpoint recorded in `track_across_runs` (one class fewer).

# Model parameters
stable_classes = [1, 2] # <----------------------------<<< From period 2: I exclude class 0 because it will change in period 3
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
alpha = 0.5          # Weight for distillation loss
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_3/1st_try"))
ensure_folder(model_saving_folder)

# Instantiate the student model
student_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Instantiate and load the teacher model (one output class fewer than the student)
teacher_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size - 1, num_layers, dropout, lora_r).to(device)
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\1st_try\BiGRUWithAttentionLoRA_epoch_1395.pth")
#-------------------------------------------------------------------------
# Teacher = run with the highest validation accuracy among the runs tracked so far.
# NOTE(review): after this cell has completed once, `track_across_runs` holds this
# cell's own result, so re-running it would select that checkpoint as the teacher.
if not track_across_runs:
    raise ValueError("track_across_runs is empty: run the previous period's training cells first")
best_overall = max(track_across_runs, key=lambda res: res['val_accuracy'])
best_model_path = best_overall['model_path']
#----------------------------------------------------------------------
# Initialize the list to store results across runs
track_across_runs = []
#-------------------------------------------------------------------------
teacher_checkpoint_path = os.path.normpath(best_model_path)
teacher_checkpoint = torch.load(teacher_checkpoint_path, map_location=device, weights_only=True)
# print(f"\n{teacher_checkpoint}\n")
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
# Drop the checkpoint dict once its weights have been copied into the model
del teacher_checkpoint
gc.collect()
print(f"Loaded teacher model from: \n\t{teacher_checkpoint_path}")
print(f"\n{teacher_model}\n")

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# Optimizer: only train parameters of the student model (LoRA parameters and any unfrozen ones).
optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(student_model, teacher_model, stable_classes, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        alpha, model_saving_folder, model_name, stop_signal_file)

#----------------------------------------------------------------------
# Append only the best result (already at index 0)
track_across_runs.append(best_results[0])
#----------------------------------------------------------------------

for res in best_results:        
    print(f"Epoch {res['epoch']}/{num_epochs}, "
            f"Train Loss: {res['train_loss']:.4f}, " 
            f"Val Loss: {res['val_loss']:.4f}, "
            f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
            f"Model Path: {res['model_path']}")      
print(f"\nclass_gru_model (student_model): \n{student_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# Remove the per-period data from the notebook namespace so the next cell reloads it.
# globals().pop() is used instead of `del locals()[var]`: mutating locals() only works
# by accident at module level, while globals() is the documented writable namespace.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    globals().pop(var, None)


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\2nd_try\BiGRUWithAttentionLoRA_epoch_421.pth

BiGRUWithAttentionLoRA(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): LoRALinear(
    (linear): Linear(in_features=128, out_features=3, bias=True)
  )
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\1st_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number

### Period 3 --> Training and saving in __*'2nd_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4, alpha = 0.4) ---> Val acc = 97.50 %
### Val-Class-Acc: {0: '87.54%', 1: '98.24%', 2: '93.87%', 3: '99.06%'}

In [71]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the third period file (index 2).
# The helper's console output is silenced. The devnull handle is opened in the
# same `with` statement so that it is closed on exit instead of being leaked.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[2], # Change
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2, 3}  # Keeping all trends would be {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the validation labels only.
# NOTE(review): a class that is absent from y_val would not be counted - confirm this is acceptable.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2 3]
num_classes = 4


In [72]:
# Period 3, '2nd_try': same setup as the other Period 3 runs but with alpha = 0.4.
# The teacher checkpoint is read from `best_model_path`, which is not assigned in
# this cell and must already exist in the notebook namespace.

# Model parameters
stable_classes = [1, 2] # <----------------------------<<< From period 2: I exclude class 0 because it will change in period 3
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
alpha = 0.4          # Weight for distillation loss
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_3/2nd_try"))
ensure_folder(model_saving_folder)

# Instantiate the student model
student_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Instantiate and load the teacher model (one output class fewer than the student)
teacher_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size - 1, num_layers, dropout, lora_r).to(device)
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\3rd_try\BiGRUWithAttentionLoRA_epoch_1144.pth")
teacher_checkpoint_path = os.path.normpath(best_model_path)
teacher_checkpoint = torch.load(teacher_checkpoint_path, map_location=device, weights_only=True)
# print(f"\n{teacher_checkpoint}\n")
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
# Drop the checkpoint dict once its weights have been copied into the model
del teacher_checkpoint
gc.collect()
print(f"Loaded teacher model from: \n\t{teacher_checkpoint_path}")
print(f"\n{teacher_model}\n")

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# Optimizer: only train parameters of the student model (LoRA parameters and any unfrozen ones).
optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(student_model, teacher_model, stable_classes, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        alpha, model_saving_folder, model_name, stop_signal_file)

#----------------------------------------------------------------------
# Append only the best result (already at index 0)
track_across_runs.append(best_results[0])
#----------------------------------------------------------------------

for res in best_results:        
    print(f"Epoch {res['epoch']}/{num_epochs}, "
            f"Train Loss: {res['train_loss']:.4f}, " 
            f"Val Loss: {res['val_loss']:.4f}, "
            f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
            f"Model Path: {res['model_path']}")      
print(f"\nclass_gru_model (student_model): \n{student_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# Remove the per-period data from the notebook namespace so the next cell reloads it.
# globals().pop() is used instead of `del locals()[var]`: mutating locals() only works
# by accident at module level, while globals() is the documented writable namespace.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    globals().pop(var, None)


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\2nd_try\BiGRUWithAttentionLoRA_epoch_421.pth

BiGRUWithAttentionLoRA(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): LoRALinear(
    (linear): Linear(in_features=128, out_features=3, bias=True)
  )
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\2nd_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number

### Period 3 --> Training and saving in __*'3rd_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4, alpha = 0.3) ---> Val acc = 97.45 %
### Val-Class-Acc: {0: '87.96%', 1: '98.54%', 2: '95.55%', 3: '98.11%'}

In [73]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the third period file (index 2).
# The helper's console output is silenced. The devnull handle is opened in the
# same `with` statement so that it is closed on exit instead of being leaked.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[2], # Change
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2, 3}  # Keeping all trends would be {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the validation labels only.
# NOTE(review): a class that is absent from y_val would not be counted - confirm this is acceptable.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2 3]
num_classes = 4


In [74]:
# Period 3, '3rd_try': same setup as the other Period 3 runs but with alpha = 0.3.
# The teacher checkpoint is read from `best_model_path`, which is not assigned in
# this cell and must already exist in the notebook namespace.

# Model parameters
stable_classes = [1, 2] # <----------------------------<<< From period 2: I exclude class 0 because it will change in period 3
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
alpha = 0.3          # Weight for distillation loss
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_3/3rd_try"))
ensure_folder(model_saving_folder)

# Instantiate the student model
student_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Instantiate and load the teacher model (one output class fewer than the student)
teacher_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size - 1, num_layers, dropout, lora_r).to(device)
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\3rd_try\BiGRUWithAttentionLoRA_epoch_1144.pth")
teacher_checkpoint_path = os.path.normpath(best_model_path)
teacher_checkpoint = torch.load(teacher_checkpoint_path, map_location=device, weights_only=True)
# print(f"\n{teacher_checkpoint}\n")
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
# Drop the checkpoint dict once its weights have been copied into the model
del teacher_checkpoint
gc.collect()
print(f"Loaded teacher model from: \n\t{teacher_checkpoint_path}")
print(f"\n{teacher_model}\n")

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# Optimizer: only train parameters of the student model (LoRA parameters and any unfrozen ones).
optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(student_model, teacher_model, stable_classes, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        alpha, model_saving_folder, model_name, stop_signal_file)

#----------------------------------------------------------------------
# Append only the best result (already at index 0)
track_across_runs.append(best_results[0])
#----------------------------------------------------------------------

for res in best_results:        
    print(f"Epoch {res['epoch']}/{num_epochs}, "
            f"Train Loss: {res['train_loss']:.4f}, " 
            f"Val Loss: {res['val_loss']:.4f}, "
            f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
            f"Model Path: {res['model_path']}")      
print(f"\nclass_gru_model (student_model): \n{student_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# Remove the per-period data from the notebook namespace so the next cell reloads it.
# globals().pop() is used instead of `del locals()[var]`: mutating locals() only works
# by accident at module level, while globals() is the documented writable namespace.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    globals().pop(var, None)


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_2\2nd_try\BiGRUWithAttentionLoRA_epoch_421.pth

BiGRUWithAttentionLoRA(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): LoRALinear(
    (linear): Linear(in_features=128, out_features=3, bias=True)
  )
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\3rd_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Number

### Period 4 --> Training and saving in __*'1st_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4, alpha = 0.5) ---> Val acc = 96.06 %
### Val-Class-Acc: {0: '88.52%', 1: '96.82%', 2: '91.13%', 3: '97.89%', 4: '97.90%'}

In [75]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the fourth period file (index 3).
# The helper's console output is silenced. The devnull handle is opened in the
# same `with` statement so that it is closed on exit instead of being leaked.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[3], # Change
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends : {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the validation labels only.
# NOTE(review): a class that is absent from y_val would not be counted - confirm this is acceptable.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2 3 4]
num_classes = 5


In [76]:
# Period 4, '1st_try': train a BiGRUWithAttentionLoRA student with `output_size` classes,
# using as teacher the best checkpoint recorded in `track_across_runs` (one class fewer).

# Model parameters
stable_classes = [1, 2, 3] # <----------------------------<<< From period 3 (the teacher): I exclude class 0 because it will change in period 4
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
alpha = 0.5          # Weight for distillation loss
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_4/1st_try"))
ensure_folder(model_saving_folder)

# Instantiate the student model
student_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Instantiate and load the teacher model (one output class fewer than the student)
teacher_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size - 1, num_layers, dropout, lora_r).to(device)
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\1st_try\BiGRUWithAttentionLoRA_epoch_1970.pth")
#-------------------------------------------------------------------------
# Teacher = run with the highest validation accuracy among the runs tracked so far.
# NOTE(review): unlike the Period 3 training cells, this cell neither resets
# `track_across_runs` nor appends its own best result to it - confirm this is intended.
if not track_across_runs:
    raise ValueError("track_across_runs is empty: run the previous period's training cells first")
best_overall = max(track_across_runs, key=lambda res: res['val_accuracy'])
best_model_path = best_overall['model_path']
#-------------------------------------------------------------------------
teacher_checkpoint_path = os.path.normpath(best_model_path)
teacher_checkpoint = torch.load(teacher_checkpoint_path, map_location=device, weights_only=True)
# print(f"\n{teacher_checkpoint}\n")
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
# Drop the checkpoint dict once its weights have been copied into the model
del teacher_checkpoint
gc.collect()
print(f"Loaded teacher model from: \n\t{teacher_checkpoint_path}")
print(f"\n{teacher_model}\n")

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# Optimizer: only train parameters of the student model (LoRA parameters and any unfrozen ones).
optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(student_model, teacher_model, stable_classes, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        alpha, model_saving_folder, model_name, stop_signal_file)

for res in best_results:        
    print(f"Epoch {res['epoch']}/{num_epochs}, "
            f"Train Loss: {res['train_loss']:.4f}, " 
            f"Val Loss: {res['val_loss']:.4f}, "
            f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
            f"Model Path: {res['model_path']}")      
print(f"\nclass_gru_model (student_model): \n{student_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# Remove the per-period data from the notebook namespace so the next cell reloads it.
# globals().pop() is used instead of `del locals()[var]`: mutating locals() only works
# by accident at module level, while globals() is the documented writable namespace.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    globals().pop(var, None)


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\1st_try\BiGRUWithAttentionLoRA_epoch_1964.pth

BiGRUWithAttentionLoRA(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): LoRALinear(
    (linear): Linear(in_features=128, out_features=4, bias=True)
  )
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_4\1st_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Numbe

### Period 4 --> Training and saving in __*'2nd_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4, alpha = 0.4) ---> Val acc = 96.32 %
### Val-Class-Acc: {0: '92.33%', 1: '96.85%', 2: '90.57%', 3: '97.74%', 4: '99.13%'}

In [77]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Build the train/val/test splits from the fourth period file (index 3).
# The helper's console output is silenced. The devnull handle is opened in the
# same `with` statement so that it is closed on exit instead of being leaked.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[3], # Change
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends : {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The class set is derived from the validation labels only.
# NOTE(review): a class that is absent from y_val would not be counted - confirm this is acceptable.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2 3 4]
num_classes = 5


In [78]:
# Period 4, '2nd_try': same setup as the Period 4 '1st_try' run but with alpha = 0.4.
# The teacher checkpoint is read from `best_model_path`, which is not assigned in
# this cell and must already exist in the notebook namespace.

# Model parameters
stable_classes = [1, 2, 3] # <----------------------------<<< From period 3 (the teacher): I exclude class 0 because it will change in period 4
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
alpha = 0.4          # Weight for distillation loss
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_4/2nd_try"))
ensure_folder(model_saving_folder)

# Instantiate the student model
student_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Instantiate and load the teacher model (one output class fewer than the student)
teacher_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size - 1, num_layers, dropout, lora_r).to(device)
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\1st_try\BiGRUWithAttentionLoRA_epoch_1970.pth")
teacher_checkpoint_path = os.path.normpath(best_model_path)
teacher_checkpoint = torch.load(teacher_checkpoint_path, map_location=device, weights_only=True)
# print(f"\n{teacher_checkpoint}\n")
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
# Drop the checkpoint dict once its weights have been copied into the model
del teacher_checkpoint
gc.collect()
print(f"Loaded teacher model from: \n\t{teacher_checkpoint_path}")
print(f"\n{teacher_model}\n")

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# Optimizer: only train parameters of the student model (LoRA parameters and any unfrozen ones).
optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(student_model, teacher_model, stable_classes, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        alpha, model_saving_folder, model_name, stop_signal_file)

# NOTE(review): unlike the Period 3 training cells, the best result of this run is
# not appended to `track_across_runs` - confirm this is intended.
for res in best_results:        
    print(f"Epoch {res['epoch']}/{num_epochs}, "
            f"Train Loss: {res['train_loss']:.4f}, " 
            f"Val Loss: {res['val_loss']:.4f}, "
            f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
            f"Model Path: {res['model_path']}")      
print(f"\nclass_gru_model (student_model): \n{student_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# Remove the per-period data from the notebook namespace so the next cell reloads it.
# globals().pop() is used instead of `del locals()[var]`: mutating locals() only works
# by accident at module level, while globals() is the documented writable namespace.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    globals().pop(var, None)


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\1st_try\BiGRUWithAttentionLoRA_epoch_1964.pth

BiGRUWithAttentionLoRA(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): LoRALinear(
    (linear): Linear(in_features=128, out_features=4, bias=True)
  )
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_4\2nd_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Numbe

### Period 4 --> Training and saving in __*'3rd_try'*__ (BiGRUWithAttentionLoRA, num_layers = 4, lora_r=4, alpha = 0.3) ---> Val acc = 96.40 %
### Val-Class-Acc: {0: '91.25%', 1: '96.34%', 2: '89.94%', 3: '98.33%', 4: '98.16%'}

In [79]:
"""
- 'trend': Categorized trend values based on the detected phases:
    - 0: No trend
    - 1: Moderate negative trend
    - 2: Very strong negative trend
    - 3: Moderate positive trend
    - 4: Very strong positive trend
"""
# Rebuild the splits for the fourth period file (index 3) while silencing the
# verbose preprocessing output. The devnull handle is opened in the same `with`
# statement so it is closed as soon as the call returns instead of being leaked.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[3], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2, 3, 4}  # Default keeps all trends : {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

# The number of output classes is derived from the labels actually present in
# the validation split, so the model cells that follow size themselves from it.
unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")



Number_features = 7
unique_classes = [0 1 2 3 4]
num_classes = 5


In [80]:
# Period 4 training cell: build a student model with one output per class found
# in the current data, load the previously selected model as teacher, train the
# student with `train_and_validate_lora`, then print the recorded best results.

# Model parameters
stable_classes = [1, 2, 3] # <----------------------------<<< From period 2: I exclude class 0 because it will change in period 4
input_size = Number_features  # Number of features
hidden_size = 64  # Number of GRU units
output_size = num_classes # Must be dynamic, up to 5  # Number of trend classes (0, 15, 25, -15, -25)
num_layers = 4  # Number of GRU layers
dropout = 0.0
lora_r = 4 # Rank of the low-rank update matrices
learning_rate = 0.0001
alpha = 0.3          # Weight for distillation loss
num_epochs= 2000 # Number of epochs/ go through entire data
batch_size= 64 # How many sequences passed at once to the model
model_name = 'BiGRUWithAttentionLoRA' # Name of the model to use for saving
best_results = [] # Initialize this outside the training function or at the beginning of training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define a global stop signal
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
# Both paths are relative, so they resolve against the current working directory.
model_saving_folder = os.path.normpath(os.path.join('Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/LoRA/Period_4/3rd_try"))
ensure_folder(model_saving_folder)

# Instantiate the student model
student_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size, num_layers, dropout, lora_r).to(device)

# Instantiate and load the teacher model
# The teacher is built with one output fewer than the student (output_size - 1),
# which has to match the head stored in the checkpoint loaded below.
teacher_model = BiGRUWithAttentionLoRA(input_size, hidden_size, output_size - 1, num_layers, dropout, lora_r).to(device)
# Previously hard-coded alternative, kept for reference:
# teacher_checkpoint_path = os.path.normpath(r"Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\1st_try\BiGRUWithAttentionLoRA_epoch_1970.pth")
# NOTE(review): `best_model_path` is not defined in this cell; it is expected to
# have been set by an earlier cell -- confirm it points at the intended
# previous-period checkpoint before running.
teacher_checkpoint_path = os.path.normpath(best_model_path)
teacher_checkpoint = torch.load(teacher_checkpoint_path, map_location=device, weights_only=True)
# print(f"\n{teacher_checkpoint}\n")
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
# Only the state dict is needed; release the checkpoint dictionary right away.
del teacher_checkpoint
gc.collect()
print(f"Loaded teacher model from: \n\t{teacher_checkpoint_path}")
print(f"\n{teacher_model}\n")

# Define the loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()
# Optimizer: only train parameters of the student model (LoRA parameters and any unfrozen ones).
optimizer = optim.Adam(student_model.parameters(), lr=learning_rate) # lr=0.00005
# optimizer = optim.Adam(class_gru_model.parameters(), lr=0.001, weight_decay=1e-5)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
# Multiply the learning rate by 0.9 when the monitored ('min') metric has not improved for 10 scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_and_validate_lora(student_model, teacher_model, stable_classes, output_size, criterion, optimizer, 
                        X_train, y_train, X_val, y_val, scheduler, False, num_epochs, batch_size, 
                        alpha, model_saving_folder, model_name, stop_signal_file)

# `best_results` was reset to an empty list above.
# NOTE(review): presumably `train_and_validate_lora` fills it through the global name -- confirm.
for res in best_results:        
    print(f"Epoch {res['epoch']}/{num_epochs}, "
            f"Train Loss: {res['train_loss']:.4f}, " 
            f"Val Loss: {res['val_loss']:.4f}, "
            f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
            f"Model Path: {res['model_path']}")      
print(f"\nclass_gru_model (student_model): \n{student_model}")

print(f"\nunique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")
# del unique_classes, num_classes
# Drop the per-period data and class bookkeeping so a later cell cannot reuse
# stale values by accident. This relies on the cell running at module level,
# where locals() is the same dictionary as globals(); inside a function this
# pattern would not delete anything.
for var in ["X_train", "y_train", "X_val", "y_val", "X_test", "y_test", "Number_features", "unique_classes", "num_classes"]:
    if var in locals():
        del locals()[var]


Loaded teacher model from: 
	Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_3\1st_try\BiGRUWithAttentionLoRA_epoch_1964.pth

BiGRUWithAttentionLoRA(
  (gru): GRU(7, 64, num_layers=4, batch_first=True, bidirectional=True)
  (attention_fc): Linear(in_features=128, out_features=128, bias=True)
  (fc): LoRALinear(
    (linear): Linear(in_features=128, out_features=4, bias=True)
  )
  (dropout): Dropout(p=0.0, inplace=False)
)

'train_and_validate' function started. 

Existing folder has been removed : Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\LoRA\Period_4\3rd_try

y_train:
<class 'torch.Tensor'>
torch.int64
torch.Size([3634, 1000])
X_train:
<class 'torch.Tensor'>
torch.float32
torch.Size([3634, 1000, 7])

y_val:
<class 'torch.Tensor'>
torch.int64
torch.Size([454, 1000])
X_val:
<class 'torch.Tensor'>
torch.float32
torch.Size([454, 1000, 7])

Dataset Lengths:
Train Dataset Length: 3634
Validation Dataset Length: 454

DataLoader Batch Sizes:
Numbe

## Step 7: __*Getting into Transfer Learning for 5 periods*__

In [55]:
def custom_evaluattion_function(model, list_period_files_full_path, criterion, output_size, batch_size=64, model_number=100):
    """
    Evaluate `model` on the validation split of every period file and print the results.

    For each path in `list_period_files_full_path` the splits are rebuilt with
    `process_and_return_splits` (using the notebook-level preprocessing settings),
    the validation split is run through the model in batches, and the loss and
    accuracy are printed. A second summary then prints each period's accuracy
    together with the cumulative accuracy over periods 1..k.

    Parameters:
        model (nn.Module): Model to evaluate; it is moved to the available device and set to eval mode.
        list_period_files_full_path (list): Paths of the per-period data files, in period order.
        criterion: Loss function, called as criterion(flattened_outputs, flattened_labels).
        output_size (int): Number of output classes; used to flatten the model outputs to (-1, output_size).
        batch_size (int): Number of sequences per validation batch.
        model_number (int): Only used in the header line that is printed.

    Returns:
        None
    """
    print(f"\nUsing model {model_number}: \n{model}\n")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()
    # Per-period results, keyed by the 1-based period number.
    store_preds = {}

    for i, path_ in enumerate(list_period_files_full_path):
        # Suppress the preprocessing output; opening devnull in the `with`
        # statement guarantees the handle is closed after each period.
        with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
            splits = process_and_return_splits(
                with_indicators_file_path = path_,
                downsampled_data_minutes = downsampled_data_minutes,
                exclude_columns = exclude_columns,
                lower_threshold = lower_threshold,
                upper_threshold = upper_threshold,
                reverse_steps = reverse_steps,
                sequence_length = sequence_length,
                sliding_interval = sliding_interval
            )
        # Only the validation split is needed. Index instead of unpacking a
        # fixed number of values: other cells of this notebook unpack an extra
        # trailing value (Number_features) from the same function.
        X_val_, y_val_ = splits[2], splits[3]
        del splits

        val_loader_ = DataLoader(TensorDataset(torch.tensor(X_val_, dtype=torch.float32).to(device),  # (seqs, seq_len, features),
                                               torch.tensor(y_val_, dtype=torch.long).to(device)    # (seqs, seq_len)
                                               ),
                                 batch_size=batch_size)
        del X_val_, y_val_
        gc.collect()
        torch.cuda.empty_cache()

        # Run the validation split through the model
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader_:
                val_outputs = model(X_val_batch).view(-1, output_size)
                val_labels = y_val_batch.view(-1)
                val_loss += criterion(val_outputs, val_labels).item()
                val_predictions = torch.argmax(val_outputs, dim=-1)
                val_correct += (val_predictions == val_labels).sum().item()
                val_total += val_labels.size(0)

        # NOTE(review): one criterion value per batch is summed and then divided
        # by the number of sequences. With a mean-reducing criterion this is not
        # the mean loss. The normalisation is kept as it was so the printed
        # values stay comparable with earlier runs -- confirm what is intended.
        num_sequences = len(val_loader_.dataset)
        # An empty validation split yields NaN instead of a ZeroDivisionError.
        val_loss = val_loss / num_sequences if num_sequences else float('nan')
        val_accuracy = val_correct / val_total if val_total else float('nan')

        store_preds[i+1] = {'val_loss' : val_loss, 
                            'val_accuracy' : val_accuracy,
                            'val_correct' : val_correct,
                            'val_total' : val_total}
        
        print(f"Period {i+1}/{len(list_period_files_full_path)}, "
              f"Val Loss: {val_loss:.9f}, "
              f"Val Accuracy: {val_accuracy * 100:.2f}%, ")
        
        # Clean up DataLoader and clear cache
        del val_loader_
        gc.collect()
        torch.cuda.empty_cache()

    # Summary: per-period accuracy plus the accuracy accumulated over periods 1..k.
    print()
    combined_correct = 0
    combined_total = 0
    for period_key in sorted(store_preds):
        print("#---------------------------------------------------------#")
        period_stats = store_preds[period_key]
        # Running totals replace re-summing all earlier periods on every step.
        combined_correct += period_stats['val_correct']
        combined_total += period_stats['val_total']

        # Print accuracy for the current period
        print(f"Period {period_key}: Accuracy: {period_stats['val_accuracy'] * 100:.2f}%")

        # If not the first period, print the combined accuracy so far
        if period_key > 1:
            combined_accuracy = combined_correct / combined_total if combined_total else float('nan')
            print(f"Combined Accuracy up to Period {period_key}: {combined_accuracy * 100:.2f}%")
    print("#---------------------------------------------------------#")
    print()
    return

def _load_model_state(model, checkpoint_path, device):
    """Load the 'model_state_dict' entry of the checkpoint at `checkpoint_path` into `model`."""
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])
    # Only the state dict is needed; release the checkpoint dictionary right away.
    del checkpoint
    gc.collect()


def periods_evaluation_transfer_learning(model_number, best_epoch_number_dic, list_period_files_full_path, lr=0.00001):
    """
    Fine-tune (for model_number > 1) and evaluate the model of one period.

    There are many variables explicitly declared in this function, pay attention!
    It also reads notebook-level names (Number_features, Working_directory and
    the preprocessing settings) and resets the global `best_results` list.

    - model_number == 1: the base checkpoint is copied into 'Small_Final/period_1'
      and loaded; no training is done.
    - model_number > 1: the best checkpoint of period model_number-1 is loaded,
      trained on the data of period `model_number` with `train_and_validate`,
      and the best checkpoint saved for this period is reloaded.
    In both cases the resulting model is then evaluated on every period with
    `custom_evaluattion_function`.

    Parameters:
        model_number (int): 1-based period/model number.
        best_epoch_number_dic (dict): Maps model number -> best epoch number. It is updated
            in place with the entry for `model_number` when training is performed.
        list_period_files_full_path (list): Paths of the per-period data files, in period order.
        lr (float): Learning rate of the Adam optimizer.

    Returns:
        dict: `best_epoch_number_dic`, or -1 when `model_number` is smaller than 1.

    Raises:
        IndexError: If training finished without recording any entry in `best_results`.
    """
    global best_results

    torch.manual_seed(42)
    print("Seeding successful!\n")

    # Model parameters
    input_size = Number_features  # Number of features
    hidden_size = 64  # Number of GRU units
    output_size = 5  # Number of trend classes (0, 15, 25, -15, -25)
    num_layers = 4  # Number of GRU layers
    num_epochs= 2000 # Number of epochs/ go through entire data
    batch_size= 64 # How many sequences passed at once to the model
    model_name = 'BiGRUWithAttention' # Name of the model to use for saving
    best_results = [] # Reset before training; read back after `train_and_validate` returns
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Define a global stop signal
    stop_signal_file = os.path.normpath(os.path.join(Working_directory, 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))  # Create this file to stop training
    model_saving_folder_init = os.path.normpath(os.path.join(Working_directory, "Classif_Bi_Dir_GRU_Model/Trained_models/2nd_try"))
    ensure_folder(model_saving_folder_init)

    # Instantiate the model
    class_gru_model = BiGRUWithAttention(input_size, hidden_size, output_size, num_layers).to(device)

    # Define the loss function, optimizer and scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(class_gru_model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

    #---------------------------------------------------------

    if model_number == 1:
        # Locate the best saved base model
        epoch_number = best_epoch_number_dic[model_number]
        base_model_path = os.path.normpath(
            os.path.join(model_saving_folder_init, f"{model_name}_epoch_{epoch_number}.pth"))

        # Copy it into the per-period folder so every period uses the same layout
        destination_directory = os.path.normpath(
            os.path.join(model_saving_folder_init, f"Small_Final/period_{model_number}"))
        ensure_folder(destination_directory)
        destination_path = os.path.join(destination_directory, os.path.basename(base_model_path))
        shutil.copy(base_model_path, destination_path)

        #---------------------------------------------------------
        _load_model_state(class_gru_model, destination_path, device)
        print(f"Loaded 'base model / model {model_number}' from: \n\t{destination_path}")

    elif model_number > 1:
        # Start from the best checkpoint of the previous period
        epoch_number = best_epoch_number_dic[model_number-1]
        previous_model_folder = os.path.normpath(os.path.join(model_saving_folder_init, f'Small_Final/period_{model_number-1}'))
        previous_model_path = os.path.normpath(os.path.join(previous_model_folder, f"{model_name}_epoch_{epoch_number}.pth"))
        _load_model_state(class_gru_model, previous_model_path, device)
        print(f"Loaded base model from: \n\t{previous_model_path}")

        #---------------------------------------------------------
        # Creating New Saving Folder
        model_saving_folder = os.path.normpath(os.path.join(model_saving_folder_init, f'Small_Final/period_{model_number}'))
        ensure_folder(model_saving_folder)

        #---------------------------------------------------------
        # New dataset to work with
        splits = process_and_return_splits(
            with_indicators_file_path = list_period_files_full_path[model_number-1], # Period data
            downsampled_data_minutes = downsampled_data_minutes,
            exclude_columns = exclude_columns,
            lower_threshold = lower_threshold,
            upper_threshold = upper_threshold,
            reverse_steps = reverse_steps,
            sequence_length = sequence_length,
            sliding_interval = sliding_interval
        )
        # Only the train/validation splits are used. Slice instead of unpacking
        # a fixed number of values: other cells of this notebook unpack an extra
        # trailing value (Number_features) from the same function.
        X_train_, y_train_, X_val_, y_val_ = splits[:4]
        del splits
        #---------------------------------------------------------

        train_and_validate(class_gru_model, output_size, criterion, optimizer, X_train_, y_train_, X_val_, y_val_, scheduler, 
                        False, num_epochs, batch_size, model_saving_folder, model_name, stop_signal_file)

        if not best_results:
            # Same exception type as the unguarded lookup, but with an actionable message.
            raise IndexError(
                f"Training of model {model_number} recorded no entry in 'best_results'; "
                "cannot select a best epoch.")
        best_epoch_number_dic[model_number] = best_results[0]['epoch']
        del X_train_, y_train_, X_val_, y_val_
        gc.collect()
        torch.cuda.empty_cache()
        #---------------------------------------------------------

        for res in best_results:        
            print(f"Epoch {res['epoch']}/{num_epochs}, "
                    f"Train Loss: {res['train_loss']:.4f}, " 
                    f"Val Loss: {res['val_loss']:.4f}, "
                    f"Val Accuracy: {res['val_accuracy'] * 100:.2f}%, "
                    f"Model Path: {res['model_path']}")      
        print(f"\nclass_gru_model: \n{class_gru_model}")
        del class_gru_model
        gc.collect()
        torch.cuda.empty_cache()
        #---------------------------------------------------------

        # Instantiate the model again so evaluation uses the best saved epoch,
        # not the weights left in memory at the end of training
        class_gru_model = BiGRUWithAttention(input_size, hidden_size, output_size, num_layers).to(device)

        #---------------------------------------------------------
        # Load the best saved parameters of the current period
        epoch_number = best_epoch_number_dic[model_number]
        curr_best_model_path = os.path.normpath(os.path.join(model_saving_folder, f"{model_name}_epoch_{epoch_number}.pth")) # File of the best epoch
        _load_model_state(class_gru_model, curr_best_model_path, device)
        print(f"Loaded model {model_number} from: \n\t{curr_best_model_path}")

    else:
        print(f"Give an appropriate model_number (1, 2, ..., 5, ...). Passed model_number = {model_number}\n")
        return -1
    
    #---------------------------------------------------------
    custom_evaluattion_function(class_gru_model, list_period_files_full_path, criterion, output_size, batch_size, model_number)
    del class_gru_model
    gc.collect()
    torch.cuda.empty_cache()

    #---------------------------------------------------------
    return best_epoch_number_dic


## Step 10: Evaluate the Model

### Testing function

In [None]:
def test_model(model_class, model_path, X_test, y_test, criterion, input_size, hidden_size, output_size, num_layers):
    """
    Function to test a saved model on test data.

    The whole test set is evaluated in a single forward pass; the loss and
    accuracy are printed and the predicted classes are returned.
    
    Parameters:
        model_class (nn.Module): The class of the model to instantiate. It is called with the
            keyword arguments input_size, hidden_size, output_size and num_layers.
        model_path (str): Path to the saved model file. Either a checkpoint dictionary with a
            'model_state_dict' entry or a bare state dict is accepted.
        X_test (np.ndarray or torch.Tensor): Test features of shape (num_samples, seq_len, num_features).
        y_test (np.ndarray or torch.Tensor): Test labels of shape (num_samples, seq_len).
        criterion: Loss function, called as criterion(flattened_outputs, flattened_labels).
        input_size (int): Kept for backward compatibility but not used: the number of
            features is read from X_test.shape[-1].
        hidden_size (int): Hidden size passed to `model_class`.
        output_size (int): Number of output classes.
        num_layers (int): Number of layers passed to `model_class`.
        
    Returns:
        np.ndarray: Predicted classes for the test data, with the same shape as `y_test`.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    print(f"Loading model from {model_path}...")
    
    # Load the model
    model = model_class(input_size=X_test.shape[-1], hidden_size=hidden_size, output_size=output_size, num_layers=num_layers)
    
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    print("Checkpoint Keys:", checkpoint.keys() if isinstance(checkpoint, dict) else "State dict directly stored", '\n')
    
    if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        print("Dictionaries stored \n")
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        print("State dict directly stored \n")
        model.load_state_dict(checkpoint)  # Assume it's directly the state dict

    model.to(device)
    model.eval()  # Set model to evaluation mode

    # Convert test data to tensors. `as_tensor` accepts both NumPy arrays and
    # tensors, whereas `torch.tensor(existing_tensor)` emits a copy-construct warning.
    X_test = torch.as_tensor(X_test, dtype=torch.float32).to(device)
    y_test = torch.as_tensor(y_test, dtype=torch.long).to(device)
    
    with torch.no_grad():
        # Forward pass
        outputs = model(X_test)  # Shape: (batch_size, seq_len, output_size)
        # `reshape` instead of `view`: `as_tensor` may hand back a non-contiguous
        # tensor when the caller's array is a strided slice.
        outputs = outputs.reshape(-1, output_size)  # Flatten for prediction and loss calculation
        y_test_flat = y_test.reshape(-1)  # Flatten labels

        # Calculate loss
        test_loss = criterion(outputs, y_test_flat).item()
        
        # Predictions
        predictions = torch.argmax(outputs, dim=-1).cpu().numpy()  # Convert to NumPy array

        # Calculate accuracy
        test_accuracy = (predictions == y_test_flat.cpu().numpy()).mean() * 100

    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.2f}% \n")

    return predictions.reshape(y_test.shape)  # Reshape to match the original test data


# The `test_` prefix refers to the test split: tell pytest-style collectors that
# this helper is not a unit test, so it is not collected and called without arguments.
test_model.__test__ = False
