# *__Working on BTCUSD predictions with GRU model(DynEx_CLoRA)__*

## __Check first before starting__

In [1]:
import os

# Change the working directory to the project root
Working_directory = os.path.normpath("C:/Users/james/OneDrive/文件/Continual_Learning")
# Working_directory = os.path.normpath("/mnt/mydisk/Continual_Learning")
os.chdir(Working_directory)
print(f"Working directory: {os.getcwd()}")

Working directory: C:\Users\james\OneDrive\文件\Continual_Learning


## __All imports__

In [2]:
# Operating system and file management
import os
import shutil
import contextlib
import traceback
import gc
import glob, copy

# Jupyter notebook widgets and display
import ipywidgets as widgets
from IPython.display import display

# Data manipulation and analysis
import pandas as pd
import numpy as np

# Plotting and visualization
import matplotlib.pyplot as plt
from mpl_interactions import zoom_factory, panhandler

# Machine learning and preprocessing
from sklearn.model_selection import train_test_split
import pickle
from ta import trend, momentum, volatility, volume

# Mathematical and scientific computing
import math
from scipy.ndimage import gaussian_filter1d

# Type hinting
from typing import Callable, Tuple

# Deep learning with PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

## __All functions (For data processing)__

In [3]:
def ensure_folder(folder_path: str) -> None:
    """Ensure the given folder exists, create it if not."""
    os.makedirs(folder_path, exist_ok=True)

def plot_with_matplotlib(data: pd.DataFrame, 
                         title: str, 
                         interactive: bool = False, 
                         save_path: str = None, 
                         show_plot: bool = True, 
                         save_matplotlib_object: str = None) -> None:
    """
    Plot time-series data using Matplotlib with optional trend-based coloring.

    Args:
        - data (pd.DataFrame): Data containing a 'close' column (required).
        - title (str): Plot title.
        - interactive (bool): Enable zoom & pan if True.
        - save_path (str, optional): Path to save the figure.
        - show_plot (bool): Whether to display the plot.
        - save_matplotlib_object (str, optional): Path to save the Matplotlib object.

    Returns:
        - None: Displays or saves the plot as specified.
    """
    # Check if 'close' column exists
    if 'close' not in data.columns:
        raise ValueError("DataFrame must contain a 'close' column.")

    # Set default color from Matplotlib cycle
    default_blue = plt.rcParams['axes.prop_cycle'].by_key()['color'][0]
    
    # Define colors for different trends
    trend_colors = {
        0: 'black',
        1: 'yellow',
        2: 'red',
        3: 'green',
        4: default_blue
    }

    # Create figure and axis for plotting
    fig, ax = plt.subplots(figsize=(12, 6))

    # Plot with trend-based coloring if 'trend' column exists
    if 'trend' in data.columns:
        legend_added = set()
        prev_idx = data.index[0]
        for idx, row in data.iterrows():
            if idx != prev_idx:
                trend_key = int(row['trend'])
                label = f'Trend {trend_key}' if trend_key not in legend_added else None
                ax.plot([prev_idx, idx], 
                        [data.loc[prev_idx, 'close'], row['close']],
                        color=trend_colors[trend_key], 
                        linestyle='-', 
                        linewidth=1,
                        label=label)
                legend_added.add(trend_key)
            prev_idx = idx
        ax.set_title(f"{title} (Connected, Colored by Trend)")
    else:
        # Plot default line if no 'trend' column
        ax.plot(data.index, data['close'], label='Closing Price', linestyle='-', marker='o', 
                markersize=2, linewidth=1, color=default_blue, markerfacecolor='green', markeredgecolor='black')
        ax.set_title(title)
    
    # Set axis labels and add legend/grid
    ax.set_xlabel('Date')
    ax.set_ylabel('Closing Price (USD)')
    ax.legend()
    ax.grid()
    
    # Enable interactive features if requested
    if interactive:
        zoom_factory(ax)
        panhandler(fig)

    # Save the plot if a path is provided
    if save_path:
        fig.tight_layout()
        fig.savefig(save_path, dpi=300, bbox_inches='tight')

    # Save the Matplotlib object if requested
    if save_matplotlib_object:
        with open(save_matplotlib_object, 'wb') as f:
            pickle.dump(fig, f)

    # Display the plot if requested
    if show_plot:
        plt.show()

def load_and_show_pickle(pickle_file_path: str):
    """
    Load a pickled Matplotlib figure object and display it.

    Args:
        - pickle_file_path (str): Path to the pickled Matplotlib figure file.

    Returns:
        - None: Displays the loaded figure.
    """
    # Load and display the pickled figure
    try:
        with open(pickle_file_path, "rb") as f:
            loaded_fig = pickle.load(f)

        print(f"Figure successfully loaded and displayed from: {pickle_file_path}")
        plt.show(block=True)

    except FileNotFoundError:
        print(f"Error: File not found at {pickle_file_path}.")
    except Exception as e:
        print(f"Error loading the pickled figure: {e}")

def save_to_csv(df: pd.DataFrame, file_path: str) -> None:
    """
    Save DataFrame to CSV.
    """
    df.to_csv(file_path)
    print(f"\nSuccessfully saved data with moving average to CSV: \n\t{file_path}\n")

def read_csv_file(file_path: str, preview_rows: int = 5, 
                  days_towards_end: int = None, 
                  days_from_start: int = None, description: str = ""):
    """
    Reads a CSV file and returns a pandas DataFrame filtered by date range.

    Args:
        - file_path (str): The path to the CSV file.
        - preview_rows (int): Number of rows to preview (default is 5).
        - days_towards_end (int, optional): Number of days from the most recent date.
        - days_from_start (int, optional): Number of days from the oldest date of filtered data.
        - description (str): A brief description of the dataset.
                           Explanation:
                           - To retrieve data from the **end**: Use `days_towards_end`.
                           - To retrieve data from the **start of the filtered range**: Use `days_from_start`.
                           - To retrieve data from the **middle**: Use both:
                             For example, if `days_towards_end=100` and `days_from_start=50`,
                             the function will first filter the last 100 days of the dataset,
                             and then filter the first 50 days from this range.
                             This results in data between the last 100th and the last 50th day.

    Returns:
        - pd.DataFrame: The loaded and filtered data from the CSV file.
    """
    try:
        if description:
            print(f"\nDescription: {description}")
        print(f"\nFile path: {file_path}")
        
        # Read the CSV file
        data = pd.read_csv(file_path, parse_dates=['date'], index_col='date')
        
        # Filter by days towards the end
        if days_towards_end is not None:
            # Get the most recent date in the dataset
            last_date = data.index.max()
            end_cutoff_date = last_date - pd.Timedelta(days=days_towards_end)
            data = data[data.index >= end_cutoff_date]
            print(f"\nRetrieving data from the past {days_towards_end} days (from {end_cutoff_date.date()} onwards):")
        
        # Filter by days from the start (from the filtered data)
        if days_from_start is not None:
            # Get the earliest date in the filtered dataset
            first_date = data.index.min()
            start_cutoff_date = first_date + pd.Timedelta(days=days_from_start)
            data = data[data.index <= start_cutoff_date]
            print(f"\nRetrieving the first {days_from_start} days from the filtered data (up to {start_cutoff_date.date()}):")

        if preview_rows:
            # Print a preview of the data
            print(f"\nPreview of the first {preview_rows} rows:")
            display(data.head(preview_rows))
            print()

            print(f"\nPreview of the last {preview_rows} rows:")
            display(data.tail(preview_rows))
            print()

        return data
    
    except FileNotFoundError:
        print("Error: File not found.")
    except pd.errors.EmptyDataError:
        print("Error: The file is empty.")
    except pd.errors.ParserError:
        print("Error: File parsing failed.")
    except Exception as e:
        print(f"Unexpected error: {e}")

def downsample_minute_data(data: pd.DataFrame, n: int) -> pd.DataFrame:
    """
    Downsample minute data into N-minute intervals by retaining every Nth row.

    Args:
        - data (pd.DataFrame): The original DataFrame with a datetime index.
        - n (int): The number of minutes for the downsampling interval.

    Returns:
        - pd.DataFrame: Downsampled DataFrame.
    """
    print("\n========---> Downsampling the data! \n")
    data = data.copy()

    # Ensure index is a DatetimeIndex
    if not isinstance(data.index, pd.DatetimeIndex):
        try:
            data.index = pd.to_datetime(data.index)
        except Exception as e:
            raise ValueError("DataFrame index conversion to DatetimeIndex failed.") from e

    # Downsample by selecting rows where minute % N == 0
    return data[data.index.minute % n == 0]

def calculate_log_returns_all_columns(data: pd.DataFrame, exclude_columns: list = [], dropna: bool = True) -> pd.DataFrame:
    """
    Calculate log returns for all numeric columns in a pandas DataFrame,
    excluding specified columns, and removing excluded columns from the returned DataFrame.

    Args:
        - data (pd.DataFrame): Input DataFrame containing numeric data.
        - exclude_columns (list): List of columns to exclude from log return calculations and the result.
        - dropna (bool): Whether to drop rows with NaN values resulting from the calculation.

    Returns:
        - pd.DataFrame: DataFrame with log returns for numeric columns, excluding specified columns.
    """
    # Copy data and remove excluded columns
    data = data.copy().drop(columns=exclude_columns)
    
    # Select numeric columns for transformation
    columns_to_transform = data.select_dtypes(include=[np.number]).columns
    print(f"columns_to_transform = \n{columns_to_transform}, \nlen(columns_to_transform) = {len(columns_to_transform)}")

    # Calculate log returns for each numeric column
    for col in columns_to_transform:
        if (data[col] <= 0).any():
            raise ValueError(f"Column '{col}' contains non-positive values. Log returns require strictly positive values.")
        data[col] = np.log(data[col] / data[col].shift(1))

    # Return data with or without NaN rows based on dropna
    return data.dropna() if dropna else data

def created_sequences_2(data: pd.DataFrame, sequence_length: int = 60, sliding_interval: int = 60) -> list:
    """
    Divide the dataset into sequences based on the sequence_length.
    Each sequence must fully cover the window size.

    Args:
    - data (pd.DataFrame): The input DataFrame.
    - sequence_length (int): The window size for sequences.

    Returns:
    - sequences (list): A list of sequences (as DataFrames).
    """
    sequences = []
    
    # Iterate over the data with a sliding window to create sequences
    for i in range(0, len(data) - sequence_length + 1, sliding_interval):
        # Extract a sequence of specified length from the DataFrame
        seq = data.iloc[i:i + sequence_length].copy()
        sequences.append(seq)

    return sequences

def gaussian_smoothing(data: pd.DataFrame, sigma=2) -> pd.DataFrame:
    """
    Applies Gaussian smoothing to numeric columns in a DataFrame.

    Args:
        - data (pd.DataFrame): Input DataFrame.
        - sigma (float): Standard deviation for the Gaussian kernel (default is 2).

    Returns:
        - pd.DataFrame: Smoothed DataFrame with sorted index.
    """
    # Sort data by index in ascending order and create a copy
    data = data.sort_index(ascending=True).copy()
    
    # Apply Gaussian smoothing to numeric columns
    for column in data.columns:
        if pd.api.types.is_numeric_dtype(data[column]):
            data[column] = gaussian_filter1d(data[column].values, sigma=sigma)
    
    return data

def detect_trends_4(
    dataframe: pd.DataFrame, 
    column: str = 'close', 
    lower_threshold: float = 0.001, 
    upper_threshold: float = 0.02,
    reverse_steps: int = 7,
    trends_to_keep: set = {0, 1, 2, 3, 4}  # Default keeps all trends
) -> pd.DataFrame:
    """
    Detects trends based on log return data provided in a specified column and categorizes them into different strength levels.

    This function analyzes time-series data by evaluating cumulative trends in log return values provided in the input DataFrame. 
    It uses three dictionaries (`dic1`, `dic2`, `dic3`) to track different phases of trends, handles multi-step reversals, and 
    classifies trends dynamically based on cumulative product thresholds and specified thresholds for trend strengths.

    Args:
        - dataframe (pd.DataFrame): Input DataFrame with log return data.
        - column (str): Column name for log returns (default is 'close').
        - lower_threshold (float): Threshold for moderate trends (default is 0.001).
        - upper_threshold (float): Threshold for strong trends (default is 0.02).
        - reverse_steps (int): Steps to confirm trend reversal (default is 7).
        - trends_to_keep (set): Trends to retain, others set to 0 (default is {0, 1, 2, 3, 4}).

    Returns:
        pd.DataFrame: DataFrame with 'trend' column:
                        - 0: No trend
                        - 1: Moderate negative trend
                        - 2: Very strong negative trend
                        - 3: Moderate positive trend
                        - 4: Very strong positive trend
                      Any trends not included in `trends_to_keep` will be reset to 0.

    Function Details:
    1. **Input Assumption**:
    - The input DataFrame already contains log return data in the specified column (`column`).

    2. **Trend Tracking**:
    - Uses dictionaries to monitor trends:
        - `dic1`: Tracks the first phase of the trend.
        - `dic2`: Tracks the second phase if a reversal occurs.
        - `dic3`: Tracks the third phase if another reversal occurs.

    3. **Cumulative Product**:
    - Calculates the cumulative product of `(1 + log_return)` from the specified column to evaluate the strength of trends.

    4. **Reversal Handling**:
    - If a trend reversal persists beyond `reverse_steps`, labels are assigned based on the cumulative product tracked in `dic1`.
    - Subsequent reversals are merged or labeled independently if conditions are met.

    5. **Label Assignment**:
    - Labels are dynamically assigned based on cumulative product thresholds for positive and negative trends:
        - Positive trends are categorized as moderate (3) or strong (4).
        - Negative trends are categorized as moderate (1) or strong (2).

    6. **Trend Filtering**:
    - After detecting trends, only those specified in `trends_to_keep` remain unchanged.
    - Any trend category not included in `trends_to_keep` is reset to 0 (No Trend).

    7. **Edge Cases**:
    - Properly handles scenarios where data points are insufficient for trend analysis or when trend phases overlap, ensuring all data points are labeled.
    """
    # Copy to avoid modifying the original DataFrame
    df = dataframe.copy()
    df['trend'] = None  # Default value 

    dic1, dic2, dic3 = None, None, None # Initialize trend tracking dictionaries
    
    def assign_label(dictio_, lower_threshold, upper_threshold):
        cumulative = dictio_['cumulative']
        if cumulative > (1 + upper_threshold):
            df.iloc[dictio_['ids'], df.columns.get_loc('trend')] = 4  # Very strong positive
        elif (1 + lower_threshold) < cumulative <= (1 + upper_threshold):
            df.iloc[dictio_['ids'], df.columns.get_loc('trend')] = 3  # Moderate positive
        elif (1 - upper_threshold) < cumulative <= (1 - lower_threshold):
            df.iloc[dictio_['ids'], df.columns.get_loc('trend')] = 1  # Moderate negative
        elif cumulative <= (1 - upper_threshold):
            df.iloc[dictio_['ids'], df.columns.get_loc('trend')] = 2  # Very strong negative
        else:
            df.iloc[dictio_['ids'], df.columns.get_loc('trend')] = 0  # No trend
    
    # Process each log return to detect trends
    for idx, log_ret in enumerate(df[column]):
        sign = 1 if log_ret > 0 else -1

        if dic1 is None:  # Initialize dic1
            dic1 = {'ids': [idx], 'last_sign': sign, 'cumulative': (1 + log_ret)}
            continue

        last_sign = dic1['last_sign']
        if sign == last_sign and dic2 is None:  # Continue same trend
            dic1['ids'].append(idx)
            dic1['last_sign'] = sign
            dic1['cumulative'] *= (1 + log_ret)
            continue

        # 1st Reversal occuring
        if dic2 is None:  # Start dic2
            dic2 = {'ids': [idx], 'last_sign': sign, 'cumulative': (1 + log_ret)}
            continue

        last_sign = dic2['last_sign']
        if sign == last_sign and dic3 is None:  # Continue same trend
            dic2['ids'].append(idx)
            dic2['last_sign'] = sign
            dic2['cumulative'] *= (1 + log_ret)
            if len(dic2['ids']) == reverse_steps:
                assign_label(dic1, lower_threshold, upper_threshold) # Assign labels in the 'trend' column for ids of dic1
                dic1, dic2 = dic2, None
            continue

        # 2nd Reversal occuring
        if dic3 is None:  # Start dic3
            dic3 = {'ids': [idx], 'last_sign': sign, 'cumulative': (1 + log_ret)}
            continue

        last_sign = dic3['last_sign']
        if sign == last_sign: # Continue same trend, there is no dic4 to check if is None
            dic3['ids'].append(idx)
            dic3['last_sign'] = sign
            dic3['cumulative'] *= (1 + log_ret)
            dic_prod = dic2['cumulative'] * dic3['cumulative']
            if (sign == 1 and dic_prod > 1) or (sign == -1 and dic_prod < 1):
                dic1['ids'] += dic2['ids'] + dic3['ids']
                dic1['last_sign'] = dic3['last_sign']
                dic1['cumulative'] *= dic2['cumulative'] * dic3['cumulative']
                dic2, dic3 = None, None
                continue

            if len(dic3['ids']) == reverse_steps:      
                assign_label(dic1, lower_threshold, upper_threshold) # Assign labels in the 'trend' column for ids of dic1
                assign_label(dic2, lower_threshold, upper_threshold) # Assign labels in the 'trend' column for ids of dic1
                dic1, dic2, dic3 = dic3, None, None
            continue
            
        # 3rd Reversal occuring
        assign_label(dic1, lower_threshold, upper_threshold) # Assign labels in the 'trend' column for ids of dic1
        dic1, dic2, dic3 = dic2, dic3, {'ids': [idx], 'last_sign': sign, 'cumulative': (1 + log_ret)}

    # Assign remaining labels
    if dic1:
        assign_label(dic1, lower_threshold, upper_threshold)
    if dic2:
        assign_label(dic2, lower_threshold, upper_threshold)
    if dic3:
        assign_label(dic3, lower_threshold, upper_threshold)
    
    # Apply filtering: Keep only selected trends, set others to 0
    df['trend'] = df['trend'].where(df['trend'].isin(trends_to_keep), 0)

    return df

def split_X_y(sequences: list[pd.DataFrame], 
              target_column: str = 'trend',
              detect_trends_function: Callable[[pd.DataFrame, str, float, float, int, set], pd.DataFrame] = detect_trends_4, 
              column: str = 'close', 
              lower_threshold: float = 0.0009, 
              upper_threshold: float = 0.015,
              reverse_steps: int = 7,
              trends_to_keep: set = {0, 1, 2, 3, 4}) -> Tuple[np.ndarray, np.ndarray]:
    """
    Process sequences to generate features (X) and labels (y) with trend detection.

    Args:
        - sequences (list[pd.DataFrame]): List of DataFrame sequences.
        - target_column (str): Column name for labels (default is 'trend').
        - detect_trends_function (Callable): Trend detection function (default is detect_trends_4).
        - column (str): Column for trend detection (default is 'close').
        - lower_threshold (float): Lower threshold for trends (default is 0.0009).
        - upper_threshold (float): Upper threshold for trends (default is 0.015).
        - reverse_steps (int): Steps for trend reversal (default is 7).
        - trends_to_keep (set): Trends to retain (default is {0, 1, 2, 3, 4}).

    Returns:
        - Tuple[np.ndarray, np.ndarray]: X (features), y (labels) as NumPy arrays.
    """
    # Initialize lists for features and labels
    X, y = [], []
    
    # Process each sequence
    for seq in sequences:
        # Apply trend detection
        seq = detect_trends_function(seq, column, lower_threshold, upper_threshold, reverse_steps, trends_to_keep)
        
        # Extract features and labels
        X.append(seq.drop(columns=[target_column]).values)
        y.append(seq[target_column].values)
    
    # Convert to NumPy arrays
    return np.array(X), np.array(y)

def process_and_return_splits(
    with_indicators_file_path: str,
    downsampled_data_minutes: int,
    exclude_columns: list[str],
    lower_threshold: float,
    upper_threshold: float,
    reverse_steps: int,
    sequence_length: int,
    sliding_interval: int,
    trends_to_keep: set = {0, 1, 2, 3, 4}  # Default keeps all trends
) -> tuple[
    list[list[float]],  # X_train: List of sequences, each containing a list of features
    list[list[int]],    # y_train: List of sequences, each containing a list of labels
    list[list[float]],  # X_val: List of sequences, each containing a list of features
    list[list[int]],    # y_val: List of sequences, each containing a list of labels
    list[list[float]],  # X_test: List of sequences, each containing a list of features
    list[list[int]]     # y_test: List of sequences, each containing a list of labels
]:
    """
    Processes time-series data from a CSV file and prepares it for machine learning.

    This function performs the following steps:
        1. Reads data from the specified CSV file and sorts it by date in descending order.
        2. Optionally downsamples the data to a lower frequency (e.g., 5-minute intervals).
        3. Applies Gaussian smoothing to reduce noise in the data.
        4. Calculates log returns for all numeric columns, excluding specified columns.
        5. Detects trends based on defined thresholds (`lower_threshold`, `upper_threshold`, and `reverse_steps`).
        6. Filters trends to keep only those specified in `trends_to_keep`, setting others to 0 (No Trend).
        7. Converts the processed data into sequences of a fixed length (`sequence_length`) with a sliding interval.
        8. Splits the sequences into training (80%), validation (10%), and test (10%) sets.
        9. Further splits the sequences into features (`X`) and labels (`y`) for supervised learning.

    Args:
        - with_indicators_file_path (str): Path to the CSV file with time-series data.
        - downsampled_data_minutes (int): Frequency for downsampling (e.g., 1 for no downsampling).
        - exclude_columns (list[str]): Columns to exclude from log return calculations.
        - lower_threshold (float): Lower threshold for trend detection.
        - upper_threshold (float): Upper threshold for trend detection.
        - reverse_steps (int): Steps for reversing trends in trend detection.
        - sequence_length (int): Length of sequences to create.
        - sliding_interval (int): Interval for sliding the window.
        - trends_to_keep (set): Trends to retain, others set to 0 (default is {0, 1, 2, 3, 4}).

    Returns:
        - tuple: X_train, y_train, X_val, y_val, X_test, y_test as lists of sequences.
    """
    def check_missing_timestamps(data: pd.DataFrame, stage: str):
        """
        Checks for missing timestamps and prints diagnostic info.
        """
        missing_timestamps = pd.date_range(
            start=data.index.min(),
            end=data.index.max(),
            freq='1min',  # Checking 1-minute frequency
            tz=data.index.tz,
        ).difference(data.index)

        print(f"\n{stage} - Missing timestamps: \n{missing_timestamps}")

        if not missing_timestamps.empty:
            for timestamp in missing_timestamps[:5]:  # Show only first 5 missing timestamps
                print(f"\nMissing timestamp: {timestamp}")

                before = data[data.index < timestamp].tail(5)  # 5 data points before
                after = data[data.index > timestamp].head(5)  # 5 data points after

                print("\nData before missing timestamp:")
                display(before) if not before.empty else print("No data available before.")

                print("\nData after missing timestamp:")
                display(after) if not after.empty else print("No data available after.")

    print("\n======== Processing Time-Series Data ========")

    # Step 1: Read & Sort Data
    data = read_csv_file(with_indicators_file_path, preview_rows=0).sort_index(ascending=False)

    # Step 2: Downsample Data
    if downsampled_data_minutes != 1:
        print("\n---> Downsampling Data")
        data = downsample_minute_data(data, downsampled_data_minutes)

    check_missing_timestamps(data, "Data Retrieved")

    # Step 3: Gaussian Smoothing
    data = gaussian_smoothing(data, sigma=7)
    check_missing_timestamps(data, "Gaussian Smoothed Data")

    # Step 4: Compute Log Returns
    data = calculate_log_returns_all_columns(data, exclude_columns=exclude_columns)
    check_missing_timestamps(data, "Log Returns Computed")

    # Step 5: Create Sequences
    sequences = created_sequences_2(data, sequence_length, sliding_interval)

    # Step 6: Train / Validation / Test Split
    train_size = int(len(sequences) * 0.8)
    val_size = int(len(sequences) * 0.1)

    train_sequences = sequences[:train_size]
    val_sequences = sequences[train_size:train_size + val_size]
    test_sequences = sequences[train_size + val_size:]

    print(f"\nNumber of sequences:\n"
          f"  - Total sequences: {len(sequences)}\n"
          f"  - Train: {len(train_sequences)}\n"
          f"  - Validation: {len(val_sequences)}\n"
          f"  - Test: {len(test_sequences)}\n")

    # Step 7: Convert Sequences to X, y
    def split_and_format_data(sequences):
        X, y = split_X_y(
            sequences, target_column='trend',
            detect_trends_function=detect_trends_4,
            column='close', lower_threshold=lower_threshold,
            upper_threshold=upper_threshold, reverse_steps=reverse_steps,
            trends_to_keep=trends_to_keep
        )
        return np.array(X), np.array(y)

    X_train, y_train = split_and_format_data(train_sequences)
    X_val, y_val = split_and_format_data(val_sequences)
    X_test, y_test = split_and_format_data(test_sequences)

    # Step 8: Data Integrity Check (Ensuring Proper Types)
    def check_data_types(X: np.ndarray, y: np.ndarray, label: str):
        """
        Checks if all values in X are float and y are integer.
        """
        unexpected_X = [(i, j, k, type(v)) for i, seq in enumerate(X)
                        for j, row in enumerate(seq)
                        for k, v in enumerate(row) if not isinstance(v, (float, np.float32))]
        unexpected_y = [(i, j, type(v)) for i, seq in enumerate(y)
                        for j, v in enumerate(seq) if not isinstance(v, (int, np.int64))]

        if unexpected_X:
            print(f"\n⚠️ Unexpected type in {label} X:")
            for i, j, k, t in unexpected_X[:5]:  # Show first 5 errors
                print(f"  Sequence {i}, Row {j}, Feature {k}: {t}")

        if unexpected_y:
            print(f"\n⚠️ Unexpected type in {label} y:")
            for i, j, t in unexpected_y[:5]:  # Show first 5 errors
                print(f"  Sequence {i}, Label {j}: {t}")

    check_data_types(X_train, y_train, "Train")
    check_data_types(X_val, y_val, "Validation")
    check_data_types(X_test, y_test, "Test")

    # Step 9: Convert y types if needed
    def convert_dtype(y: np.ndarray):
        return np.array(y, dtype=np.int64) if isinstance(y, np.ndarray) and y.dtype == np.object_ else y

    y_train, y_val, y_test = map(convert_dtype, [y_train, y_val, y_test])

    # Get feature info
    Number_features = X_train.shape[-1]
    close_col_index = data.columns.get_loc('close')
    
    print(f"\nFeature Info:\n  - close_col_index = {close_col_index}\n  - Number_features = {Number_features}")

    return X_train, y_train, X_val, y_val, X_test, y_test, Number_features

def print_class_distribution(y, var_name: str) -> None:
    """
    Prints the class distribution of a label array.

    Args:
        y: Tensor, array, or list of class labels.
        var_name: Name of the variable (for display).
    """
    if isinstance(y, torch.Tensor):
        y = y.cpu().numpy()
    flattened = np.array(y).flatten()

    unique_classes, counts = np.unique(flattened, return_counts=True)
    total = counts.sum()

    header = f"Class Distribution for '{var_name}':"
    line_parts = [
        f"Class {int(c):<3} Percent: {(count / total) * 100:>6.2f}%"
        for c, count in zip(unique_classes, counts)
    ]
    print(header.ljust(40) + " || ".join(line_parts))


## __All (Initial) parameters__

In [4]:
ticker = 'BTC-USD'
downsampled_data_minutes = 1 # No downsampling

# Step 0 (Again): Identify parameters for trend settings of the loaded data with 1,000 data points
lower_threshold = 0.0009 # 較小的價格變動門檻，代表 輕微的趨勢變化 也可能被識別為趨勢。
upper_threshold = 0.015  # 較大的價格變動門檻，當變動超過這個值，才會標記為強趨勢。
reverse_steps = 13       # 趨勢反轉的步數門檻，當價格變動連續 13 次反向時，才認為趨勢改變。

# Features not to be included in the analysis
exclude_columns= ['MACD', 'MACD_signal', 'ROC_10', 'OBV', 'AD_Line']

# Step 3, under ### Correlation Analysis
# Compute correlations with the 'trend' column
# corr = data_trends.corr()
# trend_corr = corr['trend'].sort_values(ascending=False)
strongly_correlated = ['close', 'open', 'SMA_5', 'high', 'low', 'EMA_10', 'SMA_10'] # Strongly correlated (correlation > 0.6)
moderately_correlated = ['BB_middle', 'BB_lower', 'BB_upper', 'RSI_14']             # Moderately correlated (correlation between 0.3 and 0.6)
weakly_correlated = ['SMA_50', 'volume', 'BBW', 'ATR_14']                           # Weakly correlated or negligible (correlation <~ 0.3)

# Add the weakly_correlated and moderately_correlated features to exclude_columns.
exclude_columns += weakly_correlated + moderately_correlated

sequence_length = 1000
sliding_interval = 60

## __Check GPU, CUDA, Pytorch__

### GPU Details

In [5]:
!nvidia-smi

Mon Apr 21 01:12:10 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.03                 Driver Version: 566.03         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070      WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   45C    P5             18W /  200W |    1655MiB /  12282MiB |     24%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

### CUDA Details

In [6]:
def check_gpu_config():
    """
    Check GPU availability and display detailed configuration information.
    """
    # Check if GPU is available
    gpu_available = torch.cuda.is_available()
    
    # Print header
    print("=" * 50)
    print("GPU Configuration Check".center(50))
    print("=" * 50)
    
    # Basic GPU availability
    print(f"{'PyTorch Version':<25}: {torch.__version__}")
    print(f"{'GPU Available':<25}: {'Yes' if gpu_available else 'No'}")
    
    # If GPU is available, print detailed info
    if gpu_available:
        print("-" * 50)
        print("GPU Details".center(50))
        print("-" * 50)
        
        # Device info
        print(f"{'Device Name':<25}: {torch.cuda.get_device_name(0)}")
        print(f"{'Number of GPUs':<25}: {torch.cuda.device_count()}")
        print(f"{'Current Device Index':<25}: {torch.cuda.current_device()}")
        
        # Compute capability and CUDA cores
        props = torch.cuda.get_device_properties(0)
        print(f"{'Compute Capability':<25}: {props.major}.{props.minor}")
        print(f"{'Total CUDA Cores':<25}: {props.multi_processor_count * 128}")  # Approx. 128 cores per SM
        
        # Memory info
        total_memory = props.total_memory / (1024 ** 3)  # Convert to GB
        memory_allocated = torch.cuda.memory_allocated(0) / (1024 ** 3)
        memory_reserved = torch.cuda.memory_reserved(0) / (1024 ** 3)
        print(f"{'Total Memory (GB)':<25}: {total_memory:.2f}")
        print(f"{'Allocated Memory (GB)':<25}: {memory_allocated:.2f}")
        print(f"{'Reserved Memory (GB)':<25}: {memory_reserved:.2f}")
    else:
        print("-" * 50)
        print("No GPU detected. Running on CPU.".center(50))
        print("-" * 50)
    
    print("=" * 50)

if __name__ == "__main__":
    check_gpu_config()

             GPU Configuration Check              
PyTorch Version          : 2.4.1+cu124
GPU Available            : Yes
--------------------------------------------------
                   GPU Details                    
--------------------------------------------------
Device Name              : NVIDIA GeForce RTX 4070
Number of GPUs           : 1
Current Device Index     : 0
Compute Capability       : 8.9
Total CUDA Cores         : 5888
Total Memory (GB)        : 11.99
Allocated Memory (GB)    : 0.00
Reserved Memory (GB)     : 0.00


### PyTorch Details

In [7]:
def print_torch_config():
    """Print PyTorch and CUDA configuration in a formatted manner."""
    print("=" * 50)
    print("PyTorch Configuration".center(50))
    print("=" * 50)
    
    # Basic PyTorch and CUDA info
    print(f"{'PyTorch Version':<25}: {torch.__version__}")
    print(f"{'CUDA Compiled Version':<25}: {torch.version.cuda}")
    print(f"{'CUDA Available':<25}: {'Yes' if torch.cuda.is_available() else 'No'}")
    print(f"{'Number of GPUs':<25}: {torch.cuda.device_count()}")

    # GPU details if available
    if torch.cuda.is_available():
        print(f"{'GPU Name':<25}: {torch.cuda.get_device_name(0)}")

    print("-" * 50)
    
    # Seed setting
    torch.manual_seed(42)
    print(f"{'Random Seed':<25}: 42 (Seeding successful!)")
    
    print("=" * 50)

if __name__ == "__main__":
    print_torch_config()

              PyTorch Configuration               
PyTorch Version          : 2.4.1+cu124
CUDA Compiled Version    : 12.4
CUDA Available           : Yes
Number of GPUs           : 1
GPU Name                 : NVIDIA GeForce RTX 4070
--------------------------------------------------
Random Seed              : 42 (Seeding successful!)


## __Build the GRU Model__

### Standard LoRA

In [16]:
class LoRA(nn.Module):
    def __init__(self, linear_layer: nn.Linear, rank: int):
        """
        LoRA module applied to a specified linear layer.

        Args:
            linear_layer (nn.Linear): The linear layer to adapt (e.g., attention_fc or fc).
            rank (int): The rank of the LoRA adjustment matrices (e.g., 8).
        """
        super(LoRA, self).__init__()
        self.linear = linear_layer  # 保留對 linear_layer 的引用
        self.rank = rank
        
        # Get input and output dimensions from the linear layer
        in_features, out_features = linear_layer.weight.shape
        
        # Create LoRA matrices A and B
        self.A = nn.Parameter(torch.zeros(in_features, rank))  # Shape: (in_features, rank)
        self.B = nn.Parameter(torch.zeros(rank, out_features))  # Shape: (rank, out_features)
        
        # Initialize A with normal distribution, B with zeros
        nn.init.normal_(self.A, mean=0, std=1)
        nn.init.zeros_(self.B)
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass with LoRA adjustment applied to the linear layer.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output tensor with LoRA-adapted weights.
        """
        lora_delta = self.A @ self.B
        adapted_weight = self.linear.weight + lora_delta
        return nn.functional.linear(x, adapted_weight, self.linear.bias)
    
    def parameters(self, recurse=True):
        """
        Override parameters() to return only LoRA-specific parameters (A and B).

        Args:
            recurse (bool): Ignored, included for compatibility with nn.Module.

        Returns:
            list: List of LoRA parameters (self.A and self.B).
        """
        return [self.A, self.B]

class BiGRUWithAttention_LoRA(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout=0.0, lora_rank=8):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lora_rank = lora_rank
        
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True,
                          bidirectional=True, dropout=dropout if num_layers > 1 else 0)
        
        self.attention_fc = nn.Linear(hidden_size * 2, hidden_size * 2)
        self.lora_adapter = None  # 從 Period 2 開始才初始化
        self.fc = nn.Linear(hidden_size * 2, output_size)
        self.dropout = nn.Dropout(dropout)
        self.init_weights()
    
    def init_weights(self):
        for name, param in self.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)
    
    def init_lora(self):
        """在 Period 2 初始化一次 LoRA，之後都不再變動"""
        if self.lora_adapter is None:
            self.lora_adapter = LoRA(self.attention_fc, self.lora_rank).to(next(self.parameters()).device)
            print("Initialized LoRA adapter")

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_layers * 2, batch_size, self.hidden_size, device=x.device)
        out, _ = self.gru(x, h0)

        if self.lora_adapter:
            attn_out = self.lora_adapter(out)
        else:
            attn_out = self.attention_fc(out)

        attn_weights = torch.tanh(attn_out)
        out = attn_weights * out
        out = self.dropout(out)
        out = self.fc(out)
        return out

    def get_trainable_parameters(self):
        params = []
        names = []

        if self.lora_adapter:
            lora_params = list(self.lora_adapter.parameters())
            params += lora_params
            names += ['lora_adapter.' + name for name, _ in self.lora_adapter.named_parameters()]

        fc_params = list(self.fc.parameters())
        params += fc_params
        names += ['fc.' + name for name, _ in self.fc.named_parameters()]

        print(f"🧠 Trainable parameters: {len(params)} total")
        for name in names:
            print(f"  ✅ {name}")
        
        return params


## __Training and validation function__

### Analytical Function

In [9]:
def compute_classwise_accuracy(student_logits_flat, y_batch, class_correct, class_total):
    """
    Computes per-class accuracy by accumulating correct and total samples for each class using vectorized operations.
    
    Args:
        student_logits_flat (torch.Tensor): Model predictions (logits) in shape [batch_size * seq_len, output_size]
        y_batch (torch.Tensor): True labels in shape [batch_size * seq_len]
        class_correct (dict): Dictionary to store correct predictions per class
        class_total (dict): Dictionary to store total samples per class
    """
    # Ensure inputs are on the same device
    if student_logits_flat.device != y_batch.device:
        raise ValueError("student_logits_flat and y_batch must be on the same device")

    # Convert logits to predicted class indices
    predictions = torch.argmax(student_logits_flat, dim=-1)  # Shape: [batch_size * seq_len]

    # Compute correct predictions mask
    correct_mask = (predictions == y_batch)  # Shape: [batch_size * seq_len], boolean

    # Get unique labels in this batch
    unique_labels = torch.unique(y_batch)

    # Update class_total and class_correct using vectorized operations
    for label in unique_labels:
        label = label.item()  # Convert tensor to scalar
        if label not in class_total:
            class_total[label] = 0
            class_correct[label] = 0
        
        # Count total samples for this label
        label_mask = (y_batch == label)
        class_total[label] += label_mask.sum().item()
        
        # Count correct predictions for this label
        class_correct[label] += (label_mask & correct_mask).sum().item()

### Training and validation function 

In [10]:
def train_lora_baseline(model, output_size, criterion, optimizer,
                        X_train, y_train, X_val, y_val, scheduler=None,
                        num_epochs=10, batch_size=64, model_saving_folder=None,
                        model_name=None, stop_signal_file=None):
    print("\n🚀 'train_lora_baseline' started.")

    # Prepare saving folder
    if model_saving_folder:
        if os.path.exists(model_saving_folder):
            shutil.rmtree(model_saving_folder)
            print(f"✅ Removed existing folder: {model_saving_folder}")
        os.makedirs(model_saving_folder, exist_ok=True)

    model_name = model_name or 'model'
    model_saving_folder = model_saving_folder or './saved_models'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Tensor to device
    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val = torch.tensor(y_val, dtype=torch.long).to(device)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size, shuffle=False)

    print("\n✅ Data Overview:")
    print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"X_val: {X_val.shape}, y_val: {y_val.shape}")

    best_results = []
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        class_correct, class_total = {}, {}

        if stop_signal_file and os.path.exists(stop_signal_file):
            print("\n🛑 Stop signal detected. Exiting training loop.")
            break

        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch).view(-1, output_size)
            y_batch = y_batch.view(-1)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item() * X_batch.size(0)
            compute_classwise_accuracy(outputs, y_batch, class_correct, class_total)

        train_loss = epoch_loss / len(train_loader.dataset)
        train_acc = {int(c): f"{(class_correct[c] / class_total[c]) * 100:.2f}%" if class_total[c] > 0 else "0.00%"
                     for c in sorted(class_total.keys())}

        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        val_class_correct, val_class_total = {}, {}
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = model(X_batch).view(-1, output_size)
                y_batch = y_batch.view(-1)
                val_loss += criterion(outputs, y_batch).item() * X_batch.size(0)
                predictions = torch.argmax(outputs, dim=-1)
                val_correct += (predictions == y_batch).sum().item()
                val_total += y_batch.size(0)
                compute_classwise_accuracy(outputs, y_batch, val_class_correct, val_class_total)

        val_loss /= len(val_loader.dataset)
        val_acc = val_correct / val_total
        val_acc_cls = {int(c): f"{(val_class_correct[c] / val_class_total[c]) * 100:.2f}%" if val_class_total[c] > 0 else "0.00%"
                       for c in sorted(val_class_total.keys())}

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.6f}, Train-Class-Acc: {train_acc},")
        print(f"Val Loss: {val_loss:.6f}, Val Acc: {val_acc * 100:.2f}%, Val-Class-Acc: {val_acc_cls}, LR: {optimizer.param_groups[0]['lr']:.6f}")

        model_path = os.path.join(model_saving_folder, f"{model_name}_epoch_{epoch+1}.pth")
        current = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'val_accuracy': val_acc,
            'train_classwise_accuracy': train_acc,
            'val_classwise_accuracy': val_acc_cls,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'learning_rate': optimizer.param_groups[0]['lr'],
            'model_path': model_path
        }

        if len(best_results) < 5 or val_acc > best_results[-1]['val_accuracy']:
            if len(best_results) == 5:
                to_remove = best_results.pop()
                if os.path.exists(to_remove['model_path']):
                    os.remove(to_remove['model_path'])
                    print(f"🗑 Removed: {to_remove['model_path']}")
            best_results.append(current)
            best_results.sort(key=lambda x: (x['val_accuracy'], x['epoch']), reverse=True)
            torch.save(current, model_path)
            print(f"✅ Saved model: {model_path}")

        if scheduler: scheduler.step(val_loss)

    if best_results:
        best = best_results[0]
        best_model_path = os.path.join(model_saving_folder, f"{model_name}_best.pth")
        torch.save(best, best_model_path)
        print(f"\n🏆 Best model saved as: {best_model_path} (Val Accuracy: {best['val_accuracy'] * 100:.2f}%)")

    final_model_path = os.path.join(model_saving_folder, f"{model_name}_final.pth")
    torch.save(current, final_model_path)
    print(f"\n📌 Final model saved as: {final_model_path}")

    print("\n🎯 Top 5 Best Models:")
    for res in best_results:
        print(f"Epoch {res['epoch']}, Train Loss: {res['train_loss']:.6f}, Train-Acc: {res['train_classwise_accuracy']},\n"
              f"Val Loss: {res['val_loss']:.6f}, Val Acc: {res['val_accuracy']*100:.2f}%, Val-Acc: {res['val_classwise_accuracy']},"
              f" Model Path: {res['model_path']}")

    del X_train, y_train, X_val, y_val, train_loader, val_loader
    torch.cuda.empty_cache()
    gc.collect()


## __Setup before training__

### Define list_period_files_full_path

In [11]:
def setup_file_paths(pair='BTCUSD', base_dir='Data', days=190):
    """
    Set up file paths for cryptocurrency data across multiple periods.

    Args:
        pair (str): Trading pair (e.g., 'BTCUSD').
        base_dir (str): Base directory for data storage (default 'Data').
        days (int): Number of days for each period (default 190).

    Returns:
        tuple: (base_folder_path, with_indicators_file_path, list_period_files_full_path)
    """
    # Define base file name and folder structure
    file_name = f"Polygon_{pair}_4Y_1min"
    base_folder_path = os.path.normpath(os.path.join(base_dir, file_name))
    
    # Check if folder exists
    if not os.path.isdir(base_folder_path):
        raise FileNotFoundError(f"Directory '{base_folder_path}' does not exist.")

    # Define file path with indicators for Period 1
    with_indicators_file_path = os.path.normpath(
        os.path.join(base_folder_path, f"_{file_name}_{days}_days_with_indicators.csv")
    )

    # Define file paths for all periods
    list_period_files_full_path = [
        # Period 1
        with_indicators_file_path,
        # Period 2: 2020-11-11 to 2021-05-20
        os.path.normpath(os.path.join(
            base_folder_path, f"{file_name}_{days}_days__2020-11-11__2021-05-20__with_indicators.csv"
        )),
        # Period 3: 2021-05-20 to 2021-11-26
        os.path.normpath(os.path.join(
            base_folder_path, f"{file_name}_{days}_days__2021-05-20__2021-11-26__with_indicators.csv"
        )),
        # Period 4: 2021-11-26 to 2022-06-04
        os.path.normpath(os.path.join(
            base_folder_path, f"{file_name}_{days}_days__2021-11-26__2022-06-04__with_indicators.csv"
        )),
        # Period 5: 2022-06-04 to 2022-12-11
        os.path.normpath(os.path.join(
            base_folder_path, f"{file_name}_{days}_days__2022-06-04__2022-12-11__with_indicators.csv"
        )),
    ]

    return base_folder_path, with_indicators_file_path, list_period_files_full_path

def print_folder_contents(folder_path):
    """Print all files in the specified folder."""
    print("\n📂 Folder Contents:")
    for file in os.listdir(folder_path):
        print(f"Found file: {file}")

if __name__ == "__main__":
    # Set up paths
    base_folder_path, with_indicators_file_path, list_period_files_full_path = setup_file_paths()

    # Print results
    print("=" * 70)
    print("File Path Configuration".center(70))
    print("=" * 70)
    
    print(f"{'Base Folder Path':<25}: {base_folder_path}")
    print(f"{'Period 1 File Path':<25}: {with_indicators_file_path}")
    print("-" * 70)
    
    print("List of Period Files:")
    for i, path in enumerate(list_period_files_full_path, 1):
        print(f"{'Period ' + str(i):<25}: {path}")
    
    print("=" * 70)

    # Print folder contents
    print_folder_contents(base_folder_path)

                       File Path Configuration                        
Base Folder Path         : Data\Polygon_BTCUSD_4Y_1min
Period 1 File Path       : Data\Polygon_BTCUSD_4Y_1min\_Polygon_BTCUSD_4Y_1min_190_days_with_indicators.csv
----------------------------------------------------------------------
List of Period Files:
Period 1                 : Data\Polygon_BTCUSD_4Y_1min\_Polygon_BTCUSD_4Y_1min_190_days_with_indicators.csv
Period 2                 : Data\Polygon_BTCUSD_4Y_1min\Polygon_BTCUSD_4Y_1min_190_days__2020-11-11__2021-05-20__with_indicators.csv
Period 3                 : Data\Polygon_BTCUSD_4Y_1min\Polygon_BTCUSD_4Y_1min_190_days__2021-05-20__2021-11-26__with_indicators.csv
Period 4                 : Data\Polygon_BTCUSD_4Y_1min\Polygon_BTCUSD_4Y_1min_190_days__2021-11-26__2022-06-04__with_indicators.csv
Period 5                 : Data\Polygon_BTCUSD_4Y_1min\Polygon_BTCUSD_4Y_1min_190_days__2022-06-04__2022-12-11__with_indicators.csv

📂 Folder Contents:
Found file: Polyg

### __All periods data__
'trend': Categorized trend values based on the detected phases:
- 0: No trend
- 1: Moderate negative trend
- 2: Very strong negative trend
- 3: Moderate positive trend
- 4: Very strong positive trend


## __Train the Model__

---
### Period 1 (num_layers = 4, lora_r=4)
+ ##### BiGRUWithAttention_LoRA
+ ##### Training and saving in *'Standard_LoRA/Rank_4_Period_1/1st_try'*
#### __Val Accuracy: 98.36%__
#### __Val-Class-Acc: {0: '98.54%', 1: '98.09%'}__


In [12]:
with contextlib.redirect_stdout(open(os.devnull, 'w')):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[0], # Change 
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1}  # Default keeps all trends : {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")

print_class_distribution(y_train, "y_train")
print_class_distribution(y_val, "y_val")
print_class_distribution(y_test, "y_test")



Number_features = 7
unique_classes = [0 1]
num_classes = 2
Class Distribution for 'y_train':       Class 0   Percent:  63.08% || Class 1   Percent:  36.92%
Class Distribution for 'y_val':         Class 0   Percent:  60.61% || Class 1   Percent:  39.39%
Class Distribution for 'y_test':        Class 0   Percent:  58.04% || Class 1   Percent:  41.96%


In [None]:
# ==== Model Hyperparameters ====
input_size = Number_features
hidden_size = 64
output_size = num_classes   # = 2 for Period 1
num_layers = 4
dropout = 0.0
lora_r = 4
num_epochs = 1000
batch_size = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ==== Paths ====
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Standard_LoRA/Rank_4_Period_1/1st_try"
))
ensure_folder(model_saving_folder)

# ==== Model (no LoRA initialized in Period 1) ====
model = BiGRUWithAttention_LoRA(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout,
    lora_rank=lora_r
).to(device)

# ==== 判斷是 Period 1 還是 Period 2+ ====
is_period_1 = True  # ⬅️ 你在實驗程式可以明確設定

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()

if is_period_1:
    optimizer = optim.Adam(model.parameters(), lr=0.0001)  # 訓練所有參數
else:
    model.init_lora()  # Period 2+ 才加
    optimizer = optim.Adam(model.get_trainable_parameters(), lr=0.0001)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_lora_baseline(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='BiGRUWithAttention',
    stop_signal_file=stop_signal_file
)

# ==== Summary ====
print(f"\n✅ Training Complete. Final model: \n{model}")
print(f"unique_classes = {unique_classes}, num_classes = {num_classes}")

# ==== Clean up ====
del X_train, y_train, X_val, y_val, X_test, y_test, Number_features, unique_classes, num_classes
gc.collect()
torch.cuda.empty_cache()



🚀 'train_lora_baseline' started.
✅ Removed existing folder: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_1\1st_try

✅ Data Overview:
X_train: torch.Size([3634, 1000, 7]), y_train: torch.Size([3634, 1000])
X_val: torch.Size([454, 1000, 7]), y_val: torch.Size([454, 1000])
Epoch 1/2000, Train Loss: 0.680050, Train-Class-Acc: {0: '98.34%', 1: '1.61%'},
Val Loss: 0.671339, Val Acc: 60.61%, Val-Class-Acc: {0: '100.00%', 1: '0.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_1\1st_try\BiGRUWithAttention_epoch_1.pth
Epoch 2/2000, Train Loss: 0.659055, Train-Class-Acc: {0: '100.00%', 1: '0.00%'},
Val Loss: 0.671710, Val Acc: 60.61%, Val-Class-Acc: {0: '100.00%', 1: '0.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_1\1st_try\BiGRUWithAttention_epoch_2.pth
Epoch 3/2000, Train Loss: 0.658948, Train-Class

---
### Period 2 (num_layers = 4, lora_r=4)
+ ##### BiGRUWithAttention_LoRA
+ ##### Training and saving in *'Standard_LoRA/Rank_4_Period_2/1st_try'*
#### __Val Accuracy: 96.43%__
#### __Val-Class-Acc: {0: '98.93%', 1: '95.63%', 2: '87.04%'}__


In [15]:
with contextlib.redirect_stdout(open(os.devnull, 'w')):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[1],  # Period 2
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2}
    )

print(f"\nNumber_features = {Number_features}")

unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")

print_class_distribution(y_train, "y_train")
print_class_distribution(y_val, "y_val")
print_class_distribution(y_test, "y_test")


Number_features = 7
unique_classes = [0 1 2]
num_classes = 3
Class Distribution for 'y_train':       Class 0   Percent:  53.41% || Class 1   Percent:  36.03% || Class 2   Percent:  10.55%
Class Distribution for 'y_val':         Class 0   Percent:  52.72% || Class 1   Percent:  36.38% || Class 2   Percent:  10.90%
Class Distribution for 'y_test':        Class 0   Percent:  49.11% || Class 1   Percent:  32.27% || Class 2   Percent:  18.62%


In [17]:
# ==== Model Hyperparameters ====
input_size = Number_features
hidden_size = 64
output_size = num_classes
num_layers = 4
dropout = 0.0
lora_r = 4
num_epochs = 1000
batch_size = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ==== Paths ====
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Standard_LoRA/Rank_4_Period_2/1st_try"
))
ensure_folder(model_saving_folder)

# ==== Load previous model (Period 1) ====
previous_model_path = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Standard_LoRA/Rank_4_Period_1/1st_try", "BiGRUWithAttention_best.pth"
))
checkpoint = torch.load(previous_model_path, map_location=device)
previous_state_dict = checkpoint["model_state_dict"]

# ==== Initialize new model ====
model = BiGRUWithAttention_LoRA(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout,
    lora_rank=lora_r
).to(device)

# ==== Load GRU & attention_fc (excluding FC and LoRA) ====
model.load_state_dict({
    k: v for k, v in previous_state_dict.items()
    if not k.startswith("fc.") and not k.startswith("lora_adapter.")
}, strict=False)

# ==== Initialize LoRA for the first time ====
model.init_lora()

# ==== Check if previous model had LoRA weights ====
has_lora = any(k.startswith("lora_adapter.") for k in previous_state_dict.keys())
if has_lora:
    model.lora_adapter.load_state_dict({
        k.replace("lora_adapter.", ""): v
        for k, v in previous_state_dict.items() if k.startswith("lora_adapter.")
    })
    print("✅ LoRA weights loaded from previous model.")
else:
    print("⚠️ Previous model has no LoRA. Using newly initialized LoRA.")

# ==== Training Setup ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.get_trainable_parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_lora_baseline(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='BiGRUWithAttention',
    stop_signal_file=stop_signal_file
)

# ==== Summary ====
print(f"\n✅ Training Complete. Final model: \n{model}")
print(f"unique_classes = {unique_classes}, num_classes = {num_classes}")

# ==== Clean up ====
del X_train, y_train, X_val, y_val, X_test, y_test, Number_features, unique_classes, num_classes
gc.collect()
torch.cuda.empty_cache()


  checkpoint = torch.load(previous_model_path, map_location=device)


Initialized LoRA adapter
⚠️ Previous model has no LoRA. Using newly initialized LoRA.
🧠 Trainable parameters: 4 total
  ✅ lora_adapter.A
  ✅ lora_adapter.B
  ✅ lora_adapter.linear.weight
  ✅ lora_adapter.linear.bias
  ✅ fc.weight
  ✅ fc.bias

🚀 'train_lora_baseline' started.
✅ Removed existing folder: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_2\1st_try

✅ Data Overview:
X_train: torch.Size([3634, 1000, 7]), y_train: torch.Size([3634, 1000])
X_val: torch.Size([454, 1000, 7]), y_val: torch.Size([454, 1000])
Epoch 1/1000, Train Loss: 1.545942, Train-Class-Acc: {0: '23.23%', 1: '18.26%', 2: '24.33%'},
Val Loss: 1.133397, Val Acc: 34.64%, Val-Class-Acc: {0: '46.37%', 1: '23.99%', 2: '13.46%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_2\1st_try\BiGRUWithAttention_epoch_1.pth
Epoch 2/1000, Train Loss: 0.888692, Train-Class-Acc: {0: '80.58%', 1: '37.86%', 2: '13.03%'},
Val

---
### Period 3 (num_layers = 4, lora_r=4)
+ ##### BiGRUWithAttention_LoRA
+ ##### Training and saving in *'Standard_LoRA/Rank_4_Period_3/1st_try'*
#### __Val Accuracy: 91.14%__
#### __Val-Class-Acc: {0: '50.14%', 1: '97.60%', 2: '90.57%', 3: '94.29%'}__


In [18]:
with contextlib.redirect_stdout(open(os.devnull, 'w')):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[2],  # Period 3
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2, 3}
    )

print(f"\nNumber_features = {Number_features}")

unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")

print_class_distribution(y_train, "y_train")
print_class_distribution(y_val, "y_val")
print_class_distribution(y_test, "y_test")



Number_features = 7
unique_classes = [0 1 2 3]
num_classes = 4
Class Distribution for 'y_train':       Class 0   Percent:  13.79% || Class 1   Percent:  37.59% || Class 2   Percent:   8.89% || Class 3   Percent:  39.73%
Class Distribution for 'y_val':         Class 0   Percent:  10.26% || Class 1   Percent:  46.19% || Class 2   Percent:   4.21% || Class 3   Percent:  39.34%
Class Distribution for 'y_test':        Class 0   Percent:  10.30% || Class 1   Percent:  43.50% || Class 2   Percent:   8.50% || Class 3   Percent:  37.70%


In [19]:
# ==== Model Hyperparameters ====
input_size = Number_features
hidden_size = 64
output_size = num_classes
num_layers = 4
dropout = 0.0
lora_r = 4
num_epochs = 1000
batch_size = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ==== Paths ====
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Standard_LoRA/Rank_4_Period_3/1st_try"
))
ensure_folder(model_saving_folder)

# ==== Load previous model (Period 2) ====
previous_model_path = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Standard_LoRA/Rank_4_Period_2/1st_try", "BiGRUWithAttention_best.pth"
))
checkpoint = torch.load(previous_model_path, map_location=device)
previous_state_dict = checkpoint["model_state_dict"]

# ==== Initialize model and LoRA ====
model = BiGRUWithAttention_LoRA(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout,
    lora_rank=lora_r
).to(device)

model.init_lora()

# ==== Load GRU + attention_fc + LoRA weights ====
model.load_state_dict({
    k: v for k, v in previous_state_dict.items()
    if not k.startswith("fc.")
}, strict=False)

print("✅ Model loaded (GRU, attention_fc, and LoRA). FC layer reinitialized.")

# ==== Training Setup ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.get_trainable_parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_lora_baseline(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='BiGRUWithAttention',
    stop_signal_file=stop_signal_file
)

print(f"\n✅ Training Complete. Final model: \n{model}")
print(f"unique_classes = {unique_classes}, num_classes = {num_classes}")

del X_train, y_train, X_val, y_val, X_test, y_test, Number_features, unique_classes, num_classes
gc.collect()
torch.cuda.empty_cache()


  checkpoint = torch.load(previous_model_path, map_location=device)


Initialized LoRA adapter
✅ Model loaded (GRU, attention_fc, and LoRA). FC layer reinitialized.
🧠 Trainable parameters: 4 total
  ✅ lora_adapter.A
  ✅ lora_adapter.B
  ✅ lora_adapter.linear.weight
  ✅ lora_adapter.linear.bias
  ✅ fc.weight
  ✅ fc.bias

🚀 'train_lora_baseline' started.
✅ Removed existing folder: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_3\1st_try

✅ Data Overview:
X_train: torch.Size([3634, 1000, 7]), y_train: torch.Size([3634, 1000])
X_val: torch.Size([454, 1000, 7]), y_val: torch.Size([454, 1000])
Epoch 1/1000, Train Loss: 1.754825, Train-Class-Acc: {0: '32.53%', 1: '30.57%', 2: '5.05%', 3: '4.25%'},
Val Loss: 1.523233, Val Acc: 26.73%, Val-Class-Acc: {0: '40.22%', 1: '43.29%', 2: '3.95%', 3: '6.20%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_3\1st_try\BiGRUWithAttention_epoch_1.pth
Epoch 2/1000, Train Loss: 1.353650, Train-Class-Acc: {0: '42.10%',

---
### Period 4 (num_layers = 4, lora_r=4)
+ ##### BiGRUWithAttention_LoRA
+ ##### Training and saving in *'Standard_LoRA/Rank_4_Period_4/1st_try'*
#### __Val Accuracy: 88.21%__
#### __Val-Class-Acc: {0: '75.26%', 1: '95.05%', 2: '92.39%', 3: '94.41%', 4: '41.75%'}__


In [20]:
with contextlib.redirect_stdout(open(os.devnull, 'w')):
    X_train, y_train, X_val, y_val, X_test, y_test, Number_features = process_and_return_splits(
        with_indicators_file_path = list_period_files_full_path[3],  # Period 4
        downsampled_data_minutes = downsampled_data_minutes,
        exclude_columns = exclude_columns,
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        reverse_steps = reverse_steps,
        sequence_length = sequence_length,
        sliding_interval = sliding_interval,
        trends_to_keep = {0, 1, 2, 3, 4}
    )

print(f"\nNumber_features = {Number_features}")

unique_classes = np.unique(y_val)
num_classes = len(unique_classes)
print(f"unique_classes = {unique_classes}")
print(f"num_classes = {num_classes}")

print_class_distribution(y_train, "y_train")
print_class_distribution(y_val, "y_val")
print_class_distribution(y_test, "y_test")



Number_features = 7
unique_classes = [0 1 2 3 4]
num_classes = 5
Class Distribution for 'y_train':       Class 0   Percent:   6.47% || Class 1   Percent:  39.17% || Class 2   Percent:   7.24% || Class 3   Percent:  41.06% || Class 4   Percent:   6.06%
Class Distribution for 'y_val':         Class 0   Percent:   5.35% || Class 1   Percent:  36.15% || Class 2   Percent:  14.78% || Class 3   Percent:  34.02% || Class 4   Percent:   9.70%
Class Distribution for 'y_test':        Class 0   Percent:   6.63% || Class 1   Percent:  40.53% || Class 2   Percent:   5.48% || Class 3   Percent:  41.29% || Class 4   Percent:   6.08%


In [21]:
# ==== Model Hyperparameters ====
input_size = Number_features
hidden_size = 64
output_size = num_classes
num_layers = 4
dropout = 0.0
lora_r = 4
num_epochs = 1000
batch_size = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ==== Paths ====
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'Classif_Bi_Dir_GRU_Model/stop_training.txt'))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Standard_LoRA/Rank_4_Period_4/1st_try"
))
ensure_folder(model_saving_folder)

# ==== Load previous model (Period 3) ====
previous_model_path = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "Classif_Bi_Dir_GRU_Model/Trained_models/Standard_LoRA/Rank_4_Period_3/1st_try", "BiGRUWithAttention_best.pth"
))
checkpoint = torch.load(previous_model_path, map_location=device)
previous_state_dict = checkpoint["model_state_dict"]

# ==== Initialize model and LoRA ====
model = BiGRUWithAttention_LoRA(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout,
    lora_rank=lora_r
).to(device)

model.init_lora()

# ==== Load GRU + attention_fc + LoRA weights ====
model.load_state_dict({
    k: v for k, v in previous_state_dict.items()
    if not k.startswith("fc.")
}, strict=False)

print("✅ Model loaded (GRU, attention_fc, and LoRA). FC layer reinitialized.")

# ==== Training Setup ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.get_trainable_parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

train_lora_baseline(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='BiGRUWithAttention',
    stop_signal_file=stop_signal_file
)

print(f"\n✅ Training Complete. Final model: \n{model}")
print(f"unique_classes = {unique_classes}, num_classes = {num_classes}")

del X_train, y_train, X_val, y_val, X_test, y_test, Number_features, unique_classes, num_classes
gc.collect()
torch.cuda.empty_cache()


Initialized LoRA adapter
✅ Model loaded (GRU, attention_fc, and LoRA). FC layer reinitialized.
🧠 Trainable parameters: 4 total
  ✅ lora_adapter.A
  ✅ lora_adapter.B
  ✅ lora_adapter.linear.weight
  ✅ lora_adapter.linear.bias
  ✅ fc.weight
  ✅ fc.bias

🚀 'train_lora_baseline' started.
✅ Removed existing folder: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_4\1st_try

✅ Data Overview:
X_train: torch.Size([3634, 1000, 7]), y_train: torch.Size([3634, 1000])
X_val: torch.Size([454, 1000, 7]), y_val: torch.Size([454, 1000])


  checkpoint = torch.load(previous_model_path, map_location=device)


Epoch 1/1000, Train Loss: 1.441725, Train-Class-Acc: {0: '21.80%', 1: '17.00%', 2: '1.15%', 3: '56.43%', 4: '3.46%'},
Val Loss: 1.389300, Val Acc: 41.29%, Val-Class-Acc: {0: '24.16%', 1: '42.66%', 2: '1.81%', 3: '70.32%', 4: '3.98%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_4\1st_try\BiGRUWithAttention_epoch_1.pth
Epoch 2/1000, Train Loss: 1.138782, Train-Class-Acc: {0: '16.44%', 1: '60.53%', 2: '0.86%', 3: '76.20%', 4: '2.91%'},
Val Loss: 1.189568, Val Acc: 56.24%, Val-Class-Acc: {0: '18.15%', 1: '74.60%', 2: '1.48%', 3: '81.57%', 4: '3.47%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL\Classif_Bi_Dir_GRU_Model\Trained_models\Standard_LoRA\Rank_4_Period_4\1st_try\BiGRUWithAttention_epoch_2.pth
Epoch 3/1000, Train Loss: 0.955387, Train-Class-Acc: {0: '13.49%', 1: '80.63%', 2: '0.89%', 3: '84.97%', 4: '1.92%'},
Val Loss: 1.042064, Val Acc: 62.33%, Val-Class-Acc: {0: '15.63%', 1: '86.88%', 2: '1.73%', 3: '8

---

## 📊 Summary: Standard LoRA (BiGRUWithAttention + LoRA)  
### (`LoRA_CL_Classif_Bi_Dir_GRU_Model_ver2.ipynb`)

| Period | Model & Config                                                                                     | Validation Accuracy | Class-wise Accuracy                                                        |
|--------|----------------------------------------------------------------------------------------------------|---------------------|-----------------------------------------------------------------------------|
| 1      | `BiGRUWithAttention_LoRA`<br>(num_layers=4, lora_r=4)<br>saved in `'Standard_LoRA/Rank_4_Period_1/1st_try'` | **98.36%**          | {0: 98.54%, 1: 98.09%}                                                     |
| 2      | `BiGRUWithAttention_LoRA`<br>(num_layers=4, lora_r=4)<br>saved in `'Standard_LoRA/Rank_4_Period_2/1st_try'` | **96.43%**          | {0: 98.93%, 1: 95.63%, 2: 87.04%}                                          |
| 3      | `BiGRUWithAttention_LoRA`<br>(num_layers=4, lora_r=4)<br>saved in `'Standard_LoRA/Rank_4_Period_3/1st_try'` | **91.14%**          | {0: 50.14%, 1: 97.60%, 2: 90.57%, 3: 94.29%}                               |
| 4      | `BiGRUWithAttention_LoRA`<br>(num_layers=4, lora_r=4)<br>saved in `'Standard_LoRA/Rank_4_Period_4/1st_try'` | **88.21%**          | {0: 75.26%, 1: 95.05%, 2: 92.39%, 3: 94.41%, 4: 41.75%}                    |

---
