### Colab Mount

In [2]:

from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### `data.py`

In [3]:
import pickle
import pandas as pd
import os
import json
from sklearn.preprocessing import StandardScaler
import pickle
import numpy as np
import torch
from tqdm import tqdm
"""
This file lets you load each station's data from its .pkl file
and the station coordinates from the JSON file.
"""

def normalize_coordinates(x_coords, y_coords):
    """
    Normalize the X and Y coordinates to the range [0, 1] (min-max scaling).

    Each axis is scaled independently. When every value on an axis is the
    same (for example a single station), the range is zero; that axis is
    mapped to 0.0 instead of dividing by zero and producing NaN.

    Args:
        x_coords (array-like): X coordinates in meters, one per station.
        y_coords (array-like): Y coordinates in meters, one per station.

    Returns:
        tuple: (x_normalized, y_normalized), each a float32 torch tensor with
        a trailing singleton dimension added, i.e. [num_stations, 1] for
        one-dimensional input.
    """
    def _min_max(values):
        # np.asarray lets plain lists / pandas Series through as well.
        values = np.asarray(values)
        lowest = values.min()
        span = values.max() - lowest
        if span == 0:
            # Degenerate axis: all coordinates identical.
            scaled = np.zeros(values.shape)
        else:
            scaled = (values - lowest) / span
        return torch.tensor(scaled, dtype=torch.float32).unsqueeze(1)

    return _min_max(x_coords), _min_max(y_coords)
def drop_nan_rows_multiple_custom(df_list, custom_na=['-'], reset_indices=True):
    """
    Removes rows from all DataFrames in the list where any DataFrame has NaN or
    a custom NaN representation in any column.

    Rows are matched by index label: the combined mask is built on the index of
    the first DataFrame and the flagged labels are dropped from every frame.
    The input DataFrames are not modified; `replace` and `drop` return copies.

    Parameters:
    df_list (List[pd.DataFrame]): List of DataFrames to process. All must have
        the same number of rows.
    custom_na (List[str]): Values to be treated as NaN. Defaults to ['-'].
        (The default list is never mutated by this function.)
    reset_indices (bool): Whether to reset the index after dropping rows.
        Defaults to True, which matches the previous hard-coded behavior.

    Returns:
    List[pd.DataFrame]: List of cleaned DataFrames, in the same order.

    Raises:
    ValueError: If df_list is empty or the DataFrames differ in row count.
    """
    if not df_list:
        raise ValueError("The list of DataFrames is empty.")

    # Ensure all DataFrames have the same number of rows
    num_rows = df_list[0].shape[0]
    for df in df_list:
        if df.shape[0] != num_rows:
            raise ValueError("All DataFrames must have the same number of rows.")

    # Step 0: Replace custom NaN representations with np.nan
    replaced_frames = [df.replace(custom_na, np.nan) for df in df_list]

    # Steps 1-2: OR together the per-frame "row has any NaN" masks
    combined_nan = pd.Series([False] * num_rows, index=df_list[0].index)
    for frame in replaced_frames:
        combined_nan = combined_nan | frame.isnull().any(axis=1)

    # Labels of the rows that must be removed everywhere
    indices_to_drop = combined_nan[combined_nan].index

    # Step 3: Drop the identified labels from all DataFrames
    cleaned_df_list = []
    for frame in tqdm(replaced_frames, desc="Dropping NaN rows"):
        cleaned_df = frame.drop(indices_to_drop)
        if reset_indices:
            cleaned_df = cleaned_df.reset_index(drop=True)
        cleaned_df_list.append(cleaned_df)

    return cleaned_df_list
def drop_nan_rows_multiple(df_list, reset_indices=True):
    """
    Drop, from every DataFrame in the list, each row that contains a NaN in
    at least one of the DataFrames.

    Rows are matched by index label: the combined mask lives on the index of
    the first DataFrame, and the flagged labels are dropped from each frame.
    The inputs are left untouched; new DataFrames are returned.

    Parameters:
    df_list (List[pd.DataFrame]): DataFrames to clean; all must have the same
        number of rows.
    reset_indices (bool): Whether to reset the index after dropping rows.
        Defaults to True.

    Returns:
    List[pd.DataFrame]: The cleaned DataFrames, in input order.

    Raises:
    ValueError: If df_list is empty or the row counts differ.
    """
    if not df_list:
        raise ValueError("The list of DataFrames is empty.")

    expected_rows = df_list[0].shape[0]
    if any(frame.shape[0] != expected_rows for frame in df_list):
        raise ValueError("All DataFrames must have the same number of rows.")

    # Start from "no row is bad" and OR in each frame's per-row NaN flag.
    row_has_nan = pd.Series([False] * expected_rows, index=df_list[0].index)
    for frame in df_list:
        row_has_nan = row_has_nan | frame.isnull().any(axis=1)

    labels_to_drop = row_has_nan[row_has_nan].index

    result = []
    for frame in tqdm(df_list, desc="Dropping NaN rows"):
        kept = frame.drop(labels_to_drop)
        result.append(kept.reset_index(drop=True) if reset_indices else kept)

    return result

# Define the normalization function
def normalize_coordinates(x_coords, y_coords):
    """
    Normalize the X and Y coordinates to the range [0, 1].

    NOTE: a function with this same name is also defined earlier in this
    cell; this later definition is the one bound to the name once the cell
    has run.

    Each axis is min-max scaled on its own. If all values on an axis are
    equal (e.g. only one station), the range is zero and the axis is mapped
    to 0.0 rather than dividing by zero and returning NaN.

    Args:
        x_coords (array-like): X coordinates, one per station.
        y_coords (array-like): Y coordinates, one per station.

    Returns:
        tuple: (x_normalized, y_normalized) as float32 torch tensors with a
        trailing singleton dimension, i.e. [num_stations, 1] for 1-D input.
    """
    columns = []
    for coords in (x_coords, y_coords):
        coords = np.asarray(coords)  # also accepts lists / pandas Series
        low, high = coords.min(), coords.max()
        extent = high - low
        if extent == 0:
            # Degenerate axis: nothing to spread out, avoid 0/0 -> NaN.
            unit = np.zeros(coords.shape)
        else:
            unit = (coords - low) / extent
        columns.append(torch.tensor(unit, dtype=torch.float32).unsqueeze(1))

    x_normalized, y_normalized = columns
    return x_normalized, y_normalized

def timeEncode(dataframes):
    """
    Add cyclical time features to each DataFrame, modifying it in place.

    For every DataFrame that has a 'Date Time' column, the column is mapped
    through `pd.Timestamp.timestamp` (seconds) and replaced by four columns:
    'Day sin', 'Day cos' (24-hour period) and 'Year sin', 'Year cos'
    (365.2425-day period). DataFrames without a 'Date Time' column are left
    unchanged.

    Args:
        dataframes (iterable of pd.DataFrame): Frames to encode in place.

    Returns:
        None
    """
    seconds_per_day = 24*60*60
    seconds_per_year = (365.2425)*seconds_per_day
    day_rate = 2 * np.pi / seconds_per_day
    year_rate = 2 * np.pi / seconds_per_year

    for frame in dataframes:
        if 'Date Time' not in frame.columns:
            continue

        seconds = frame['Date Time'].map(pd.Timestamp.timestamp)
        encodings = (
            ('Day sin', np.sin(seconds * day_rate)),
            ('Day cos', np.cos(seconds * day_rate)),
            ('Year sin', np.sin(seconds * year_rate)),
            ('Year cos', np.cos(seconds * year_rate)),
        )
        for column_name, values in encodings:
            frame[column_name] = values

        # The raw timestamp column is removed from the caller's frame.
        frame.drop(columns=['Date Time'], inplace=True)


def preprocessing_tensor_df(df):
    """
    Build the model-ready feature frame from a raw weather DataFrame.

    Steps, all performed on a copy so the input frame is not modified:
      1. Keep every 6th row starting at position 5 (`df[5::6]`).
      2. Parse and remove 'Date Time' (format '%d.%m.%Y %H:%M:%S').
      3. Replace the -9999.0 marker in 'wv (m/s)' and 'max. wv (m/s)' by 0.0.
      4. Replace wind speed / direction columns with vector components
         'Wx', 'Wy', 'max Wx', 'max Wy'.
      5. Append 'Day sin', 'Day cos', 'Year sin', 'Year cos' time features.

    Args:
        df (pd.DataFrame): Raw data containing 'Date Time', 'wv (m/s)',
            'max. wv (m/s)' and 'wd (deg)' columns.

    Returns:
        pd.DataFrame: The subsampled frame with the derived columns appended.
    """
    print("preproccessing data...")
    frame = df[5::6].copy()
    when = pd.to_datetime(frame.pop('Date Time'), format='%d.%m.%Y %H:%M:%S')

    # -9999.0 marks a bad wind-speed reading; zero it before use.
    for speed_col in ('wv (m/s)', 'max. wv (m/s)'):
        frame.loc[frame[speed_col] == -9999.0, speed_col] = 0.0
    mean_speed = frame.pop('wv (m/s)')
    gust_speed = frame.pop('max. wv (m/s)')

    # Wind direction in radians.
    direction = frame.pop('wd (deg)') * np.pi / 180

    seconds = when.map(pd.Timestamp.timestamp)
    seconds_per_day = 24 * 60 * 60
    seconds_per_year = 365.2425 * seconds_per_day

    # Insertion order below defines the order of the appended columns.
    derived = {
        'Wx': mean_speed * np.cos(direction),
        'Wy': mean_speed * np.sin(direction),
        'max Wx': gust_speed * np.cos(direction),
        'max Wy': gust_speed * np.sin(direction),
        'Day sin': np.sin(seconds * (2 * np.pi / seconds_per_day)),
        'Day cos': np.cos(seconds * (2 * np.pi / seconds_per_day)),
        'Year sin': np.sin(seconds * (2 * np.pi / seconds_per_year)),
        'Year cos': np.cos(seconds * (2 * np.pi / seconds_per_year)),
    }
    for column_name, values in derived.items():
        frame.loc[:, column_name] = values

    return frame

def normalize_data(train_data, val_data, scaler_path='./scaler.pkl'):
    """
    Standardize train and validation data with a scaler fitted on train only.

    A StandardScaler is fitted on `train_data`, applied to both inputs, and
    then pickled to `scaler_path` so the same scaling can be reused later.

    Args:
        train_data (np.ndarray): Training data the scaler is fitted on.
        val_data (np.ndarray): Validation data, transformed with the
            training statistics.
        scaler_path (str): File the fitted scaler is pickled to.

    Returns:
        tuple: (train_data_scaled, val_data_scaled, scaler).
    """
    scaler = StandardScaler()
    scaler.fit(train_data)

    # Same fitted scaler for both splits; validation never influences the fit.
    train_data_scaled, val_data_scaled = (
        scaler.transform(split) for split in (train_data, val_data)
    )

    with open(scaler_path, 'wb') as sink:
        pickle.dump(scaler, sink)

    print(f"Scaler saved to {scaler_path}")

    return train_data_scaled, val_data_scaled, scaler

def preprocessing_our_df(df):
    """
    Subsample the frame and discard incomplete rows.

    Keeps every 6th row starting at position 5 (`df[5::6]`), then removes
    any remaining row that contains a NaN. The input frame is not modified.

    Args:
        df (pd.DataFrame): Raw data.

    Returns:
        pd.DataFrame: The subsampled frame without NaN rows (original index
        labels are kept).
    """
    print("preproccessing data...")
    subsampled = df[5::6].copy()
    return subsampled.dropna()

def return_and_save_scaler_normalize_data(train_data, val_data, scaler_path='./scaler.pkl'):
    """
    Fit a StandardScaler on the training data, pickle it, and return it.

    Unlike `normalize_data`, no data is transformed here; only the fitted
    scaler is produced.

    Args:
        train_data: Training data the scaler is fitted on.
        val_data: Not used by this function; present in the signature only.
        scaler_path (str): File the fitted scaler is pickled to.

    Returns:
        StandardScaler: The fitted scaler.
    """
    fitted = StandardScaler()
    fitted.fit(train_data)

    with open(scaler_path, 'wb') as sink:
        pickle.dump(fitted, sink)

    print(f"Scaler saved to {scaler_path}")

    return fitted

def normalize_data_independent(train_data, val_data, scaler_dir='./scalers'):
    """
    Fit a StandardScaler per station on the training data and transform both train and val data.
    Save each scaler to disk for future use.

    The output arrays are always floating point: a floating input dtype is
    kept, any other dtype (e.g. integers) is stored as float64 so the
    standardized values are not truncated on assignment.

    Args:
        train_data (np.ndarray): Training data of shape (T_train, num_stations, num_features).
        val_data (np.ndarray): Validation data of shape (T_val, num_stations, num_features).
        scaler_dir (str): Directory path to save the scalers. Created if missing.

    Returns:
        train_data_scaled (np.ndarray): Scaled training data of shape (T_train, num_stations, num_features).
        val_data_scaled (np.ndarray): Scaled validation data of shape (T_val, num_stations, num_features).
        scalers (list of StandardScaler): List containing a scaler for each station.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(scaler_dir, exist_ok=True)

    # Unpacking also enforces that train_data is 3-D.
    _, num_stations, _ = train_data.shape

    def _float_buffer(data):
        # np.zeros_like would inherit an integer dtype and silently truncate
        # the scaled values written into it.
        out_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64
        return np.zeros(data.shape, dtype=out_dtype)

    train_data_scaled = _float_buffer(train_data)
    val_data_scaled = _float_buffer(val_data)

    scalers = []

    for station_idx in range(num_stations):
        scaler = StandardScaler()

        # Training slice for the current station: (T_train, num_features)
        train_station_data = train_data[:, station_idx, :]

        # Statistics come from the training split only
        scaler.fit(train_station_data)
        scalers.append(scaler)

        # Apply the station's scaler to both splits
        train_data_scaled[:, station_idx, :] = scaler.transform(train_station_data)
        val_data_scaled[:, station_idx, :] = scaler.transform(val_data[:, station_idx, :])

        # Persist the scaler for the current station
        scaler_path = os.path.join(scaler_dir, f'scaler_station_{station_idx}.pkl')
        with open(scaler_path, 'wb') as f:
            pickle.dump(scaler, f)
        print(f"Scaler for Station {station_idx} saved to {scaler_path}")

    return train_data_scaled, val_data_scaled, scalers

def normalize_data_collective(train_data, val_data, scaler_path='./scaler.pkl'):
    """
    Standardize all stations together with one shared StandardScaler.

    Both arrays are flattened to (time * stations, num_features), the scaler
    is fitted on the flattened training data, and the transformed results are
    reshaped back to the original shapes. The scaler is pickled to
    `scaler_path`.

    Args:
        train_data (np.ndarray): Training data of shape (T_train, num_stations, num_features).
        val_data (np.ndarray): Validation data of shape (T_val, num_stations, num_features).
        scaler_path (str): Path to save the scaler.

    Returns:
        train_scaled (np.ndarray), val_scaled (np.ndarray), scaler (StandardScaler)
    """
    # Unpacking also enforces that train_data is 3-D.
    _, _, num_features = train_data.shape

    flat_train = train_data.reshape(-1, num_features)
    flat_val = val_data.reshape(-1, num_features)

    scaler = StandardScaler()
    scaler.fit(flat_train)

    def _scale_like(flat, original):
        # Transform the 2-D view and restore the original 3-D layout.
        return scaler.transform(flat).reshape(original.shape)

    train_scaled = _scale_like(flat_train, train_data)
    val_scaled = _scale_like(flat_val, val_data)

    with open(scaler_path, 'wb') as sink:
        pickle.dump(scaler, sink)
    print(f"Scaler saved to {scaler_path}")

    return train_scaled, val_scaled, scaler

def load_pkl_file(station_name, data_dir=None):
    """
    Load the pickled data of one station.

    Args:
        station_name (str): Station name; the file read is
            '<data_dir>/<station_name>.pkl'.
        data_dir (str, optional): Directory holding the .pkl files. When
            omitted, the 'data' directory three levels above this file is
            used; if `__file__` is not defined (as is typically the case in a
            notebook cell) the current working directory is the starting
            point instead.

    Returns:
        The unpickled object, or None if the file could not be loaded (the
        error is printed, not raised).
    """
    if data_dir is None:
        try:
            base_dir = os.path.dirname(__file__)
        except NameError:
            base_dir = os.getcwd()
        # os.path.join keeps the path valid on both Windows and Linux/Colab.
        data_dir = os.path.join(base_dir, os.pardir, os.pardir, os.pardir, 'data')

    file_path = os.path.join(data_dir, f"{station_name}.pkl")
    try:
        with open(file_path, 'rb') as file:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load files from a trusted source.
            data = pickle.load(file)
        print(f"data succsesfuly loaded from {file_path}")
        return data
    except Exception as e:
        # Best-effort loader: report the failure and let the caller decide.
        print(f"Failed to load file:\n{e}")
        return None

def openJsonFile(file_path=None):
    """
    Load the stations-details JSON file.

    Args:
        file_path (str, optional): Path of the JSON file. When omitted,
            'data code files/stations_details_updated.json' two levels above
            this file is used; if `__file__` is not defined (as is typically
            the case in a notebook cell) the current working directory is the
            starting point instead.

    Returns:
        The parsed JSON content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if file_path is None:
        try:
            base_dir = os.path.dirname(__file__)
        except NameError:
            base_dir = os.getcwd()
        # os.path.join keeps the path valid on both Windows and Linux/Colab.
        file_path = os.path.join(
            base_dir, os.pardir, os.pardir,
            'data code files', 'stations_details_updated.json',
        )

    # Explicit encoding so non-ASCII station details do not depend on the
    # platform's default locale.
    with open(file_path, encoding='utf-8') as file:
        stations = json.load(file)
    return stations

def loadCoordinatesNewIsraelData(stations_details, station_name):
    """
    Look up a station's coordinates by name.

    Args:
        stations_details (dict): Mapping of station id to a details dict with
            a "name" entry and a "coordinates_in_a_new_israe" entry holding
            "east" and "north".
        station_name (str): Name to search for.

    Returns:
        tuple or None: (east, north) of the first station whose name matches,
        or None when no station matches.
    """
    match = next(
        (details for details in stations_details.values()
         if details["name"] == station_name),
        None,
    )
    if match is None:
        return None

    coordinates = match["coordinates_in_a_new_israe"]
    return coordinates["east"], coordinates["north"]

def loadData(station_names):
    """
    Load the data and coordinates of several stations.

    Args:
        station_names (iterable of str): Names of the stations to load.

    Returns:
        dict: Maps each station name to a tuple
        (value returned by load_pkl_file, value returned by
        loadCoordinatesNewIsraelData). Either element can be None when the
        corresponding lookup fails.
    """
    details = openJsonFile()
    return {
        name: (load_pkl_file(name), loadCoordinatesNewIsraelData(details, name))
        for name in station_names
    }
"""
# example of use for this file
if __name__ == "__main__":
    # Load the data
    stations_data = loadData(["Afeq","Harashim"])
    if "Afeq" in stations_data:
        print("Data of Afeq:")
        print(stations_data["Afeq"][0].head())

        print("Coordinate of Afeq:")
        print(stations_data["Afeq"][1])

        print("First coordinate of Afeq:")
        print(stations_data["Afeq"][1][0])

        print("Second coordinate of Afeq:")
        print(stations_data["Afeq"][1][1])
    else:
        print("Afeq data not found")

    print("yey")

"""

'\n# example of use for this file\nif __name__ == "__main__":\n    # Load the data\n    stations_data = loadData(["Afeq","Harashim"])\n    if "Afeq" in stations_data:\n        print("Data of Afeq:")\n        print(stations_data["Afeq"][0].head())\n\n        print("Coordinate of Afeq:")\n        print(stations_data["Afeq"][1])\n\n        print("First coordinate of Afeq:")\n        print(stations_data["Afeq"][1][0])\n\n        print("Second coordinate of Afeq:")\n        print(stations_data["Afeq"][1][1])\n    else:\n        print("Afeq data not found")\n\n    print("yey")\n\n'

### `window_generator_multiple_stations.py`

In [4]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class WindowGeneratorMultipleStations(Dataset):
    """
    Creates sliding windows from a multi-station dataset.

    data: shape (T, num_stations, num_features)
    input_width (int): window size for input
    label_width (int): how many time steps to predict
    shift (int): how far ahead the prediction starts after the input
        (shift=1 means the label starts right after the input window)
    label_columns (list[int] or None): indices of the columns used as labels;
        None means all features are returned as labels
    target_station_idx (int): index of the target station

    Each item is a pair (x, y):
        x: (num_stations, input_width, num_features) - the input window,
           permuted so that the station axis comes first
        y: (label_width, num_label_features) - the label window of the
           target station only

    Indexing follows the sequence protocol: negative indices count from the
    end and an out-of-range index raises IndexError, so plain iteration over
    the dataset terminates.
    """
    def __init__(
        self,
        data,
        input_width,
        label_width,
        shift,
        label_columns=None,
        target_station_idx=0,
    ):
        super().__init__()

        if not isinstance(data, torch.Tensor):
            data = torch.tensor(data, dtype=torch.float32)
        self.data = data  # shape (T, num_stations, num_features)

        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        # Time steps spanned by one sample: input, gap of (shift - 1), labels
        self.total_window_size = input_width + shift - 1 + label_width

        # label columns; None means every feature is a label
        self.label_columns = label_columns
        if label_columns is not None:
            self.num_label_features = len(label_columns)
        else:
            self.num_label_features = self.data.shape[-1]

        # Number of complete windows that fit into the series
        self.num_samples = len(self.data) - self.total_window_size + 1

        if self.num_samples < 1:
            raise ValueError("Not enough data to create windows with these parameters.")

        self.target_station_idx = target_station_idx

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        # Normalize negative indices and reject anything outside the valid
        # range; without this, slicing would silently return short or empty
        # windows and iteration would never stop.
        if idx < 0:
            idx = idx + self.num_samples
        if not 0 <= idx < self.num_samples:
            raise IndexError(
                f"index out of range for dataset with {self.num_samples} samples"
            )

        # input window range
        x_start = idx
        x_end = x_start + self.input_width  # not inclusive

        # label window range
        y_start = x_start + self.input_width + self.shift - 1
        y_end = y_start + self.label_width

        # slice input: (input_width, num_stations, num_features),
        # then move the station axis to the front
        x = self.data[x_start:x_end].permute(1, 0, 2)

        # slice label (target station only)
        if self.label_columns is not None:
            y = self.data[y_start:y_end, self.target_station_idx, self.label_columns]  # [label_width, num_label_features]
        else:
            y = self.data[y_start:y_end, self.target_station_idx, :]  # [label_width, num_features]

        return x, y

    def __repr__(self):
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input window size: {self.input_width}',
            f'Label window size: {self.label_width}',
            f'Shift: {self.shift}',
            f'Label columns indices: {self.label_columns}',
            f'Target station index: {self.target_station_idx}'
        ])

"""
if __name__ == "__main__":
    import numpy as np
    import torch

    # Number of time steps, stations, and features
    T = 1000
    num_stations = 5
    num_features = 10

    # Generate random data
    np.random.seed(42)
    data = np.random.randn(T, num_stations, num_features).astype(np.float32)

    # Define target station index
    target_station_idx = 0  # Change as needed

    # Create WindowGenerator instance
    window_size = 24
    label_size = 1
    shift = 1

    window_gen = WindowGeneratorMultipleStations(
        data=data,
        input_width=window_size,
        label_width=label_size,
        shift=shift,
        label_columns=[2],  # Set to specific feature indices if needed
        target_station_idx=target_station_idx
    )

    # Create DataLoader
    batch_size = 32
    train_loader = DataLoader(window_gen, batch_size=batch_size, shuffle=True)

    # Iterate through one batch
    for batch_idx, (x, y) in enumerate(train_loader):
        print(f"Batch {batch_idx + 1}")
        print(f"x shape: {x.shape}")  # Expected: [batch_size, num_stations, input_width, num_features]
        print(f"y shape: {y.shape}")  # Expected: [batch_size, label_width, num_label_features]
        break  # Only show the first batch
"""

'\nif __name__ == "__main__":\n    import numpy as np\n    import torch\n\n    # Number of time steps, stations, and features\n    T = 1000\n    num_stations = 5\n    num_features = 10\n\n    # Generate random data\n    np.random.seed(42)\n    data = np.random.randn(T, num_stations, num_features).astype(np.float32)\n\n    # Define target station index\n    target_station_idx = 0  # Change as needed\n\n    # Create WindowGenerator instance\n    window_size = 24\n    label_size = 1\n    shift = 1\n\n    window_gen = WindowGeneratorMultipleStations(\n        data=data,\n        input_width=window_size,\n        label_width=label_size,\n        shift=shift,\n        label_columns=[2],  # Set to specific feature indices if needed\n        target_station_idx=target_station_idx\n    )\n\n    # Create DataLoader\n    batch_size = 32\n    train_loader = DataLoader(window_gen, batch_size=batch_size, shuffle=True)\n\n    # Iterate through one batch\n    for batch_idx, (x, y) in enumerate(train_lo

### `model.py`

In [5]:
# model.py

import torch
import torch.nn as nn
import math

class StationCNN(nn.Module):
    """
    Two-layer grouped 1-D CNN applied to the time series of a single station.

    Both convolutions use `groups=input_features`, so each input feature is
    filtered by its own set of `output_per_feature` kernels. Batch
    normalization and a 1x1 grouped residual branch are optional.
    """
    def __init__(self,
                 input_features=15,
                 output_per_feature=3,
                 kernel_size=3,
                 use_batch_norm=False,
                 use_residual=False):
        """
        Args:
            input_features (int): Number of input features per station.
            output_per_feature (int): Number of output channels per feature.
            kernel_size (int): Size of the convolutional kernel.
            use_batch_norm (bool): Whether to use Batch Normalization.
            use_residual (bool): Whether to use residual connections.
        """
        super().__init__()
        self.output_per_feature = output_per_feature
        self.use_batch_norm = use_batch_norm
        self.use_residual = use_residual
        self.out_channels = input_features * output_per_feature

        # padding of kernel_size // 2 keeps the time length for odd kernels.
        half_kernel = kernel_size // 2

        # Layer 1: input_features -> out_channels, one group per feature.
        self.conv1 = nn.Conv1d(
            input_features,
            self.out_channels,
            kernel_size,
            padding=half_kernel,
            groups=input_features,
        )
        self.relu1 = nn.ReLU()
        if use_batch_norm:
            self.bn1 = nn.BatchNorm1d(self.out_channels)

        # Layer 2: out_channels -> out_channels, still one group per feature.
        self.conv2 = nn.Conv1d(
            self.out_channels,
            self.out_channels,
            kernel_size,
            padding=half_kernel,
            groups=input_features,
        )
        self.relu2 = nn.ReLU()
        if use_batch_norm:
            self.bn2 = nn.BatchNorm1d(self.out_channels)

        # Optional shortcut: grouped 1x1 conv lifting the input to out_channels.
        if use_residual:
            self.residual_conv = nn.Conv1d(
                input_features,
                self.out_channels,
                kernel_size=1,
                groups=input_features,
            )

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): Input tensor of shape [batch_size, input_features, time_steps]

        Returns:
            torch.Tensor: Output tensor of shape [batch_size, output_per_feature, time_steps, input_features]

        NOTE(review): with `groups=input_features`, PyTorch lays out the
        convolution output channels group by group (all channels of feature 0,
        then feature 1, ...), whereas the final `view` splits the channel axis
        as (output_per_feature, input_features). The two trailing axes
        therefore do not appear to line up one-to-one with
        "kernel k of feature f". TargetedWeatherPredictionModel in this file
        flattens both axes back together before its linear layer, so it only
        sees a fixed channel order; confirm before relying on per-feature
        slices of this output.
        """
        batch, features, steps = x.shape

        hidden = self.conv1(x)  # [batch, features * output_per_feature, steps]
        if self.use_batch_norm:
            hidden = self.bn1(hidden)
        hidden = self.conv2(self.relu1(hidden))
        if self.use_batch_norm:
            hidden = self.bn2(hidden)
        hidden = self.relu2(hidden)

        if self.use_residual:
            # Shortcut is added after the second activation, then re-activated.
            hidden = self.relu2(hidden + self.residual_conv(x))

        # [batch, output_per_feature, features, steps] -> swap the last two axes
        return hidden.view(batch, self.output_per_feature, features, steps).permute(0, 1, 3, 2)

class CoordinatePositionalEncoding(nn.Module):
    """
    Learned spatial embedding built from two station coordinates.

    Each coordinate is projected by its own linear layer; the two projections
    are concatenated and passed through ReLU, giving one d_model-wide vector
    per station.
    """
    def __init__(self, d_model):
        """
        Args:
            d_model (int): Width of the returned embedding.
        """
        super(CoordinatePositionalEncoding, self).__init__()
        # Assuming two coordinates: X and Y.
        # Split d_model so the halves always add up to d_model; for an odd
        # d_model the second projection gets the extra unit (previously the
        # output was silently d_model - 1 wide).
        first_half = d_model // 2
        self.lat_linear = nn.Linear(1, first_half)
        self.lon_linear = nn.Linear(1, d_model - first_half)
        self.activation = nn.ReLU()

    def forward(self, lat, lon):
        """
        Args:
            lat (torch.Tensor): [num_stations, 1] - Normalized X coordinates
            lon (torch.Tensor): [num_stations, 1] - Normalized Y coordinates
        Returns:
            torch.Tensor: [num_stations, d_model]
        """
        lat_enc = self.lat_linear(lat)  # [num_stations, d_model // 2]
        lon_enc = self.lon_linear(lon)  # [num_stations, d_model - d_model // 2]
        spatial_emb = self.activation(torch.cat([lat_enc, lon_enc], dim=1))  # [num_stations, d_model]
        return spatial_emb

class TemporalPositionalEncoding(nn.Module):
    """
    Fixed sinusoidal positional encoding along the time axis.

    A [1, max_len, d_model] table is precomputed (sine on even channels,
    cosine on odd channels) and stored as a non-trainable buffer; `forward`
    adds the first `time_steps` rows to the input.
    """
    def __init__(self, d_model, max_len=5000):
        """
        Args:
            d_model (int): Size of the last dimension of the inputs.
            max_len (int): Largest number of time steps that can be encoded.
        """
        super(TemporalPositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For an odd d_model there is one fewer odd channel than even ones;
        # trimming div_term keeps the assignment shapes equal (no-op when
        # d_model is even).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)  # [1, max_len, d_model]
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): [batch_size, num_stations, time_steps, d_model]
        Returns:
            torch.Tensor: [batch_size, num_stations, time_steps, d_model]
        """
        # The same encoding is broadcast over batch and stations.
        x = x + self.pe[:, :x.size(2), :].unsqueeze(1)  # [batch, num_stations, time_steps, d_model]
        return x

class TargetedWeatherPredictionModel(nn.Module):
    """
    Predicts values for one target station from windows of all stations.

    Pipeline: one StationCNN per station -> linear projection to d_model ->
    temporal + coordinate positional encodings -> Transformer encoder over
    all (station, time step) tokens -> linear head on the target station's
    last time step.
    """
    def __init__(self, num_stations, time_steps, feature_dim, kernel_size,
                 d_model, nhead, num_layers, target_station_idx, label_width=1,
                 output_per_feature=3, use_batch_norm=False, use_residual=False):
        """
        Args:
            num_stations (int): Number of stations.
            time_steps (int): Number of time steps in the sliding window.
            feature_dim (int): Number of features per station.
            kernel_size (int): Size of the CNN kernel.
            d_model (int): Dimension of the model (for Transformer).
            nhead (int): Number of attention heads in the Transformer.
            num_layers (int): Number of Transformer encoder layers.
            target_station_idx (int): Index of the target station.
            label_width (int): Number of prediction steps.
            output_per_feature (int): Number of output channels per feature in CNN.
            use_batch_norm (bool): Whether to use Batch Normalization in CNNs.
            use_residual (bool): Whether to use residual connections in CNNs.
        """
        super().__init__()
        self.num_stations = num_stations
        self.time_steps = time_steps
        self.target_station_idx = target_station_idx
        self.label_width = label_width
        self.output_per_feature = output_per_feature

        # One independent CNN per station (weights are not shared).
        cnn_options = dict(
            input_features=feature_dim,
            output_per_feature=output_per_feature,
            kernel_size=kernel_size,
            use_batch_norm=use_batch_norm,
            use_residual=use_residual,
        )
        self.station_cnns = nn.ModuleList(
            [StationCNN(**cnn_options) for _ in range(num_stations)]
        )

        # Station coordinates -> d_model embedding.
        self.coord_pos_encoding = CoordinatePositionalEncoding(d_model=d_model)

        # CNN output has feature_dim * output_per_feature channels per step.
        self.feature_mapping = nn.Linear(feature_dim * output_per_feature, d_model)

        # Sinusoidal encoding, sized for exactly one window.
        self.temporal_pos_encoding = TemporalPositionalEncoding(d_model=d_model, max_len=time_steps)

        # Encoder layers use the default sequence-first input layout.
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Prediction head: d_model -> label_width.
        self.fc_out = nn.Linear(d_model, label_width)

    def forward(self, x, lat, lon):
        """
        Args:
            x (torch.Tensor): [batch_size, num_stations, time_steps, feature_dim]
            lat (torch.Tensor): [num_stations, 1] - Normalized X coordinates
            lon (torch.Tensor): [num_stations, 1] - Normalized Y coordinates
        Returns:
            torch.Tensor: [batch_size, label_width]
        """
        batch_size, num_stations, time_steps, feature_dim = x.size()

        # Each station goes through its own CNN, which expects
        # [batch, feature_dim, time_steps].
        cnn_outputs = [
            self.station_cnns[station](x[:, station, :, :].permute(0, 2, 1))
            for station in range(num_stations)
        ]

        # [batch, stations, output_per_feature, time, features]
        stacked = torch.stack(cnn_outputs, dim=1).view(
            batch_size, num_stations, self.output_per_feature, time_steps, feature_dim
        )
        # Move time ahead of the channel axes and merge them:
        # [batch, stations, time, output_per_feature * features]
        tokens = stacked.permute(0, 1, 3, 2, 4).contiguous().view(
            batch_size, num_stations, time_steps, self.output_per_feature * feature_dim
        )

        # [stations, d_model] -> [1, stations, 1, d_model] for broadcasting.
        station_emb = self.coord_pos_encoding(lat, lon).unsqueeze(0).unsqueeze(2)

        # Project to d_model, then add the time and station encodings.
        tokens = self.temporal_pos_encoding(self.feature_mapping(tokens))
        tokens = tokens + station_emb  # [batch, stations, time, d_model]

        # Flatten stations and time into one sequence (station-major) and
        # switch to the [sequence, batch, d_model] layout of the encoder.
        sequence = tokens.view(batch_size, num_stations * time_steps, -1).permute(1, 0, 2)
        encoded = self.transformer_encoder(sequence).permute(1, 0, 2)
        encoded = encoded.view(batch_size, num_stations, time_steps, -1)

        # Only the target station's final time step feeds the head.
        final_state = encoded[:, self.target_station_idx, -1, :]  # [batch, d_model]

        return self.fc_out(final_state)  # [batch, label_width]


### `train.py`

In [6]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from tqdm import tqdm
import logging

def setup_logger(log_dir, log_filename="training.log"):
    """Configure the shared "TrainingLogger" to append to ``log_dir/log_filename``.

    The logger is a process-wide singleton, so in a notebook it survives from
    one training run to the next. Each call therefore re-points the logger at
    the requested file: a file handler that already targets that file is
    reused (no duplicate lines), and file handlers that target any other file
    are closed and removed so a new run does not keep writing to an old log.

    Args:
        log_dir: Existing directory in which the log file lives. ``None`` is
            treated the same as a directory that does not exist.
        log_filename: Name of the log file inside ``log_dir``.

    Returns:
        logging.Logger | None: The configured logger, or ``None`` (after
        printing an error) when ``log_dir`` is ``None`` or does not exist.
    """
    # os.path.exists(None) raises TypeError, so check for None explicitly.
    if log_dir is None or not os.path.exists(log_dir):
        print(f"Error: Directory {log_dir} does not exist.")
        return None

    # FileHandler stores an absolute path in `baseFilename`; normalise the
    # requested path the same way so the two can be compared.
    log_path = os.path.abspath(os.path.join(log_dir, log_filename))

    logger = logging.getLogger("TrainingLogger")
    logger.setLevel(logging.INFO)

    already_attached = False
    for handler in list(logger.handlers):
        if not isinstance(handler, logging.FileHandler):
            continue  # leave non-file handlers (e.g. console) untouched
        if handler.baseFilename == log_path:
            already_attached = True
        else:
            # Stale handler from a previous run pointing at another file.
            logger.removeHandler(handler)
            handler.close()

    if not already_attached:
        file_handler = logging.FileHandler(log_path, mode='a')
        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        logger.addHandler(file_handler)

    return logger
def get_logger():
    """Return the shared logger registered under the name "TrainingLogger"."""
    logger_name = "TrainingLogger"
    training_logger = logging.getLogger(logger_name)
    return training_logger

def _save_checkpoint(path, epoch, model, optimizer, best_val_loss):
    """Write the model/optimizer state and bookkeeping values to `path`."""
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'best_val_loss': best_val_loss,
    }, path)


def train_model(
        train_dataset,
        val_dataset,
        model,
        coordinates,
        epochs=50,  # Increased epochs for better exploration
        batch_size=32,
        lr=1e-4,    # Updated learning rate as per recommendation
        checkpoint_dir='./checkpoints',
        resume=False,
        device='cuda' if torch.cuda.is_available() else 'cpu',
        early_stopping_patience=10,
        scheduler_patience=5,
        scheduler_factor=0.5,
        min_lr=1e-7,
        logger_path=None
    ):
    """Train `model` with MSE loss, plateau LR scheduling, early stopping and checkpoints.

    Args:
        train_dataset: Dataset yielding ``(x, y)`` pairs used for training (shuffled).
        val_dataset: Dataset yielding ``(x, y)`` pairs used for validation.
        model: Module invoked as ``model(x_batch, coord1, coord2)``.
        coordinates: Indexable holding two tensors; ``coordinates[0]`` and
            ``coordinates[1]`` are moved to `device` and passed to every forward call.
        epochs: Number of epochs to run in this call. When resuming, they are
            run on top of the epoch stored in the checkpoint.
        batch_size: Batch size for both data loaders.
        lr: Initial learning rate for Adam.
        checkpoint_dir: Directory (created if missing) receiving
            ``best_checkpoint.pth`` and ``latest_checkpoint.pth``.
        resume: If True and ``latest_checkpoint.pth`` exists, restore model,
            optimizer, epoch and best validation loss from it.
        device: Device the model and batches are moved to.
        early_stopping_patience: Stop after this many consecutive epochs
            without a new best validation loss.
        scheduler_patience: `patience` of ``ReduceLROnPlateau``.
        scheduler_factor: `factor` of ``ReduceLROnPlateau``.
        min_lr: `min_lr` of ``ReduceLROnPlateau``.
        logger_path: Existing directory for the log file. If None, or if the
            directory is missing, file logging is not (re)configured and the
            shared logger is used as-is.

    Returns:
        None. Results are the checkpoint files written to `checkpoint_dir`.

    Note:
        The scheduler state and the early-stopping counter are not stored in
        checkpoints, so both start fresh when training is resumed.
    """
    # setup_logger returns None for a missing directory; fall back to the bare
    # shared logger so the logger.info calls below never hit None.
    logger = setup_logger(logger_path) if logger_path is not None else None
    if logger is None:
        logger = get_logger()
    logger.info("start logger")

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Move model to device
    model = model.to(device)

    # The station coordinates never change, so transfer them once instead of
    # once per batch.
    coord1 = coordinates[0].to(device)
    coord2 = coordinates[1].to(device)

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # `verbose` is deprecated in recent PyTorch releases; the current learning
    # rate is printed and logged explicitly after every epoch instead.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=scheduler_factor,
        patience=scheduler_patience,
        min_lr=min_lr
    )

    best_val_loss = float('inf')
    patience_counter = 0
    start_epoch = 0

    os.makedirs(checkpoint_dir, exist_ok=True)
    latest_ckpt = os.path.join(checkpoint_dir, 'latest_checkpoint.pth')
    best_ckpt = os.path.join(checkpoint_dir, 'best_checkpoint.pth')

    # Resume training if needed
    if resume and os.path.exists(latest_ckpt):
        print("Resuming training from latest checkpoint...")
        checkpoint = torch.load(latest_ckpt, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        best_val_loss = checkpoint.get('best_val_loss', float('inf'))
        print(f"Resumed from epoch {start_epoch}, best_val_loss={best_val_loss:.4f}")
        logger.info(f"Resumed from epoch {start_epoch}, best_val_loss={best_val_loss:.4f}")

    last_epoch = start_epoch + epochs
    for epoch in range(start_epoch, last_epoch):
        model.train()
        epoch_loss = 0.0
        for x_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{last_epoch}"):
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            # Forward pass with coordinates
            preds = model(x_batch, coord1, coord2)

            # Drop a trailing singleton dimension (if any) on both tensors
            # before comparing them.
            y_batch_single = y_batch.squeeze(-1)
            preds = preds.squeeze(-1)

            # Compute loss
            loss = criterion(preds, y_batch_single)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Accumulate loss
            epoch_loss += loss.item()

        # Compute average loss for the epoch
        train_loss = epoch_loss / len(train_loader)

        # Validation
        model.eval()
        val_losses = []
        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)

                # Forward pass with coordinates
                preds_val = model(x_val, coord1, coord2)

                # Same trailing-dimension handling as in training
                preds_val = preds_val.squeeze(-1)
                y_val_single = y_val.squeeze(-1)

                # Compute loss
                loss_val = criterion(preds_val, y_val_single)
                val_losses.append(loss_val.item())

        # Cast to a plain Python float: np.mean returns np.float64, which would
        # otherwise be pickled into the checkpoint as a NumPy scalar and can be
        # rejected by torch.load when it runs with weights_only=True.
        val_loss = float(np.mean(val_losses))

        print(f"Epoch {epoch+1}/{last_epoch} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
        logger.info(f"Epoch {epoch+1}/{last_epoch} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
        # Step the scheduler based on validation loss
        scheduler.step(val_loss)

        current_lr = optimizer.param_groups[0]['lr']
        print(f"  -> Current Learning Rate: {current_lr:.6f}")
        logger.info(f"  -> Current Learning Rate: {current_lr:.6f}")

        improved = val_loss < best_val_loss
        if improved:
            best_val_loss = val_loss
            patience_counter = 0  # Reset counter if validation loss improves

            # Save the best model
            _save_checkpoint(best_ckpt, epoch, model, optimizer, best_val_loss)
            print(f"  -> Best model saved at epoch {epoch+1} (val_loss={val_loss:.4f})")
            logger.info(f"  -> Best model saved at epoch {epoch+1} (val_loss={val_loss:.4f})")
        else:
            patience_counter += 1
            print(f"  -> No improvement in validation loss for {patience_counter} epoch(s)")
            logger.info(f"  -> No improvement in validation loss for {patience_counter} epoch(s)")

        # Checkpoint: always save the 'latest'. This happens before the
        # early-stopping check so the final epoch is not missing from it.
        _save_checkpoint(latest_ckpt, epoch, model, optimizer, best_val_loss)

        # Early stopping only applies to epochs that did not improve.
        if not improved and patience_counter >= early_stopping_patience:
            print("Early stopping triggered.")
            logger.info("Early stopping triggered.")
            break  # Exit the training loop


### `parameters.py`

In [7]:
import torch
import os
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # Determine device

# The three variables below MUST be set before running.
###########################################################################################################################################################
# Training: folder under which all outputs are saved. Usually this is the
# folder that contains the parameters.py file.
train_base_path = '/content/drive/MyDrive/hyperparameters/24_36/4'

# Inference: folder containing one sub-folder per model, laid out as
#   inference_base_path/model_i/parameters.py
#   inference_base_path/model_i/scalers
#   inference_base_path/model_i/checkpoints
inference_base_path = " "#os.path.dirname(__file__)
# Model sub-folders to run inference on, e.g. ['model_1', 'model_2', ...]; for a single model: ['model_1']
models_paths_dir_names = ['model_1']

###########################################################################################################################################################

output_path = os.path.join(train_base_path, 'output')
checkpoints_path = os.path.join(output_path, 'checkpoints')
scalers_path = os.path.join(output_path, 'scalers')
inference_output_path = os.path.join(output_path, 'inference_output')

# Folder on Google Drive that holds one "<station name>.pkl" file per station.
DATA_DIR = '/content/drive/MyDrive/final data'

# Station name -> (east, north) coordinates. The insertion order defines the
# station order used everywhere below (paths, names, target index).
STATIONS_COORDINATES = {
    'Tavor Kadoorie':           (238440, 734540),
    'Newe Yaar':                (217010, 734820),
    'Yavneel':                  (248110, 733730),
    'En Hashofet':              (209310, 723170),
    'Eden Farm':                (246190, 708240),
    'Eshhar':                   (228530, 754390),
    'Afula Nir Haemeq':         (226260, 722410)
}

# Same coordinates keyed by the station's pickle path. Derived from
# STATIONS_COORDINATES so the two tables cannot drift apart.
STATIONS_COORDINATES_COLAB = {
    f'{DATA_DIR}/{station_name}.pkl': station_coords
    for station_name, station_coords in STATIONS_COORDINATES.items()
}

# Station name -> identifier string.
# NOTE(review): presumably the data provider's station IDs — confirm.
STATIONS_LIST = {
    "Tavor Kadoorie":   "13",
    "Newe Yaar":        "186",
    "Yavneel":          "11",
    "En Hashofet":      "67",
    "Eden Farm":        "206",
    "Eshhar":           "205",
    "Afula Nir Haemeq": "16"
}

_STATION_NAMES = list(STATIONS_COORDINATES)
_TARGET_STATION = 'Tavor Kadoorie'

PARAMS = {
    'paths_in_colab':   [f'{DATA_DIR}/{station_name}.pkl' for station_name in _STATION_NAMES],
    'fileNames':        _STATION_NAMES,
    'target_station':   _TARGET_STATION,
    # Key spelling ("desplay") is kept as-is because other code may look it up by this name.
    'target_station_desplay_name':   _TARGET_STATION,
    # Index of the target station in the station order; derived so it always
    # matches 'target_station'.
    'target_station_id': _STATION_NAMES.index(_TARGET_STATION),
    'device' :           device,
    'in_channels' :      15, # how many features we have
    'output_path':       output_path,
    'checkpoints_path':  checkpoints_path,
    'scalers_path':      scalers_path,
    'inference_output_path': inference_output_path
}

WINDOW_PARAMS = {
    'input_width' :     72, # window input size
    'label_width' :     12, # how many hours to predict to the future
    'shift' :           13,
    'label_columns' :   ['TD (degC)'],
}

# Alternative window settings kept for reference (input_width=72,
# label_columns=['TD (degC)'] in all of them), as (label_width, shift):
#   (12, 1), (12, 13), (12, 25), (24, 37)

# Note: use_residual false to true
TRAIN_PARAMS = {
    'epochs' :          50,
    'batch_size':       32,
    'lr':               1e-5,                                   # 1e-3, 1e-4, 1e-5
    'checkpoint_dir' :  PARAMS['checkpoints_path'],
    'resume':           False,
    'device':           PARAMS['device'],
    'early_stopping_patience':10,                               # how many epochs to wait before stopping the training
    'scheduler_patience':3,                                     # how many epochs to wait before reducing the learning rate
    'scheduler_factor':  0.5,                                   # the factor to reduce the learning rate
    'min_lr':            1e-7,
    'logger_path':       PARAMS['output_path']
}

ADVANCED_MODEL_PARAMS = {
    'num_stations':         len(PARAMS['fileNames']),
    'time_steps':           WINDOW_PARAMS['input_width'],
    'feature_dim':          PARAMS['in_channels'],
    'kernel_size':          3,  # cnn filter size                       4, 5, 6, 7
    'd_model':              64, # input for transformer size            64, 128
    'nhead':                8,  # number of heads in the transformer    8, 16
    'num_layers':           4,  # number of layers in the transformer - 6 - 12
    'target_station_idx':   PARAMS['target_station_id'],
    'label_width':          WINDOW_PARAMS['label_width'],
    'output_per_feature':   3,                                          # 4 ,5
    'use_batch_norm':       False,
    'use_residual':         True
}

# Full path of every model folder used for inference.
models_paths_dir_names_full_paths = [os.path.join(inference_base_path, model_folder_name) for model_folder_name in models_paths_dir_names]

INFERENCE_PARAMS = {
    'params_path':             [os.path.join(folder, 'parameters.py') for folder in models_paths_dir_names_full_paths],
    'weights_paths':           [os.path.join(folder, 'checkpoints', 'best_checkpoint.pth') for folder in models_paths_dir_names_full_paths],
    'scaler_folder_path':      PARAMS['scalers_path'],
    'inference_output_path_per_model':  models_paths_dir_names_full_paths, # for saving the output of the inference in the model folder for each model
    'inference_output_path':  os.path.join(inference_base_path, 'inference_output'), # for saving the output of the inference of all models in one folder (later analyze.py will use it)
}

### `main.py`

In [8]:
# main.py
import os
import numpy as np

def main():
    """Run the training pipeline end to end.

    Loads every station's pickled DataFrame, normalises the station
    coordinates, splits each station 80/20 into train/validation along the
    time axis, scales the data per station, builds the windowed datasets and
    trains the model. Returns early (printing an error) when the output
    directory already exists and resuming is disabled.
    """
    output_dir = PARAMS['output_path']
    if TRAIN_PARAMS['resume'] is False and os.path.exists(output_dir):
        print(f"Error: Directory {output_dir} already exists. Please remove it or set resume=True.")
        return

    print(f"Using device: {PARAMS['device']}")

    # Load each station and collect its (east, north) coordinates in the same order.
    station_frames = []
    east_values = []
    north_values = []
    for station_path in PARAMS['paths_in_colab']:
        station_frames.append(pd.read_pickle(station_path))
        station_coords = STATIONS_COORDINATES_COLAB[station_path]
        east_values.append(station_coords[0])
        north_values.append(station_coords[1])
    east_normalized, north_normalized = normalize_coordinates(
        np.array(east_values), np.array(north_values)
    )

    print("size of data:")
    for station_idx, frame in enumerate(station_frames):
        print(f"Station {station_idx}: {frame.shape}")

    # Chronological 80/20 split; the cut point is taken from the first station
    # and applied to all of them.
    station_values = [frame.values for frame in station_frames]
    split_at = int(0.8 * len(station_values[0]))
    train_parts = [values[:split_at] for values in station_values]
    val_parts = [values[split_at:] for values in station_values]

    # Stack along a new station axis: (T, num_stations, num_features)
    combined_train_data = np.stack(train_parts, axis=1)
    combined_val_data = np.stack(val_parts, axis=1)

    # Scale every station independently (the returned scalers are not used here).
    train_data_scaled, val_data_scaled, _ = normalize_data_independent(
        train_data=combined_train_data,
        val_data=combined_val_data,
        scaler_dir=PARAMS['scalers_path']
    )

    # Translate the first label column name into its positional index.
    column_positions = {
        column_name: position
        for position, column_name in enumerate(station_frames[0].columns)
    }
    label_columns = [column_positions[WINDOW_PARAMS['label_columns'][0]]]

    # Both datasets share every windowing argument except the data itself.
    window_kwargs = dict(
        input_width=WINDOW_PARAMS['input_width'],
        label_width=WINDOW_PARAMS['label_width'],
        shift=WINDOW_PARAMS['shift'],
        label_columns=label_columns,
        target_station_idx=PARAMS['target_station_id'],
    )
    train_dataset = WindowGeneratorMultipleStations(data=train_data_scaled, **window_kwargs)
    val_dataset = WindowGeneratorMultipleStations(data=val_data_scaled, **window_kwargs)

    model = TargetedWeatherPredictionModel(**ADVANCED_MODEL_PARAMS)

    # Forward exactly the training options that train_model accepts.
    forwarded_keys = (
        'epochs', 'batch_size', 'lr', 'checkpoint_dir', 'resume', 'device',
        'early_stopping_patience', 'scheduler_patience', 'scheduler_factor',
        'min_lr', 'logger_path',
    )
    train_kwargs = {key: TRAIN_PARAMS[key] for key in forwarded_keys}

    train_model(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        model=model,
        coordinates=[east_normalized, north_normalized],
        **train_kwargs
    )

# Run the full training pipeline when this cell is executed.
main()

Using device: cuda
size of data:
Station 0: (149315, 15)
Station 1: (149315, 15)
Station 2: (149315, 15)
Station 3: (149315, 15)
Station 4: (149315, 15)
Station 5: (149315, 15)
Station 6: (149315, 15)
Scaler for Station 0 saved to /content/drive/MyDrive/hyperparameters/24_36/4/output/scalers/scaler_station_0.pkl
Scaler for Station 1 saved to /content/drive/MyDrive/hyperparameters/24_36/4/output/scalers/scaler_station_1.pkl
Scaler for Station 2 saved to /content/drive/MyDrive/hyperparameters/24_36/4/output/scalers/scaler_station_2.pkl
Scaler for Station 3 saved to /content/drive/MyDrive/hyperparameters/24_36/4/output/scalers/scaler_station_3.pkl
Scaler for Station 4 saved to /content/drive/MyDrive/hyperparameters/24_36/4/output/scalers/scaler_station_4.pkl
Scaler for Station 5 saved to /content/drive/MyDrive/hyperparameters/24_36/4/output/scalers/scaler_station_5.pkl


INFO:TrainingLogger:start logger


Scaler for Station 6 saved to /content/drive/MyDrive/hyperparameters/24_36/4/output/scalers/scaler_station_6.pkl


Epoch 1/50: 100%|██████████| 3730/3730 [02:05<00:00, 29.83it/s]
INFO:TrainingLogger:Epoch 1/50 | Train Loss: 0.2032 | Val Loss: 0.1152
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 1 (val_loss=0.1152)


Epoch 1/50 | Train Loss: 0.2032 | Val Loss: 0.1152
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 1 (val_loss=0.1152)


Epoch 2/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.26it/s]
INFO:TrainingLogger:Epoch 2/50 | Train Loss: 0.1185 | Val Loss: 0.0912
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 2 (val_loss=0.0912)


Epoch 2/50 | Train Loss: 0.1185 | Val Loss: 0.0912
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 2 (val_loss=0.0912)


Epoch 3/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.30it/s]
INFO:TrainingLogger:Epoch 3/50 | Train Loss: 0.1014 | Val Loss: 0.0824
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 3 (val_loss=0.0824)


Epoch 3/50 | Train Loss: 0.1014 | Val Loss: 0.0824
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 3 (val_loss=0.0824)


Epoch 4/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.31it/s]
INFO:TrainingLogger:Epoch 4/50 | Train Loss: 0.0933 | Val Loss: 0.0774
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 4 (val_loss=0.0774)


Epoch 4/50 | Train Loss: 0.0933 | Val Loss: 0.0774
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 4 (val_loss=0.0774)


Epoch 5/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.30it/s]
INFO:TrainingLogger:Epoch 5/50 | Train Loss: 0.0889 | Val Loss: 0.0761
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 5 (val_loss=0.0761)


Epoch 5/50 | Train Loss: 0.0889 | Val Loss: 0.0761
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 5 (val_loss=0.0761)


Epoch 6/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.30it/s]
INFO:TrainingLogger:Epoch 6/50 | Train Loss: 0.0858 | Val Loss: 0.0736
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 6 (val_loss=0.0736)


Epoch 6/50 | Train Loss: 0.0858 | Val Loss: 0.0736
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 6 (val_loss=0.0736)


Epoch 7/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.32it/s]
INFO:TrainingLogger:Epoch 7/50 | Train Loss: 0.0836 | Val Loss: 0.0717
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 7 (val_loss=0.0717)


Epoch 7/50 | Train Loss: 0.0836 | Val Loss: 0.0717
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 7 (val_loss=0.0717)


Epoch 8/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.29it/s]
INFO:TrainingLogger:Epoch 8/50 | Train Loss: 0.0817 | Val Loss: 0.0715
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 8 (val_loss=0.0715)


Epoch 8/50 | Train Loss: 0.0817 | Val Loss: 0.0715
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 8 (val_loss=0.0715)


Epoch 9/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.31it/s]
INFO:TrainingLogger:Epoch 9/50 | Train Loss: 0.0803 | Val Loss: 0.0727
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> No improvement in validation loss for 1 epoch(s)


Epoch 9/50 | Train Loss: 0.0803 | Val Loss: 0.0727
  -> Current Learning Rate: 0.000010
  -> No improvement in validation loss for 1 epoch(s)


Epoch 10/50: 100%|██████████| 3730/3730 [02:02<00:00, 30.33it/s]
INFO:TrainingLogger:Epoch 10/50 | Train Loss: 0.0791 | Val Loss: 0.0714
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 10 (val_loss=0.0714)


Epoch 10/50 | Train Loss: 0.0791 | Val Loss: 0.0714
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 10 (val_loss=0.0714)


Epoch 11/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.31it/s]
INFO:TrainingLogger:Epoch 11/50 | Train Loss: 0.0780 | Val Loss: 0.0704
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 11 (val_loss=0.0704)


Epoch 11/50 | Train Loss: 0.0780 | Val Loss: 0.0704
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 11 (val_loss=0.0704)


Epoch 12/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.28it/s]
INFO:TrainingLogger:Epoch 12/50 | Train Loss: 0.0770 | Val Loss: 0.0720
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> No improvement in validation loss for 1 epoch(s)


Epoch 12/50 | Train Loss: 0.0770 | Val Loss: 0.0720
  -> Current Learning Rate: 0.000010
  -> No improvement in validation loss for 1 epoch(s)


Epoch 13/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.31it/s]
INFO:TrainingLogger:Epoch 13/50 | Train Loss: 0.0762 | Val Loss: 0.0691
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 13 (val_loss=0.0691)


Epoch 13/50 | Train Loss: 0.0762 | Val Loss: 0.0691
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 13 (val_loss=0.0691)


Epoch 14/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.28it/s]
INFO:TrainingLogger:Epoch 14/50 | Train Loss: 0.0754 | Val Loss: 0.0685
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> Best model saved at epoch 14 (val_loss=0.0685)


Epoch 14/50 | Train Loss: 0.0754 | Val Loss: 0.0685
  -> Current Learning Rate: 0.000010
  -> Best model saved at epoch 14 (val_loss=0.0685)


Epoch 15/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.25it/s]
INFO:TrainingLogger:Epoch 15/50 | Train Loss: 0.0747 | Val Loss: 0.0686
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> No improvement in validation loss for 1 epoch(s)


Epoch 15/50 | Train Loss: 0.0747 | Val Loss: 0.0686
  -> Current Learning Rate: 0.000010
  -> No improvement in validation loss for 1 epoch(s)


Epoch 16/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.24it/s]
INFO:TrainingLogger:Epoch 16/50 | Train Loss: 0.0741 | Val Loss: 0.0686
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> No improvement in validation loss for 2 epoch(s)


Epoch 16/50 | Train Loss: 0.0741 | Val Loss: 0.0686
  -> Current Learning Rate: 0.000010
  -> No improvement in validation loss for 2 epoch(s)


Epoch 17/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.27it/s]
INFO:TrainingLogger:Epoch 17/50 | Train Loss: 0.0735 | Val Loss: 0.0693
INFO:TrainingLogger:  -> Current Learning Rate: 0.000010
INFO:TrainingLogger:  -> No improvement in validation loss for 3 epoch(s)


Epoch 17/50 | Train Loss: 0.0735 | Val Loss: 0.0693
  -> Current Learning Rate: 0.000010
  -> No improvement in validation loss for 3 epoch(s)


Epoch 18/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.30it/s]
INFO:TrainingLogger:Epoch 18/50 | Train Loss: 0.0728 | Val Loss: 0.0689
INFO:TrainingLogger:  -> Current Learning Rate: 0.000005
INFO:TrainingLogger:  -> No improvement in validation loss for 4 epoch(s)


Epoch 18/50 | Train Loss: 0.0728 | Val Loss: 0.0689
  -> Current Learning Rate: 0.000005
  -> No improvement in validation loss for 4 epoch(s)


Epoch 19/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.29it/s]
INFO:TrainingLogger:Epoch 19/50 | Train Loss: 0.0717 | Val Loss: 0.0718
INFO:TrainingLogger:  -> Current Learning Rate: 0.000005
INFO:TrainingLogger:  -> No improvement in validation loss for 5 epoch(s)


Epoch 19/50 | Train Loss: 0.0717 | Val Loss: 0.0718
  -> Current Learning Rate: 0.000005
  -> No improvement in validation loss for 5 epoch(s)


Epoch 20/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.27it/s]
INFO:TrainingLogger:Epoch 20/50 | Train Loss: 0.0716 | Val Loss: 0.0676
INFO:TrainingLogger:  -> Current Learning Rate: 0.000005
INFO:TrainingLogger:  -> Best model saved at epoch 20 (val_loss=0.0676)


Epoch 20/50 | Train Loss: 0.0716 | Val Loss: 0.0676
  -> Current Learning Rate: 0.000005
  -> Best model saved at epoch 20 (val_loss=0.0676)


Epoch 21/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.28it/s]
INFO:TrainingLogger:Epoch 21/50 | Train Loss: 0.0712 | Val Loss: 0.0727
INFO:TrainingLogger:  -> Current Learning Rate: 0.000005
INFO:TrainingLogger:  -> No improvement in validation loss for 1 epoch(s)


Epoch 21/50 | Train Loss: 0.0712 | Val Loss: 0.0727
  -> Current Learning Rate: 0.000005
  -> No improvement in validation loss for 1 epoch(s)


Epoch 22/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.25it/s]
INFO:TrainingLogger:Epoch 22/50 | Train Loss: 0.0711 | Val Loss: 0.0695
INFO:TrainingLogger:  -> Current Learning Rate: 0.000005
INFO:TrainingLogger:  -> No improvement in validation loss for 2 epoch(s)


Epoch 22/50 | Train Loss: 0.0711 | Val Loss: 0.0695
  -> Current Learning Rate: 0.000005
  -> No improvement in validation loss for 2 epoch(s)


Epoch 23/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.28it/s]
INFO:TrainingLogger:Epoch 23/50 | Train Loss: 0.0707 | Val Loss: 0.0689
INFO:TrainingLogger:  -> Current Learning Rate: 0.000005
INFO:TrainingLogger:  -> No improvement in validation loss for 3 epoch(s)


Epoch 23/50 | Train Loss: 0.0707 | Val Loss: 0.0689
  -> Current Learning Rate: 0.000005
  -> No improvement in validation loss for 3 epoch(s)


Epoch 24/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.31it/s]
INFO:TrainingLogger:Epoch 24/50 | Train Loss: 0.0706 | Val Loss: 0.0677
INFO:TrainingLogger:  -> Current Learning Rate: 0.000003
INFO:TrainingLogger:  -> No improvement in validation loss for 4 epoch(s)


Epoch 24/50 | Train Loss: 0.0706 | Val Loss: 0.0677
  -> Current Learning Rate: 0.000003
  -> No improvement in validation loss for 4 epoch(s)


Epoch 25/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.28it/s]
INFO:TrainingLogger:Epoch 25/50 | Train Loss: 0.0700 | Val Loss: 0.0677
INFO:TrainingLogger:  -> Current Learning Rate: 0.000003
INFO:TrainingLogger:  -> No improvement in validation loss for 5 epoch(s)


Epoch 25/50 | Train Loss: 0.0700 | Val Loss: 0.0677
  -> Current Learning Rate: 0.000003
  -> No improvement in validation loss for 5 epoch(s)


Epoch 26/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.29it/s]
INFO:TrainingLogger:Epoch 26/50 | Train Loss: 0.0699 | Val Loss: 0.0695
INFO:TrainingLogger:  -> Current Learning Rate: 0.000003
INFO:TrainingLogger:  -> No improvement in validation loss for 6 epoch(s)


Epoch 26/50 | Train Loss: 0.0699 | Val Loss: 0.0695
  -> Current Learning Rate: 0.000003
  -> No improvement in validation loss for 6 epoch(s)


Epoch 27/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.30it/s]
INFO:TrainingLogger:Epoch 27/50 | Train Loss: 0.0696 | Val Loss: 0.0676
INFO:TrainingLogger:  -> Current Learning Rate: 0.000003
INFO:TrainingLogger:  -> No improvement in validation loss for 7 epoch(s)


Epoch 27/50 | Train Loss: 0.0696 | Val Loss: 0.0676
  -> Current Learning Rate: 0.000003
  -> No improvement in validation loss for 7 epoch(s)


Epoch 28/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.30it/s]
INFO:TrainingLogger:Epoch 28/50 | Train Loss: 0.0695 | Val Loss: 0.0683
INFO:TrainingLogger:  -> Current Learning Rate: 0.000001
INFO:TrainingLogger:  -> No improvement in validation loss for 8 epoch(s)


Epoch 28/50 | Train Loss: 0.0695 | Val Loss: 0.0683
  -> Current Learning Rate: 0.000001
  -> No improvement in validation loss for 8 epoch(s)


Epoch 29/50: 100%|██████████| 3730/3730 [02:03<00:00, 30.26it/s]
INFO:TrainingLogger:Epoch 29/50 | Train Loss: 0.0692 | Val Loss: 0.0679
INFO:TrainingLogger:  -> Current Learning Rate: 0.000001
INFO:TrainingLogger:  -> No improvement in validation loss for 9 epoch(s)


Epoch 29/50 | Train Loss: 0.0692 | Val Loss: 0.0679
  -> Current Learning Rate: 0.000001
  -> No improvement in validation loss for 9 epoch(s)


Epoch 30/50: 100%|██████████| 3730/3730 [02:02<00:00, 30.33it/s]
INFO:TrainingLogger:Epoch 30/50 | Train Loss: 0.0692 | Val Loss: 0.0681
INFO:TrainingLogger:  -> Current Learning Rate: 0.000001
INFO:TrainingLogger:  -> No improvement in validation loss for 10 epoch(s)
INFO:TrainingLogger:Early stopping triggered.


Epoch 30/50 | Train Loss: 0.0692 | Val Loss: 0.0681
  -> Current Learning Rate: 0.000001
  -> No improvement in validation loss for 10 epoch(s)
Early stopping triggered.
