# Importing Libraries

In [25]:
import os
import zipfile

## GPU Detection

In [26]:
import subprocess
try:
    gpu_info = subprocess.check_output(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'])
    print("Detected GPU!!!")
    GPU_AVAILABLE = True
except Exception as e:
    print(f'No GPU Detected. Running on CPU!\n{e}')
    GPU_AVIALABLE = False

Detected GPU!!!


In [4]:
import dask.dataframe as dd
import dask.array as da
from dask_cuda import LocalCUDACluster
from dask.diagnostics import ProgressBar
from dask.distributed import Client

cluster = LocalCUDACluster()
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_cuda.LocalCUDACluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 2
Total threads: 2,Total memory: 30.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:37707,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 2
Started: Just now,Total memory: 30.00 GiB

0,1
Comm: tcp://127.0.0.1:43397,Total threads: 1
Dashboard: http://127.0.0.1:44995/status,Memory: 15.00 GiB
Nanny: tcp://127.0.0.1:34719,
Local directory: /tmp/dask-scratch-space/worker-3sbwdmvs,Local directory: /tmp/dask-scratch-space/worker-3sbwdmvs

0,1
Comm: tcp://127.0.0.1:38977,Total threads: 1
Dashboard: http://127.0.0.1:42957/status,Memory: 15.00 GiB
Nanny: tcp://127.0.0.1:34701,
Local directory: /tmp/dask-scratch-space/worker-ar_tom13,Local directory: /tmp/dask-scratch-space/worker-ar_tom13


In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [28]:
try:
    import cudf
    import cupy as cp
    import dask_cudf
    GPU_MODE = torch.cuda.is_available()
except ImportError:
    cudf = None
    cp = None
    dask_cudf = None
    GPU_MODE = False

In [29]:
try:
    torch.multiprocessing.set_start_method('spawn', force=True)
    print("Set multiprocessing start method to 'spawn'.")
except RuntimeError:
    pass

Set multiprocessing start method to 'spawn'.


In [30]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    roc_auc_score,
    balanced_accuracy_score,
    matthews_corrcoef,
    confusion_matrix
)
from tqdm.auto import tqdm

# Configuration Settings

In [31]:
train_dir = "/kaggle/input/magnetic-data/magnetic_data/train"
dev_dir = "/kaggle/input/magnetic-data/magnetic_data/dev"
test_dir = "/kaggle/input/magnetic-data/magnetic_data/test"
scaler_params_path = "scaler_params.csv"

# CLIENT = Client(n_workers=4, threads_per_worker=2, memory_limit='4GB')
# print(f"Dask Dashboard Link: {CLIENT.dashboard_link}\n")

features = [
    'USFLUX_log', 'TOTPOT_log', 'PIL_LEN_log', 'MEANSHR', 'TOTFZ', 'EPSZ', 'R_VALUE',
    'USFLUX_log_roll_mean5', 'USFLUX_log_roll_std5', 'TOTPOT_log_diff1', 'MEANSHR_lag3'
]
target = 'label'
class_names = ['No Flare', 'C-Class', 'M-Class', 'X-Class']

batch_size = 256

num_cpus = os.cpu_count()
pin_memory = True
load_pre_trained_model = False


# Importing the Dataset

In [32]:
def encode_target(ddf, target_col_name):
    meta_dict = {col: ddf[col].dtype for col in ddf.columns if col != target_col_name}
    meta_dict['flare_level'] = np.int8
    
    def encode_partition(df):
        df['flare_level'] = 0
        df.loc[df[target_col_name].str.startswith('C'), 'flare_level'] = 1
        df.loc[df[target_col_name].str.startswith('M'), 'flare_level'] = 2
        df.loc[df[target_col_name].str.startswith('X'), 'flare_level'] = 3
        df['flare_level'] = df['flare_level'].astype(np.int8)
        return df.drop(columns=[target_col_name])
    
    meta_df = pd.DataFrame({col: pd.Series(dtype=dtype) for col, dtype in meta_dict.items()})
    
    ddf = ddf.map_partitions(encode_partition, meta=meta_df)
    
    return ddf

In [33]:
def Index(ddf):
    ddf['safe_index'] = ddf['record_id'].astype(int) * 60 + ddf['seq_id'].astype(int)
    ddf = ddf.map_partitions(lambda df: df.sort_values("safe_index"))
    ddf = ddf.set_index("safe_index", sorted=False, compute=True)
    return ddf

In [34]:
# train_dir = "../data/processed/magnetic_data/train/"
ddf_train = dask_cudf.read_parquet(train_dir)
# if 'index' in ddf_train.columns:
#     ddf_train = ddf_train.drop(columns=['index'])
ddf_train = Index(ddf_train)
ddf_train = encode_target(ddf_train, target)
print(f"Indexed ddf_train: {ddf_train.index.is_monotonic_increasing.compute()}")



Indexed ddf_train: True


In [35]:
# train_dir = "../data/processed/magnetic_data/train/"
ddf_dev = dask_cudf.read_parquet(dev_dir)
ddf_dev = Index(ddf_dev)
ddf_dev = encode_target(ddf_dev, target)
print(f"Indexed ddf_dev: {ddf_dev.index.is_monotonic_increasing.compute()}")

# X_train = ddf_train[features].to_dask_array().compute_chunk_sizes()
# y_train = ddf_train['flare_level'].to_dask_array().compute_chunk_sizes()



Indexed ddf_dev: True


In [36]:
# test_dir = "../data/processed/magnetic_data/test/"
ddf_test = dask_cudf.read_parquet(test_dir)
ddf_test = Index(ddf_test)
ddf_test = encode_target(ddf_test, target)
print(f"Indexed ddf_test: {ddf_test.index.is_monotonic_increasing.compute()}")

# X_test = ddf_test[features].to_dask_array().compute_chunk_sizes()
# y_test = ddf_test['flare_level'].to_dask_array().compute_chunk_sizes()



Indexed ddf_test: True


In [37]:
df_gpu = ddf_train[['flare_level']].compute()  # Bring to a single CuDF on GPU
class_counts_multi = df_gpu['flare_level'].value_counts()
print(f"Multiclass Distribution:\n{class_counts_multi}")
print("Note the mapping: 0=No Flare, 1=C-Class, 2=M-Class, 3=X-Class\n")
class_counts = class_counts_multi.sort_index()
total = class_counts.sum()
class_weights = total / (len(class_counts) * class_counts)
class_weights_tensor = torch.tensor(class_weights.values, dtype=torch.float32).to('cuda')



Multiclass Distribution:
flare_level
0    11042040
1     1236240
2      227100
3       22620
Name: count, dtype: int64
Note the mapping: 0=No Flare, 1=C-Class, 2=M-Class, 3=X-Class



# Feature Scaling and Normalization

In [38]:
aggregations = {col: ['mean', 'std'] for col in features if col not in ['record_id', 'seq_id']}
scaler_params_df = (
    ddf_train.map_partitions(
        lambda gdf: gdf.groupby("seq_id").agg(aggregations)
    ).compute()
)

scaler_params_df.columns = ['_'.join(col).strip() for col in scaler_params_df.columns.values]
scaler_params_df.to_csv(scaler_params_path)



In [39]:
def timeseries_scaling(ddf, scaler_params_path, features):
    if isinstance(ddf, dd.DataFrame):
        ddf = dask_cudf.from_cudf(ddf.compute(), npartitions=ddf.npartitions)

    scaler_params = cudf.read_csv(scaler_params_path)
    scaler_params_ddf = dask_cudf.from_cudf(scaler_params, npartitions=1)

    ddf_scaled = ddf.merge(scaler_params_ddf, on="seq_id", how="left")

    for col in features:
        mean_col = f"{col}_mean"
        std_col = f"{col}_std"
        ddf_scaled[col] = (ddf_scaled[col] - ddf_scaled[mean_col]) / (ddf_scaled[std_col] + 1e-7)

    return ddf_scaled.persist()

In [None]:
ddf_train = timeseries_scaling(ddf_train, scaler_params_path, features).persist()
ddf_dev = timeseries_scaling(ddf_dev, scaler_params_path, features).persist()
ddf_test = timeseries_scaling(ddf_test, scaler_params_path, features).persist()
# ddf_train.head(100)

In [19]:
!nvidia-smi

Sun Nov  9 21:27:30 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   75C    P0             34W /   70W |    9277MiB /  15360MiB |      2%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

# DataLoader

In [41]:
class TimeSeriesDataset(Dataset):
    def __init__(self, ddf, features=features, target="flare_level", chunk_size=10000):
        self.features_col = features
        self.target = target
        seq_length = 60
        num_features = len(features)
        
        unique_ids_series = ddf["record_id"].unique().compute()
        
        if hasattr(unique_ids_series, 'to_arrow'):
            record_list = unique_ids_series.to_arrow().to_pylist()
        elif hasattr(unique_ids_series, 'to_pandas'):
            record_list = unique_ids_series.to_pandas().tolist()
        else:
            record_list = list(unique_ids_series)
        
        num_records = len(record_list)        
        self.X = []
        self.y = []
        
        num_chunks = (num_records + chunk_size - 1) // chunk_size
        
        for chunk_idx in range(num_chunks):
            start_idx = chunk_idx * chunk_size
            end_idx = min(start_idx + chunk_size, num_records)
            chunk_records = record_list[start_idx:end_idx]
            
            try:
                chunk_ddf = ddf[ddf["record_id"].isin(chunk_records)]
                chunk_df = chunk_ddf.compute()
                
                if hasattr(chunk_df, 'to_pandas'):
                    chunk_df = chunk_df.to_pandas()
                
                grouped = chunk_df.groupby("record_id")
                
                for record_id, seq_df in grouped:
                    # actual_len = len(seq_df)
                    
                    # if actual_len != seq_length and len(self.X) < 5:
                    #     print(f"\t\tRecord {record_id}: {actual_len} rows (expected {seq_length})")
                    
                    # if actual_len >= seq_length:
                    #     seq_df = seq_df.iloc[:seq_length]
                    # else:
                    #     continue
                    
                    try:
                        features_array = seq_df[self.features_col].values.astype(np.float32)
                        target_value = seq_df[self.target].mode().iloc[0]
                        
                        self.X.append(features_array)
                        self.y.append(target_value)
                        
                    except Exception as e:
                        if len(self.X) < 3:
                            print(f"ERROR: {e}")
                        continue
                
                del chunk_df, chunk_ddf
                
            except Exception as e:
                print(f"CHUNK ERROR: {e}")
                continue
        
        if len(self.X) == 0:
            raise ValueError("No sequences loaded! Check preprocessing pipeline.")
        
        self.X = np.array(self.X, dtype=np.float32)
        self.y = np.array(self.y, dtype=np.int64)

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return (
            torch.from_numpy(self.X[idx]),
            torch.tensor(self.y[idx], dtype=torch.long)
        )

In [42]:
def make_dataloader(dataset, batch_size=64, shuffle=True):
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=not GPU_MODE,
        prefetch_factor=None
    )

In [None]:
train_dataset = TimeSeriesDataset(ddf_train.persist())
train = make_dataloader(train_dataset)

dev_dataset = TimeSeriesDataset(ddf_dev.persist())
dev = make_dataloader(dev_dataset)

test_dataset = TimeSeriesDataset(ddf_test.persist())
test = make_dataloader(test_dataset)

# Attentive RNN

In [44]:
class AttentiveRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, attention = False, bidirectional = False):
        super().__init__()

        self.hidden_size = hidden_size
        if attention in ["dot", "concat", None]:
            self.attention = attention
        else:
            raise ValueError("attention can only be 'dot' or 'concat'!!!")
        self.num_directions = 2 if bidirectional else 1

        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first = True, bidirectional = bidirectional)
        self.concat = nn.Linear(hidden_size * self.num_directions, 1)

        self.fc = nn.Linear(hidden_size * self.num_directions, num_classes)
        
    def forward(self, X):
        # X shape: (batch_size, seq_len, input_size)
        batch_size, seq_len, input_size = X.shape
        rnn_out, h_n = self.rnn(X)
        # rnn_output: (batch_size, seq_len, hidden_size * num_directions)
        # h_n: (num_layers * num_directions, batch_size, hidden_size)
        score = None
        if self.attention == "concat":
            atten_input = rnn_out.reshape(-1, self.hidden_size * self.num_directions)     # (batch_size * seq-Len, hidden_size * num_directons)
            score = self.concat(atten_input).view(batch_size, -1)                         # (batch_size, seq_len)
        
        elif self.attention == "dot":
            
            if self.num_directions == 2:
                last_hidden = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)
            else:
                last_hidden = h_n[-1,:,:]
        
            query = last_hidden.unsqueeze(1)                                              # (batch_size, 1, hidden_size * num_directions)
            score = torch.bmm(query, rnn_out.transpose(1,2)).squeeze(1)                   # (batch_size, seq_len)
        
        context = None
        attn_weights = None
        
        if score is not None:
            attn_weights = F.softmax(score, dim=1)                                       # (batch_size, seq_len)
            context = torch.bmm(attn_weights.unsqueeze(1), rnn_out)                       # (batch_size, 1, hidden_size * num_directions)
            context = context.squeeze(1)                                                  # (batch_size, hidden_size * num_directions)
        else:
            if self.num_directions == 2:
                context = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)
            else:
                context = h_n[-1,:,:]

        output = self.fc(context)
        
        return output, attn_weights

# Attentive LSTM

In [45]:
class AttentiveLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, attention=None, bidirectional=False):
        super().__init__()

        self.hidden_size = hidden_size
        if attention in ["dot", "concat", None]:
            self.attention = attention
        else:
            raise ValueError("attention can only be 'dot', 'concat', or None!!!")
        
        self.num_directions = 2 if bidirectional else 1

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
        
        if self.attention == "concat":
            self.attention_layer = nn.Linear(hidden_size * self.num_directions, 1)

        self.fc = nn.Linear(hidden_size * self.num_directions, num_classes)
        
    def forward(self, X):
        # X shape: (batch_size, seq_len, input_size)
        batch_size, seq_len, _ = X.shape
        
        lstm_out, (h_n, c_n) = self.lstm(X)
        # lstm_out: (batch_size, seq_len, hidden_size * num_directions)
        # h_n: (num_layers * num_directions, batch_size, hidden_size)
        # c_n: (num_layers * num_directions, batch_size, hidden_size)
        
        score = None
        if self.attention == "concat":
            atten_input = lstm_out.reshape(-1, self.hidden_size * self.num_directions)
            score = self.attention_layer(atten_input).view(batch_size, -1)
        
        elif self.attention == "dot":
            if self.num_directions == 2:
                last_hidden = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)
            else:
                last_hidden = h_n[-1,:,:]
        
            query = last_hidden.unsqueeze(1)
            score = torch.bmm(query, lstm_out.transpose(1,2)).squeeze(1)
        
        context = None
        attn_weights = None
        
        if self.attention is not None:
            attn_weights = F.softmax(score, dim=1)
            context = torch.bmm(attn_weights.unsqueeze(1), lstm_out)
            context = context.squeeze(1)
        else:
            if self.num_directions == 2:
                context = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim=1)
            else:
                context = h_n[-1,:,:]

        output = self.fc(context)
        
        return output, attn_weights

# Training and Evaluation Functions

In [46]:
def training(model, device, criterion, optimizer, train_loader, train_epochs=10, pre_epochs=0):
    print(f"Starting training for {train_epochs} epochs on {device}.")
    train_start = time.time()
    epoch_pbar = tqdm(range(train_epochs), desc="Training Progress", unit="epoch")
    
    for epoch in epoch_pbar:
        model.train()
        total_loss = 0.0
        all_predictions = []
        all_targets = []
        
        for features, labels in train_loader:
            features = features.to(device) 
            labels = labels.to(device)

            optimizer.zero_grad()
            logits, _ = model(features)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            _, predicted_classes = torch.max(logits, 1)
            all_predictions.extend(predicted_classes.cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

        avg_loss = total_loss / len(train_loader)
        accuracy = accuracy_score(all_targets, all_predictions)
        
        print(f"Epoch {pre_epochs + epoch + 1} Summary -> Avg. Loss: {avg_loss:.4f}, Accuracy: {accuracy*100:.2f}%")

    total_seconds = time.time() - train_start
    hours, rem = divmod(total_seconds, 3600)
    minutes, seconds = divmod(rem, 60)
    print(f"\nTotal Train Time: {int(hours)}h {int(minutes)}m {int(seconds)}s")

In [47]:
def eval_detailed(model, device, loader, report_title="Evaluation"):
    model.eval()
    all_targets, all_predictions, all_probas = [], [], []

    with torch.no_grad():
        for features, labels in tqdm(loader, desc=f"Evaluating ({report_title})", leave=False):
            features, labels = features.to(device), labels.to(device)
            logits, _ = model(features)
            probas = F.softmax(logits, dim=1)
            preds = torch.argmax(probas, dim=1)

            all_targets.extend(labels.cpu().numpy())
            all_predictions.extend(preds.cpu().numpy())
            all_probas.extend(probas.cpu().numpy())

    y_true = np.array(all_targets)
    y_pred = np.array(all_predictions)
    y_pred_proba = np.array(all_probas)

    print(f"\n================= {report_title} =================")
    print("Classification Report:\n")
    print(classification_report(
        y_true,
        y_pred,
        target_names=class_names,
        digits=4,
        zero_division=0
    ))

    try:
        roc_auc = roc_auc_score(y_true, y_pred_proba, multi_class='ovr', average='weighted')
    except ValueError:
        roc_auc = float("nan")

    metrics = {
        "balanced_accuracy": balanced_accuracy_score(y_true, y_pred),
        "roc_auc_weighted": roc_auc,
        "mcc": matthews_corrcoef(y_true, y_pred),
        "f1_macro": f1_score(y_true, y_pred, average='macro', zero_division=0),
        "f1_weighted": f1_score(y_true, y_pred, average='weighted', zero_division=0)
    }

    cm = confusion_matrix(y_true, y_pred, normalize='true')
    print("\nNormalized Confusion Matrix:\n", cm)

    print("\nKey Metrics Summary:")
    for k, v in metrics.items():
        print(f"  {k:20s}: {v:.4f}")

    return metrics


In [25]:
def save_model_as_zip(model, model_name, save_dir="/kaggle/working"):
    # File paths
    model_path = os.path.join(save_dir, f"{model_name}.pth")
    zip_path = os.path.join(save_dir, f"{model_name}.zip")

    # Save model
    torch.save(model.state_dict(), model_path)

    # Zip it
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        zipf.write(model_path, arcname=f"{model_name}.pth")

    # Remove unzipped .pth file
    os.remove(model_path)

    # Get zipped size in MB
    size_bytes = os.path.getsize(zip_path)
    size_mb = size_bytes / (1024 ** 2)

    print(f"{model_name}.zip saved at {zip_path} | Size: {size_mb:.2f} MB")

# Experimenter

In [28]:
experiments = []
model_types = ["RNN", "LSTM"]
attention_types = [None, "dot", "concat"]
directionality = [False, True]

for model_t in model_types:
    for att_t in attention_types:
        for is_bi in directionality:
            # Create a descriptive name for the run
            direction_str = "Bi" if is_bi else "Uni"
            attention_str = att_t if att_t is not None else "NoAtt"
            run_name = f"{model_t}_{direction_str}_{attention_str}"
            
            config = {
                "run_name": run_name,
                "model_type": model_t,
                "hidden_size": 128,
                "num_layers": 2,
                "attention": att_t,
                "bidirectional": is_bi,
                "lr": 0.001,
                "epochs": 60
            }
            experiments.append(config)

In [29]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

results_log = []
total_start_time = time.time()

for i, config in enumerate(experiments):
    print(f"\n{'='*30} Experiment {i+1}/{len(experiments)}: {config['run_name']} {'='*30}")
    
    experiment_start_time = time.time()
    
    if config["model_type"] == "RNN":
        model = AttentiveRNN(
            input_size=len(features),
            hidden_size=config["hidden_size"],
            num_layers=config["num_layers"],
            num_classes=len(class_names),
            attention=config["attention"],
            bidirectional=config["bidirectional"]
        )
    else: # LSTM
        model = AttentiveLSTM(
            input_size=len(features),
            hidden_size=config["hidden_size"],
            num_classes=len(class_names),
            num_layers=config["num_layers"],
            attention=config["attention"],
            bidirectional=config["bidirectional"]
        )
    if torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs via DataParallel!")
        model = nn.DataParallel(model)
    model.to(device)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
    
    print(f"Training model: {config['run_name']} for {config['epochs']} epochs...")
    training(model, device, criterion, optimizer, train, train_epochs=config["epochs"])
    
    print(f"Evaluating model: {config['run_name']}...")
    eval_metrics = eval_detailed(model, device, dev, report_title=config['run_name'])
    
    experiment_time = time.time() - experiment_start_time
    save_model_as_zip(model, model_name=config['run_name'])
    
    run_result = {
        "run_name": config["run_name"],
        "model_type": config["model_type"],
        "num_layers": config["num_layers"],
        "hidden_size": config["hidden_size"],
        "attention": str(config["attention"]),
        "bidirectional": config["bidirectional"],
        **eval_metrics,
        "training_time_min": experiment_time / 60
    }
    results_log.append(run_result)
    
    pd.DataFrame(results_log).to_csv("experiment_report_intermediate.csv", index=False)


print(f"\n{'='*30} FINAL EXPERIMENT REPORT {'='*30}")
total_run_time = time.time() - total_start_time
print(f"Total experiment time: {total_run_time/60:.2f} minutes")

results_df = pd.DataFrame(results_log)
results_df = results_df.sort_values(by="mcc", ascending=False).reset_index(drop=True)

pd.set_option('display.float_format', '{:.4f}'.format)
display(results_df)

results_df.to_csv("experiment_report_final.csv", index=False)


Using 2 GPUs via DataParallel!
Training model: RNN_Uni_NoAtt for 60 epochs...
Starting training for 60 epochs on cuda.


Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8937, Accuracy: 74.97%
Epoch 2 Summary -> Avg. Loss: 0.8696, Accuracy: 74.98%
Epoch 3 Summary -> Avg. Loss: 0.8531, Accuracy: 74.35%
Epoch 4 Summary -> Avg. Loss: 0.8517, Accuracy: 74.64%
Epoch 5 Summary -> Avg. Loss: 0.8438, Accuracy: 74.53%
Epoch 6 Summary -> Avg. Loss: 0.8540, Accuracy: 74.18%
Epoch 7 Summary -> Avg. Loss: 0.9433, Accuracy: 72.51%
Epoch 8 Summary -> Avg. Loss: 0.8693, Accuracy: 75.24%
Epoch 9 Summary -> Avg. Loss: 0.8577, Accuracy: 74.73%
Epoch 10 Summary -> Avg. Loss: 0.8692, Accuracy: 73.64%
Epoch 11 Summary -> Avg. Loss: 0.8580, Accuracy: 73.41%
Epoch 12 Summary -> Avg. Loss: 0.8534, Accuracy: 74.13%
Epoch 13 Summary -> Avg. Loss: 0.8591, Accuracy: 74.06%
Epoch 14 Summary -> Avg. Loss: 0.8486, Accuracy: 75.31%
Epoch 15 Summary -> Avg. Loss: 0.8425, Accuracy: 74.59%
Epoch 16 Summary -> Avg. Loss: 0.8526, Accuracy: 75.08%
Epoch 17 Summary -> Avg. Loss: 0.8467, Accuracy: 75.10%
Epoch 18 Summary -> Avg. Loss: 0.8455, Accuracy: 74.53%
E

Evaluating (RNN_Uni_NoAtt):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9781    0.8072    0.8845      1878
     C-Class     0.2725    0.6619    0.3861       210
     M-Class     0.2429    0.4359    0.3119        39
     X-Class     0.0000    0.0000    0.0000         4

    accuracy                         0.7846      2131
   macro avg     0.3734    0.4763    0.3956      2131
weighted avg     0.8932    0.7846    0.8232      2131


Normalized Confusion Matrix:
 [[8.07241747e-01 1.85303514e-01 6.92225772e-03 5.32481363e-04]
 [1.57142857e-01 6.61904762e-01 1.80952381e-01 0.00000000e+00]
 [2.56410256e-02 5.38461538e-01 4.35897436e-01 0.00000000e+00]
 [0.00000000e+00 5.00000000e-01 5.00000000e-01 0.00000000e+00]]

Key Metrics Summary:
  balanced_accuracy   : 0.4763
  roc_auc_weighted    : 0.9026
  mcc                 : 0.4025
  f1_macro            : 0.3956
  f1_weighted         : 0.8232
RNN_Uni_NoAtt.zip saved at /kaggle/working/RNN_Uni_NoAtt.zip | Size: 0.18 MB

U

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8980, Accuracy: 75.19%
Epoch 2 Summary -> Avg. Loss: 0.8642, Accuracy: 75.45%
Epoch 3 Summary -> Avg. Loss: 0.8498, Accuracy: 75.62%
Epoch 4 Summary -> Avg. Loss: 0.8441, Accuracy: 75.96%
Epoch 5 Summary -> Avg. Loss: 0.8369, Accuracy: 76.32%
Epoch 6 Summary -> Avg. Loss: 0.8266, Accuracy: 76.27%
Epoch 7 Summary -> Avg. Loss: 0.8167, Accuracy: 76.40%
Epoch 8 Summary -> Avg. Loss: 0.8083, Accuracy: 76.35%
Epoch 9 Summary -> Avg. Loss: 0.8037, Accuracy: 76.07%
Epoch 10 Summary -> Avg. Loss: 0.8087, Accuracy: 76.07%
Epoch 11 Summary -> Avg. Loss: 0.7983, Accuracy: 76.23%
Epoch 12 Summary -> Avg. Loss: 0.7960, Accuracy: 76.09%
Epoch 13 Summary -> Avg. Loss: 0.8112, Accuracy: 75.87%
Epoch 14 Summary -> Avg. Loss: 0.7909, Accuracy: 76.04%
Epoch 15 Summary -> Avg. Loss: 0.7837, Accuracy: 76.38%
Epoch 16 Summary -> Avg. Loss: 0.7856, Accuracy: 76.59%
Epoch 17 Summary -> Avg. Loss: 0.7767, Accuracy: 76.62%
Epoch 18 Summary -> Avg. Loss: 0.7956, Accuracy: 76.31%
E

Evaluating (RNN_Bi_NoAtt):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9894    0.7455    0.8503      1878
     C-Class     0.2365    0.6048    0.3400       210
     M-Class     0.1486    0.5641    0.2353        39
     X-Class     0.0645    0.5000    0.1143         4

    accuracy                         0.7278      2131
   macro avg     0.3598    0.6036    0.3850      2131
weighted avg     0.8981    0.7278    0.7874      2131


Normalized Confusion Matrix:
 [[0.74547391 0.2113951  0.04153355 0.00159744]
 [0.07142857 0.6047619  0.21904762 0.1047619 ]
 [0.         0.33333333 0.56410256 0.1025641 ]
 [0.         0.         0.5        0.5       ]]

Key Metrics Summary:
  balanced_accuracy   : 0.6036
  roc_auc_weighted    : 0.9065
  mcc                 : 0.3602
  f1_macro            : 0.3850
  f1_weighted         : 0.7874
RNN_Bi_NoAtt.zip saved at /kaggle/working/RNN_Bi_NoAtt.zip | Size: 0.48 MB

Using 2 GPUs via DataParallel!
Training model: RNN_Uni_dot for 60 e

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8832, Accuracy: 75.52%
Epoch 2 Summary -> Avg. Loss: 0.8972, Accuracy: 73.76%
Epoch 3 Summary -> Avg. Loss: 0.8909, Accuracy: 74.16%
Epoch 4 Summary -> Avg. Loss: 0.8866, Accuracy: 73.27%
Epoch 5 Summary -> Avg. Loss: 0.8629, Accuracy: 75.34%
Epoch 6 Summary -> Avg. Loss: 0.8628, Accuracy: 74.88%
Epoch 7 Summary -> Avg. Loss: 0.8567, Accuracy: 74.95%
Epoch 8 Summary -> Avg. Loss: 0.8511, Accuracy: 74.86%
Epoch 9 Summary -> Avg. Loss: 0.8496, Accuracy: 74.61%
Epoch 10 Summary -> Avg. Loss: 0.8379, Accuracy: 75.28%
Epoch 11 Summary -> Avg. Loss: 0.8484, Accuracy: 74.57%
Epoch 12 Summary -> Avg. Loss: 0.8487, Accuracy: 73.98%
Epoch 13 Summary -> Avg. Loss: 0.8416, Accuracy: 74.65%
Epoch 14 Summary -> Avg. Loss: 0.8375, Accuracy: 74.55%
Epoch 15 Summary -> Avg. Loss: 0.8372, Accuracy: 74.42%
Epoch 16 Summary -> Avg. Loss: 0.8362, Accuracy: 73.71%
Epoch 17 Summary -> Avg. Loss: 0.8415, Accuracy: 74.55%
Epoch 18 Summary -> Avg. Loss: 0.8328, Accuracy: 73.96%
E

Evaluating (RNN_Uni_dot):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9819    0.7497    0.8502      1878
     C-Class     0.2374    0.7190    0.3570       210
     M-Class     0.2623    0.4103    0.3200        39
     X-Class     0.0000    0.0000    0.0000         4

    accuracy                         0.7391      2131
   macro avg     0.3704    0.4698    0.3818      2131
weighted avg     0.8935    0.7391    0.7903      2131


Normalized Confusion Matrix:
 [[0.74973376 0.24547391 0.00479233 0.        ]
 [0.11904762 0.71904762 0.16190476 0.        ]
 [0.02564103 0.56410256 0.41025641 0.        ]
 [0.         0.5        0.5        0.        ]]

Key Metrics Summary:
  balanced_accuracy   : 0.4698
  roc_auc_weighted    : 0.8977
  mcc                 : 0.3718
  f1_macro            : 0.3818
  f1_weighted         : 0.7903
RNN_Uni_dot.zip saved at /kaggle/working/RNN_Uni_dot.zip | Size: 0.18 MB

Using 2 GPUs via DataParallel!
Training model: RNN_Bi_dot for 60 epoc

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.9333, Accuracy: 74.19%
Epoch 2 Summary -> Avg. Loss: 0.9276, Accuracy: 73.49%
Epoch 3 Summary -> Avg. Loss: 0.9184, Accuracy: 73.35%
Epoch 4 Summary -> Avg. Loss: 0.8956, Accuracy: 74.51%
Epoch 5 Summary -> Avg. Loss: 0.8792, Accuracy: 74.67%
Epoch 6 Summary -> Avg. Loss: 0.8675, Accuracy: 75.07%
Epoch 7 Summary -> Avg. Loss: 0.8509, Accuracy: 75.08%
Epoch 8 Summary -> Avg. Loss: 0.8656, Accuracy: 74.86%
Epoch 9 Summary -> Avg. Loss: 0.8487, Accuracy: 75.09%
Epoch 10 Summary -> Avg. Loss: 0.8367, Accuracy: 75.40%
Epoch 11 Summary -> Avg. Loss: 0.8318, Accuracy: 75.97%
Epoch 12 Summary -> Avg. Loss: 0.8230, Accuracy: 75.68%
Epoch 13 Summary -> Avg. Loss: 0.8183, Accuracy: 75.75%
Epoch 14 Summary -> Avg. Loss: 0.8123, Accuracy: 75.80%
Epoch 15 Summary -> Avg. Loss: 0.8076, Accuracy: 75.71%
Epoch 16 Summary -> Avg. Loss: 0.8035, Accuracy: 76.19%
Epoch 17 Summary -> Avg. Loss: 0.7920, Accuracy: 76.25%
Epoch 18 Summary -> Avg. Loss: 0.7946, Accuracy: 76.15%
E

Evaluating (RNN_Bi_dot):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9789    0.8142    0.8890      1878
     C-Class     0.2813    0.5238    0.3661       210
     M-Class     0.1720    0.6923    0.2755        39
     X-Class     0.1905    1.0000    0.3200         4

    accuracy                         0.7837      2131
   macro avg     0.4057    0.7576    0.4626      2131
weighted avg     0.8939    0.7837    0.8251      2131


Normalized Confusion Matrix:
 [[0.814164   0.14536741 0.03620873 0.00425985]
 [0.14761905 0.52380952 0.2952381  0.03333333]
 [0.05128205 0.20512821 0.69230769 0.05128205]
 [0.         0.         0.         1.        ]]

Key Metrics Summary:
  balanced_accuracy   : 0.7576
  roc_auc_weighted    : 0.9120
  mcc                 : 0.3934
  f1_macro            : 0.4626
  f1_weighted         : 0.8251
RNN_Bi_dot.zip saved at /kaggle/working/RNN_Bi_dot.zip | Size: 0.48 MB

Using 2 GPUs via DataParallel!
Training model: RNN_Uni_concat for 60 ep

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8559, Accuracy: 75.20%
Epoch 2 Summary -> Avg. Loss: 0.8465, Accuracy: 75.15%
Epoch 3 Summary -> Avg. Loss: 0.8405, Accuracy: 75.38%
Epoch 4 Summary -> Avg. Loss: 0.8258, Accuracy: 75.09%
Epoch 5 Summary -> Avg. Loss: 0.8030, Accuracy: 75.59%
Epoch 6 Summary -> Avg. Loss: 0.7921, Accuracy: 75.82%
Epoch 7 Summary -> Avg. Loss: 0.7868, Accuracy: 76.08%
Epoch 8 Summary -> Avg. Loss: 0.7669, Accuracy: 76.52%
Epoch 9 Summary -> Avg. Loss: 0.7628, Accuracy: 76.50%
Epoch 10 Summary -> Avg. Loss: 0.7570, Accuracy: 76.60%
Epoch 11 Summary -> Avg. Loss: 0.7539, Accuracy: 76.81%
Epoch 12 Summary -> Avg. Loss: 0.7445, Accuracy: 76.92%
Epoch 13 Summary -> Avg. Loss: 0.7353, Accuracy: 76.75%
Epoch 14 Summary -> Avg. Loss: 0.7250, Accuracy: 77.03%
Epoch 15 Summary -> Avg. Loss: 0.7164, Accuracy: 77.02%
Epoch 16 Summary -> Avg. Loss: 0.7191, Accuracy: 76.74%
Epoch 17 Summary -> Avg. Loss: 0.7079, Accuracy: 77.05%
Epoch 18 Summary -> Avg. Loss: 0.7126, Accuracy: 77.12%
E

Evaluating (RNN_Uni_concat):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9818    0.7764    0.8671      1878
     C-Class     0.2625    0.6524    0.3743       210
     M-Class     0.2376    0.6154    0.3429        39
     X-Class     0.1304    0.7500    0.2222         4

    accuracy                         0.7611      2131
   macro avg     0.4031    0.6985    0.4516      2131
weighted avg     0.8957    0.7611    0.8077      2131


Normalized Confusion Matrix:
 [[0.77635783 0.19808307 0.02183174 0.00372737]
 [0.12380952 0.65238095 0.17142857 0.05238095]
 [0.02564103 0.30769231 0.61538462 0.05128205]
 [0.         0.25       0.         0.75      ]]

Key Metrics Summary:
  balanced_accuracy   : 0.6985
  roc_auc_weighted    : 0.9079
  mcc                 : 0.3929
  f1_macro            : 0.4516
  f1_weighted         : 0.8077
RNN_Uni_concat.zip saved at /kaggle/working/RNN_Uni_concat.zip | Size: 0.18 MB

Using 2 GPUs via DataParallel!
Training model: RNN_Bi_concat fo

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8564, Accuracy: 75.69%
Epoch 2 Summary -> Avg. Loss: 0.8567, Accuracy: 75.33%
Epoch 3 Summary -> Avg. Loss: 0.8542, Accuracy: 75.41%
Epoch 4 Summary -> Avg. Loss: 0.8429, Accuracy: 75.45%
Epoch 5 Summary -> Avg. Loss: 0.8516, Accuracy: 75.25%
Epoch 6 Summary -> Avg. Loss: 0.8697, Accuracy: 75.25%
Epoch 7 Summary -> Avg. Loss: 0.8505, Accuracy: 75.92%
Epoch 8 Summary -> Avg. Loss: 0.8397, Accuracy: 75.65%
Epoch 9 Summary -> Avg. Loss: 0.8229, Accuracy: 76.07%
Epoch 10 Summary -> Avg. Loss: 0.8372, Accuracy: 75.39%
Epoch 11 Summary -> Avg. Loss: 0.8208, Accuracy: 75.87%
Epoch 12 Summary -> Avg. Loss: 0.8306, Accuracy: 75.84%
Epoch 13 Summary -> Avg. Loss: 0.8228, Accuracy: 76.27%
Epoch 14 Summary -> Avg. Loss: 0.8200, Accuracy: 75.95%
Epoch 15 Summary -> Avg. Loss: 0.8210, Accuracy: 75.99%
Epoch 16 Summary -> Avg. Loss: 0.8102, Accuracy: 76.02%
Epoch 17 Summary -> Avg. Loss: 0.8018, Accuracy: 76.36%
Epoch 18 Summary -> Avg. Loss: 0.8126, Accuracy: 76.24%
E

Evaluating (RNN_Bi_concat):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9905    0.7194    0.8334      1878
     C-Class     0.2371    0.7238    0.3572       210
     M-Class     0.1966    0.5897    0.2949        39
     X-Class     0.2222    0.5000    0.3077         4

    accuracy                         0.7170      2131
   macro avg     0.4116    0.6332    0.4483      2131
weighted avg     0.9003    0.7170    0.7757      2131


Normalized Confusion Matrix:
 [[0.71938232 0.25239617 0.02822151 0.        ]
 [0.05714286 0.72380952 0.19047619 0.02857143]
 [0.02564103 0.35897436 0.58974359 0.02564103]
 [0.         0.25       0.25       0.5       ]]

Key Metrics Summary:
  balanced_accuracy   : 0.6332
  roc_auc_weighted    : 0.9037
  mcc                 : 0.3757
  f1_macro            : 0.4483
  f1_weighted         : 0.7757
RNN_Bi_concat.zip saved at /kaggle/working/RNN_Bi_concat.zip | Size: 0.48 MB

Using 2 GPUs via DataParallel!
Training model: LSTM_Uni_NoAtt for

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8527, Accuracy: 75.37%
Epoch 2 Summary -> Avg. Loss: 0.7969, Accuracy: 76.02%
Epoch 3 Summary -> Avg. Loss: 0.7723, Accuracy: 76.51%
Epoch 4 Summary -> Avg. Loss: 0.7622, Accuracy: 76.85%
Epoch 5 Summary -> Avg. Loss: 0.7446, Accuracy: 76.33%
Epoch 6 Summary -> Avg. Loss: 0.7541, Accuracy: 76.51%
Epoch 7 Summary -> Avg. Loss: 0.7303, Accuracy: 77.18%
Epoch 8 Summary -> Avg. Loss: 0.7141, Accuracy: 77.39%
Epoch 9 Summary -> Avg. Loss: 0.7006, Accuracy: 77.32%
Epoch 10 Summary -> Avg. Loss: 0.7044, Accuracy: 77.32%
Epoch 11 Summary -> Avg. Loss: 0.7051, Accuracy: 77.52%
Epoch 12 Summary -> Avg. Loss: 0.6952, Accuracy: 77.41%
Epoch 13 Summary -> Avg. Loss: 0.6916, Accuracy: 77.22%
Epoch 14 Summary -> Avg. Loss: 0.7108, Accuracy: 77.62%
Epoch 15 Summary -> Avg. Loss: 0.6792, Accuracy: 77.68%
Epoch 16 Summary -> Avg. Loss: 0.6716, Accuracy: 77.70%
Epoch 17 Summary -> Avg. Loss: 0.6599, Accuracy: 78.03%
Epoch 18 Summary -> Avg. Loss: 0.6496, Accuracy: 78.18%
E

Evaluating (LSTM_Uni_NoAtt):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9855    0.7945    0.8797      1878
     C-Class     0.3163    0.7667    0.4478       210
     M-Class     0.3100    0.7949    0.4460        39
     X-Class     0.3750    0.7500    0.5000         4

    accuracy                         0.7916      2131
   macro avg     0.4967    0.7765    0.5684      2131
weighted avg     0.9060    0.7916    0.8285      2131


Normalized Confusion Matrix:
 [[7.94462194e-01 1.81043663e-01 2.39616613e-02 5.32481363e-04]
 [1.04761905e-01 7.66666667e-01 1.14285714e-01 1.42857143e-02]
 [0.00000000e+00 1.79487179e-01 7.94871795e-01 2.56410256e-02]
 [0.00000000e+00 2.50000000e-01 0.00000000e+00 7.50000000e-01]]

Key Metrics Summary:
  balanced_accuracy   : 0.7765
  roc_auc_weighted    : 0.9233
  mcc                 : 0.4628
  f1_macro            : 0.5684
  f1_weighted         : 0.8285
LSTM_Uni_NoAtt.zip saved at /kaggle/working/LSTM_Uni_NoAtt.zip | Size: 0.73 MB


Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8351, Accuracy: 75.30%
Epoch 2 Summary -> Avg. Loss: 0.7886, Accuracy: 76.27%
Epoch 3 Summary -> Avg. Loss: 0.7628, Accuracy: 76.27%
Epoch 4 Summary -> Avg. Loss: 0.7455, Accuracy: 76.53%
Epoch 5 Summary -> Avg. Loss: 0.7407, Accuracy: 77.02%
Epoch 6 Summary -> Avg. Loss: 0.7168, Accuracy: 77.26%
Epoch 7 Summary -> Avg. Loss: 0.7039, Accuracy: 77.22%
Epoch 8 Summary -> Avg. Loss: 0.6945, Accuracy: 77.06%
Epoch 9 Summary -> Avg. Loss: 0.6817, Accuracy: 77.41%
Epoch 10 Summary -> Avg. Loss: 0.6675, Accuracy: 77.64%
Epoch 11 Summary -> Avg. Loss: 0.6598, Accuracy: 77.67%
Epoch 12 Summary -> Avg. Loss: 0.6445, Accuracy: 77.67%
Epoch 13 Summary -> Avg. Loss: 0.6513, Accuracy: 78.18%
Epoch 14 Summary -> Avg. Loss: 0.6399, Accuracy: 78.05%
Epoch 15 Summary -> Avg. Loss: 0.6171, Accuracy: 78.52%
Epoch 16 Summary -> Avg. Loss: 0.6225, Accuracy: 78.48%
Epoch 17 Summary -> Avg. Loss: 0.5966, Accuracy: 78.77%
Epoch 18 Summary -> Avg. Loss: 0.5927, Accuracy: 78.88%
E

Evaluating (LSTM_Bi_NoAtt):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9862    0.8003    0.8836      1878
     C-Class     0.3042    0.6619    0.4168       210
     M-Class     0.2080    0.6667    0.3171        39
     X-Class     0.1600    1.0000    0.2759         4

    accuracy                         0.7846      2131
   macro avg     0.4146    0.7822    0.4733      2131
weighted avg     0.9032    0.7846    0.8261      2131


Normalized Confusion Matrix:
 [[0.80031949 0.1656017  0.03088392 0.00319489]
 [0.1        0.66190476 0.1952381  0.04285714]
 [0.         0.17948718 0.66666667 0.15384615]
 [0.         0.         0.         1.        ]]

Key Metrics Summary:
  balanced_accuracy   : 0.7822
  roc_auc_weighted    : 0.9213
  mcc                 : 0.4318
  f1_macro            : 0.4733
  f1_weighted         : 0.8261
LSTM_Bi_NoAtt.zip saved at /kaggle/working/LSTM_Bi_NoAtt.zip | Size: 1.92 MB

Using 2 GPUs via DataParallel!
Training model: LSTM_Uni_dot for 6

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8494, Accuracy: 75.04%
Epoch 2 Summary -> Avg. Loss: 0.7888, Accuracy: 76.07%
Epoch 3 Summary -> Avg. Loss: 0.7619, Accuracy: 76.44%
Epoch 4 Summary -> Avg. Loss: 0.7394, Accuracy: 77.03%
Epoch 5 Summary -> Avg. Loss: 0.7250, Accuracy: 77.29%
Epoch 6 Summary -> Avg. Loss: 0.7040, Accuracy: 77.43%
Epoch 7 Summary -> Avg. Loss: 0.6959, Accuracy: 77.88%
Epoch 8 Summary -> Avg. Loss: 0.6782, Accuracy: 77.81%
Epoch 9 Summary -> Avg. Loss: 0.6741, Accuracy: 77.93%
Epoch 10 Summary -> Avg. Loss: 0.6549, Accuracy: 78.12%
Epoch 11 Summary -> Avg. Loss: 0.6894, Accuracy: 77.35%
Epoch 12 Summary -> Avg. Loss: 0.6765, Accuracy: 77.10%
Epoch 13 Summary -> Avg. Loss: 0.6486, Accuracy: 77.52%
Epoch 14 Summary -> Avg. Loss: 0.6499, Accuracy: 78.04%
Epoch 15 Summary -> Avg. Loss: 0.6460, Accuracy: 77.99%
Epoch 16 Summary -> Avg. Loss: 0.6888, Accuracy: 77.77%
Epoch 17 Summary -> Avg. Loss: 0.6864, Accuracy: 77.85%
Epoch 18 Summary -> Avg. Loss: 0.6457, Accuracy: 78.47%
E

Evaluating (LSTM_Uni_dot):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9912    0.8424    0.9108      1878
     C-Class     0.3995    0.8429    0.5421       210
     M-Class     0.3953    0.8718    0.5440        39
     X-Class     0.6667    1.0000    0.8000         4

    accuracy                         0.8433      2131
   macro avg     0.6132    0.8893    0.6992      2131
weighted avg     0.9214    0.8433    0.8675      2131


Normalized Confusion Matrix:
 [[0.84238552 0.1400426  0.01757188 0.        ]
 [0.06666667 0.84285714 0.09047619 0.        ]
 [0.         0.07692308 0.87179487 0.05128205]
 [0.         0.         0.         1.        ]]

Key Metrics Summary:
  balanced_accuracy   : 0.8893
  roc_auc_weighted    : 0.9476
  mcc                 : 0.5587
  f1_macro            : 0.6992
  f1_weighted         : 0.8675
LSTM_Uni_dot.zip saved at /kaggle/working/LSTM_Uni_dot.zip | Size: 0.73 MB

Using 2 GPUs via DataParallel!
Training model: LSTM_Bi_dot for 60 e

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8387, Accuracy: 75.31%
Epoch 2 Summary -> Avg. Loss: 0.7743, Accuracy: 76.25%
Epoch 3 Summary -> Avg. Loss: 0.7366, Accuracy: 77.02%
Epoch 4 Summary -> Avg. Loss: 0.7199, Accuracy: 77.35%
Epoch 5 Summary -> Avg. Loss: 0.7068, Accuracy: 77.03%
Epoch 6 Summary -> Avg. Loss: 0.6903, Accuracy: 77.44%
Epoch 7 Summary -> Avg. Loss: 0.6779, Accuracy: 77.86%
Epoch 8 Summary -> Avg. Loss: 0.6629, Accuracy: 78.23%
Epoch 9 Summary -> Avg. Loss: 0.6384, Accuracy: 78.14%
Epoch 10 Summary -> Avg. Loss: 0.6315, Accuracy: 78.83%
Epoch 11 Summary -> Avg. Loss: 0.6234, Accuracy: 78.45%
Epoch 12 Summary -> Avg. Loss: 0.6042, Accuracy: 78.59%
Epoch 13 Summary -> Avg. Loss: 0.5859, Accuracy: 79.19%
Epoch 14 Summary -> Avg. Loss: 0.5844, Accuracy: 78.98%
Epoch 15 Summary -> Avg. Loss: 0.5787, Accuracy: 79.40%
Epoch 16 Summary -> Avg. Loss: 0.5837, Accuracy: 79.09%
Epoch 17 Summary -> Avg. Loss: 0.5718, Accuracy: 79.38%
Epoch 18 Summary -> Avg. Loss: 0.5653, Accuracy: 79.77%
E

Evaluating (LSTM_Bi_dot):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9903    0.9201    0.9539      1878
     C-Class     0.5839    0.8952    0.7068       210
     M-Class     0.6316    0.9231    0.7500        39
     X-Class     0.5714    1.0000    0.7273         4

    accuracy                         0.9179      2131
   macro avg     0.6943    0.9346    0.7845      2131
weighted avg     0.9429    0.9179    0.9254      2131


Normalized Confusion Matrix:
 [[0.9201278  0.07082002 0.00905218 0.        ]
 [0.08095238 0.8952381  0.01904762 0.0047619 ]
 [0.         0.02564103 0.92307692 0.05128205]
 [0.         0.         0.         1.        ]]

Key Metrics Summary:
  balanced_accuracy   : 0.9346
  roc_auc_weighted    : 0.9747
  mcc                 : 0.7080
  f1_macro            : 0.7845
  f1_weighted         : 0.9254
LSTM_Bi_dot.zip saved at /kaggle/working/LSTM_Bi_dot.zip | Size: 1.92 MB

Using 2 GPUs via DataParallel!
Training model: LSTM_Uni_concat for 60

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8491, Accuracy: 74.96%
Epoch 2 Summary -> Avg. Loss: 0.7864, Accuracy: 75.90%
Epoch 3 Summary -> Avg. Loss: 0.7601, Accuracy: 76.28%
Epoch 4 Summary -> Avg. Loss: 0.7298, Accuracy: 76.97%
Epoch 5 Summary -> Avg. Loss: 0.7034, Accuracy: 77.36%
Epoch 6 Summary -> Avg. Loss: 0.6916, Accuracy: 77.69%
Epoch 7 Summary -> Avg. Loss: 0.6700, Accuracy: 77.82%
Epoch 8 Summary -> Avg. Loss: 0.6619, Accuracy: 77.94%
Epoch 9 Summary -> Avg. Loss: 0.6485, Accuracy: 78.02%
Epoch 10 Summary -> Avg. Loss: 0.6354, Accuracy: 78.12%
Epoch 11 Summary -> Avg. Loss: 0.6153, Accuracy: 78.26%
Epoch 12 Summary -> Avg. Loss: 0.6064, Accuracy: 78.73%
Epoch 13 Summary -> Avg. Loss: 0.6028, Accuracy: 78.81%
Epoch 14 Summary -> Avg. Loss: 0.5859, Accuracy: 79.21%
Epoch 15 Summary -> Avg. Loss: 0.5828, Accuracy: 78.96%
Epoch 16 Summary -> Avg. Loss: 0.5874, Accuracy: 78.64%
Epoch 17 Summary -> Avg. Loss: 0.6048, Accuracy: 78.31%
Epoch 18 Summary -> Avg. Loss: 0.6672, Accuracy: 77.23%
E

Evaluating (LSTM_Uni_concat):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9870    0.8056    0.8871      1878
     C-Class     0.3199    0.7571    0.4498       210
     M-Class     0.2935    0.6923    0.4122        39
     X-Class     0.3333    0.7500    0.4615         4

    accuracy                         0.7987      2131
   macro avg     0.4834    0.7513    0.5527      2131
weighted avg     0.9073    0.7987    0.8345      2131


Normalized Confusion Matrix:
 [[8.05644302e-01 1.74653887e-01 1.91693291e-02 5.32481363e-04]
 [9.52380952e-02 7.57142857e-01 1.33333333e-01 1.42857143e-02]
 [0.00000000e+00 2.56410256e-01 6.92307692e-01 5.12820513e-02]
 [0.00000000e+00 0.00000000e+00 2.50000000e-01 7.50000000e-01]]

Key Metrics Summary:
  balanced_accuracy   : 0.7513
  roc_auc_weighted    : 0.9284
  mcc                 : 0.4674
  f1_macro            : 0.5527
  f1_weighted         : 0.8345
LSTM_Uni_concat.zip saved at /kaggle/working/LSTM_Uni_concat.zip | Size: 0.73 M

Training Progress:   0%|          | 0/60 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8318, Accuracy: 75.57%
Epoch 2 Summary -> Avg. Loss: 0.7688, Accuracy: 76.58%
Epoch 3 Summary -> Avg. Loss: 0.7331, Accuracy: 76.80%
Epoch 4 Summary -> Avg. Loss: 0.7099, Accuracy: 77.12%
Epoch 5 Summary -> Avg. Loss: 0.6942, Accuracy: 77.49%
Epoch 6 Summary -> Avg. Loss: 0.6689, Accuracy: 77.89%
Epoch 7 Summary -> Avg. Loss: 0.6677, Accuracy: 77.90%
Epoch 8 Summary -> Avg. Loss: 0.6411, Accuracy: 78.64%
Epoch 9 Summary -> Avg. Loss: 0.6162, Accuracy: 78.99%
Epoch 10 Summary -> Avg. Loss: 0.6033, Accuracy: 79.03%
Epoch 11 Summary -> Avg. Loss: 0.5778, Accuracy: 79.39%
Epoch 12 Summary -> Avg. Loss: 0.5657, Accuracy: 79.99%
Epoch 13 Summary -> Avg. Loss: 0.5557, Accuracy: 80.21%
Epoch 14 Summary -> Avg. Loss: 0.5325, Accuracy: 80.40%
Epoch 15 Summary -> Avg. Loss: 0.5165, Accuracy: 80.90%
Epoch 16 Summary -> Avg. Loss: 0.4881, Accuracy: 81.36%
Epoch 17 Summary -> Avg. Loss: 0.5015, Accuracy: 81.26%
Epoch 18 Summary -> Avg. Loss: 0.4611, Accuracy: 82.14%
E

Evaluating (LSTM_Bi_concat):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9929    0.8962    0.9421      1878
     C-Class     0.5179    0.8952    0.6562       210
     M-Class     0.5217    0.9231    0.6667        39
     X-Class     0.7500    0.7500    0.7500         4

    accuracy                         0.8963      2131
   macro avg     0.6956    0.8661    0.7537      2131
weighted avg     0.9370    0.8963    0.9085      2131


Normalized Confusion Matrix:
 [[0.89616613 0.09158679 0.01224707 0.        ]
 [0.05714286 0.8952381  0.04285714 0.0047619 ]
 [0.         0.07692308 0.92307692 0.        ]
 [0.         0.         0.25       0.75      ]]

Key Metrics Summary:
  balanced_accuracy   : 0.8661
  roc_auc_weighted    : 0.9693
  mcc                 : 0.6634
  f1_macro            : 0.7537
  f1_weighted         : 0.9085
LSTM_Bi_concat.zip saved at /kaggle/working/LSTM_Bi_concat.zip | Size: 1.92 MB

Total experiment time: 442.63 minutes


Unnamed: 0,run_name,model_type,num_layers,hidden_size,attention,bidirectional,balanced_accuracy,roc_auc_weighted,mcc,f1_macro,f1_weighted,training_time_min
0,LSTM_Bi_dot,LSTM,2,128,dot,True,0.9346,0.9747,0.708,0.7845,0.9254,61.5377
1,LSTM_Bi_concat,LSTM,2,128,concat,True,0.8661,0.9693,0.6634,0.7537,0.9085,61.6548
2,LSTM_Uni_dot,LSTM,2,128,dot,False,0.8893,0.9476,0.5587,0.6992,0.8675,33.1952
3,LSTM_Uni_concat,LSTM,2,128,concat,False,0.7513,0.9284,0.4674,0.5527,0.8345,33.358
4,LSTM_Uni_NoAtt,LSTM,2,128,,False,0.7765,0.9233,0.4628,0.5684,0.8285,31.1556
5,LSTM_Bi_NoAtt,LSTM,2,128,,True,0.7822,0.9213,0.4318,0.4733,0.8261,59.8036
6,RNN_Uni_NoAtt,RNN,2,128,,False,0.4763,0.9026,0.4025,0.3956,0.8232,23.5624
7,RNN_Bi_dot,RNN,2,128,dot,True,0.7576,0.912,0.3934,0.4626,0.8251,30.2376
8,RNN_Uni_concat,RNN,2,128,concat,False,0.6985,0.9079,0.3929,0.4516,0.8077,25.8786
9,RNN_Bi_concat,RNN,2,128,concat,True,0.6332,0.9037,0.3757,0.4483,0.7757,28.7604


# Train Funrther

In [30]:
def unzip_and_load_model(model, model_name, save_dir="/kaggle/working"):
    zip_path = os.path.join(save_dir, f"{model_name}.zip")
    model_path = os.path.join(save_dir, f"{model_name}.pth")

    # Unzip the model
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(save_dir)

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found after extraction: {model_path}")

    # Load model weights
    model.load_state_dict(torch.load(model_path, map_location='cpu'))

    os.remove(model_path)

    return model

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

results_log = []
total_start_time = time.time()

for i, config in enumerate(experiments):
    print(f"\n{'='*30} Experiment {i+1+20}/{len(experiments)}: {config['run_name']} {'='*30}")
    
    experiment_start_time = time.time()
    
    if config["model_type"] == "RNN":
        model = AttentiveRNN(
            input_size=len(features),
            hidden_size=config["hidden_size"],
            num_layers=config["num_layers"],
            num_classes=len(class_names),
            attention=config["attention"],
            bidirectional=config["bidirectional"]
        )
    else: # LSTM
        model = AttentiveLSTM(
            input_size=len(features),
            hidden_size=config["hidden_size"],
            num_classes=len(class_names),
            num_layers=config["num_layers"],
            attention=config["attention"],
            bidirectional=config["bidirectional"]
        )
    if torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs via DataParallel!")
        model = nn.DataParallel(model)
        
    model = unzip_and_load_model(model, config['run_name'])
    model.to(device)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    criterion = nn.CrossEntropyLoss()
    
    print(f"Training model: {config['run_name']} for {config['epochs']} epochs...")
    training(model, device, criterion, optimizer, train, train_epochs=config["epochs"])
    
    print(f"Evaluating model: {config['run_name']}...")
    eval_metrics = eval_detailed(model, device, dev, report_title=config['run_name'])
    
    experiment_time = time.time() - experiment_start_time
    save_model_as_zip(model, model_name=config['run_name'])
    
    run_result = {
        "run_name": config["run_name"],
        "model_type": config["model_type"],
        "num_layers": config["num_layers"],
        "hidden_size": config["hidden_size"],
        "attention": str(config["attention"]),
        "bidirectional": config["bidirectional"],
        **eval_metrics,
        "training_time_min": experiment_time / 60
    }
    results_log.append(run_result)
    
    pd.DataFrame(results_log).to_csv("experiment_report_intermediate.csv", index=False)


print(f"\n{'='*30} FINAL EXPERIMENT REPORT {'='*30}")
total_run_time = time.time() - total_start_time
print(f"Total experiment time: {total_run_time/60:.2f} minutes")

results_df = pd.DataFrame(results_log)
results_df = results_df.sort_values(by="mcc", ascending=False).reset_index(drop=True)

pd.set_option('display.float_format', '{:.4f}'.format)
display(results_df)

results_df.to_csv("experiment_report_final2.csv", index=False)

In [32]:
!zip -r Unihead.zip /kaggle/working/

  adding: kaggle/working/ (stored 0%)
  adding: kaggle/working/RNN_Uni_dot.zip (stored 0%)
  adding: kaggle/working/LSTM_Bi_NoAtt.zip (stored 0%)
  adding: kaggle/working/.virtual_documents/ (stored 0%)
  adding: kaggle/working/RNN_Bi_concat.zip (stored 0%)
  adding: kaggle/working/experiment_report_final.csv (deflated 50%)
  adding: kaggle/working/LSTM_Uni_dot.zip (stored 0%)
  adding: kaggle/working/LSTM_Bi_concat.zip (stored 0%)
  adding: kaggle/working/LSTM_Bi_dot.zip (stored 0%)
  adding: kaggle/working/RNN_Bi_NoAtt.zip (stored 0%)
  adding: kaggle/working/RNN_Bi_dot.zip (stored 0%)
  adding: kaggle/working/RNN_Uni_concat.zip (stored 0%)
  adding: kaggle/working/experiment_report_intermediate.csv (deflated 42%)
  adding: kaggle/working/RNN_Uni_NoAtt.zip (stored 0%)
  adding: kaggle/working/scaler_params.csv (deflated 57%)
  adding: kaggle/working/LSTM_Uni_NoAtt.zip (stored 0%)
  adding: kaggle/working/LSTM_Uni_concat.zip (stored 0%)


# Multi Head Attention Bi-LSTM

In [26]:
class MultiHeadBiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, num_heads=4):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_heads = num_heads

        # BiLSTM
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True
        )

        # Multi-Head Attention
        self.mha = nn.MultiheadAttention(
            embed_dim=hidden_size * 2,
            num_heads=num_heads,
            batch_first=True
        )

        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, X):
        # X: (batch_size, seq_len, input_size)
        lstm_out, (h_n, c_n) = self.lstm(X)
        # lstm_out: (batch_size, seq_len, hidden_size * num_directions)

        attn_out, attn_weights = self.mha(lstm_out, lstm_out, lstm_out)
        # attn_out: (batch_size, seq_len, hidden_size * num_directions)

        context = torch.mean(attn_out, dim=1)
        # context: (batch_size, hidden_size * num_directions)

        output = self.fc(context)

        return output, attn_weights


In [31]:
experiments = []

# Experiment 1
run_name = "MH_Bi_LSTM_hs256_l2_h4"
config = {
    "run_name": run_name,
    "hidden_size": 256,
    "num_layers": 2,
    "heads": 4,
    "lr": 0.001,
    "epochs": 40
}
experiments.append(config)

# Experiment 2
run_name = "MH_Bi_LSTM_hs128_l2_h8"
config = {
    "run_name": run_name,
    "hidden_size": 128,
    "num_layers": 2,
    "heads": 8,
    "lr": 0.001,
    "epochs": 40
}
experiments.append(config)

# Experiment 3
run_name = "MH_Bi_LSTM_hs256_l3_h4"
config = {
    "run_name": run_name,
    "hidden_size": 256,
    "num_layers": 3,
    "heads": 4,
    "lr": 0.001,
    "epochs": 40
}
experiments.append(config)

# Experiment 4
run_name = "MH_Bi_LSTM_hs128_l1_h8"
config = {
    "run_name": run_name,
    "hidden_size": 128,
    "num_layers": 1,
    "heads": 8,
    "lr": 0.001,
    "epochs": 40
}
experiments.append(config)

In [32]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

results_log = []
total_start_time = time.time()

for i, config in enumerate(experiments):
    print(f"\n{'='*30} Experiment {i+1}/{len(experiments)}: {config['run_name']} {'='*30}")
    
    experiment_start_time = time.time()
    
    
    model = MultiHeadBiLSTM(
        input_size=len(features),
        hidden_size=config["hidden_size"],
        num_classes=len(class_names),
        num_layers=config["num_layers"],
        num_heads=config["heads"]
    )
    if torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs via DataParallel!")
        model = nn.DataParallel(model)
        
    # model = unzip_and_load_model(model, config['run_name'])
    model.to(device)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
    
    print(f"Training model: {config['run_name']} for {config['epochs']} epochs...")
    training(model, device, criterion, optimizer, train, train_epochs=config["epochs"])
    
    print(f"Evaluating model: {config['run_name']}...")
    eval_metrics = eval_detailed(model, device, dev, report_title=config['run_name'])
    
    experiment_time = time.time() - experiment_start_time
    save_model_as_zip(model, model_name=config['run_name'])
    
    run_result = {
        "run_name": config["run_name"],
        "num_layers": config["num_layers"],
        "hidden_size": config["hidden_size"],
        "num_heads": config["heads"],
        **eval_metrics,
        "training_time_min": experiment_time / 60
    }
    results_log.append(run_result)
    
    pd.DataFrame(results_log).to_csv("multihead_experiment_report_intermediate.csv", index=False)


print(f"\n{'='*30} FINAL EXPERIMENT REPORT {'='*30}")
total_run_time = time.time() - total_start_time
print(f"Total experiment time: {total_run_time/60:.2f} minutes")

results_df = pd.DataFrame(results_log)
results_df = results_df.sort_values(by="mcc", ascending=False).reset_index(drop=True)

pd.set_option('display.float_format', '{:.4f}'.format)
display(results_df)

results_df.to_csv("multihead_experiment_report.csv", index=False)


Using 2 GPUs via DataParallel!
Training model: MH_Bi_LSTM_hs256_l2_h4 for 40 epochs...
Starting training for 40 epochs on cuda.


Training Progress:   0%|          | 0/40 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8657, Accuracy: 74.97%
Epoch 2 Summary -> Avg. Loss: 0.8290, Accuracy: 75.41%
Epoch 3 Summary -> Avg. Loss: 0.7998, Accuracy: 76.49%
Epoch 4 Summary -> Avg. Loss: 0.7778, Accuracy: 76.68%
Epoch 5 Summary -> Avg. Loss: 0.7743, Accuracy: 76.38%
Epoch 6 Summary -> Avg. Loss: 0.7523, Accuracy: 76.35%
Epoch 7 Summary -> Avg. Loss: 0.7386, Accuracy: 76.39%
Epoch 8 Summary -> Avg. Loss: 0.7222, Accuracy: 77.07%
Epoch 9 Summary -> Avg. Loss: 0.7040, Accuracy: 76.99%
Epoch 10 Summary -> Avg. Loss: 0.6897, Accuracy: 76.84%
Epoch 11 Summary -> Avg. Loss: 0.6700, Accuracy: 77.21%
Epoch 12 Summary -> Avg. Loss: 0.6474, Accuracy: 77.90%
Epoch 13 Summary -> Avg. Loss: 0.6274, Accuracy: 78.22%
Epoch 14 Summary -> Avg. Loss: 0.6209, Accuracy: 78.48%
Epoch 15 Summary -> Avg. Loss: 0.5995, Accuracy: 78.90%
Epoch 16 Summary -> Avg. Loss: 0.5893, Accuracy: 78.76%
Epoch 17 Summary -> Avg. Loss: 0.5701, Accuracy: 79.25%
Epoch 18 Summary -> Avg. Loss: 0.5571, Accuracy: 79.54%
E

Evaluating (MH_Bi_LSTM_hs256_l2_h4):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9839    0.8115    0.8894      1878
     C-Class     0.3521    0.7143    0.4717       210
     M-Class     0.2153    0.7949    0.3388        39
     X-Class     0.3333    1.0000    0.5000         4

    accuracy                         0.8020      2131
   macro avg     0.4711    0.8302    0.5500      2131
weighted avg     0.9063    0.8020    0.8374      2131


Normalized Confusion Matrix:
 [[0.8115016  0.14430245 0.04313099 0.00106496]
 [0.11904762 0.71428571 0.15238095 0.01428571]
 [0.         0.12820513 0.79487179 0.07692308]
 [0.         0.         0.         1.        ]]

Key Metrics Summary:
  balanced_accuracy   : 0.8302
  roc_auc_weighted    : 0.9251
  mcc                 : 0.4653
  f1_macro            : 0.5500
  f1_weighted         : 0.8374
MH_Bi_LSTM_hs256_l2_h4.zip saved at /kaggle/working/MH_Bi_LSTM_hs256_l2_h4.zip | Size: 11.28 MB

Using 2 GPUs via DataParallel!
Training model:

Training Progress:   0%|          | 0/40 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8547, Accuracy: 75.16%
Epoch 2 Summary -> Avg. Loss: 0.8068, Accuracy: 75.79%
Epoch 3 Summary -> Avg. Loss: 0.7712, Accuracy: 76.66%
Epoch 4 Summary -> Avg. Loss: 0.7457, Accuracy: 76.78%
Epoch 5 Summary -> Avg. Loss: 0.7121, Accuracy: 77.39%
Epoch 6 Summary -> Avg. Loss: 0.6990, Accuracy: 77.75%
Epoch 7 Summary -> Avg. Loss: 0.6789, Accuracy: 78.07%
Epoch 8 Summary -> Avg. Loss: 0.6695, Accuracy: 78.28%
Epoch 9 Summary -> Avg. Loss: 0.6594, Accuracy: 78.58%
Epoch 10 Summary -> Avg. Loss: 0.6532, Accuracy: 78.57%
Epoch 11 Summary -> Avg. Loss: 0.6264, Accuracy: 79.31%
Epoch 12 Summary -> Avg. Loss: 0.6039, Accuracy: 79.61%
Epoch 13 Summary -> Avg. Loss: 0.5917, Accuracy: 79.87%
Epoch 14 Summary -> Avg. Loss: 0.5667, Accuracy: 80.22%
Epoch 15 Summary -> Avg. Loss: 0.5547, Accuracy: 80.76%
Epoch 16 Summary -> Avg. Loss: 0.5364, Accuracy: 81.18%
Epoch 17 Summary -> Avg. Loss: 0.5252, Accuracy: 81.62%
Epoch 18 Summary -> Avg. Loss: 0.5006, Accuracy: 82.19%
E

Evaluating (MH_Bi_LSTM_hs128_l2_h8):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9928    0.8834    0.9349      1878
     C-Class     0.4848    0.9095    0.6325       210
     M-Class     0.5667    0.8718    0.6869        39
     X-Class     0.5000    0.7500    0.6000         4

    accuracy                         0.8855      2131
   macro avg     0.6361    0.8537    0.7136      2131
weighted avg     0.9340    0.8855    0.8999      2131


Normalized Confusion Matrix:
 [[0.88338658 0.10596379 0.01064963 0.        ]
 [0.05714286 0.90952381 0.02380952 0.00952381]
 [0.         0.1025641  0.87179487 0.02564103]
 [0.         0.         0.25       0.75      ]]

Key Metrics Summary:
  balanced_accuracy   : 0.8537
  roc_auc_weighted    : 0.9635
  mcc                 : 0.6430
  f1_macro            : 0.7136
  f1_weighted         : 0.8999
MH_Bi_LSTM_hs128_l2_h8.zip saved at /kaggle/working/MH_Bi_LSTM_hs128_l2_h8.zip | Size: 2.85 MB

Using 2 GPUs via DataParallel!
Training model: 

Training Progress:   0%|          | 0/40 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8735, Accuracy: 74.81%
Epoch 2 Summary -> Avg. Loss: 0.8253, Accuracy: 75.97%
Epoch 3 Summary -> Avg. Loss: 0.8000, Accuracy: 76.48%
Epoch 4 Summary -> Avg. Loss: 0.7805, Accuracy: 76.13%
Epoch 5 Summary -> Avg. Loss: 0.7546, Accuracy: 76.75%
Epoch 6 Summary -> Avg. Loss: 0.7405, Accuracy: 76.95%
Epoch 7 Summary -> Avg. Loss: 0.7243, Accuracy: 77.19%
Epoch 8 Summary -> Avg. Loss: 0.7074, Accuracy: 77.56%
Epoch 9 Summary -> Avg. Loss: 0.6996, Accuracy: 77.87%
Epoch 10 Summary -> Avg. Loss: 0.7084, Accuracy: 77.55%
Epoch 11 Summary -> Avg. Loss: 0.6620, Accuracy: 78.16%
Epoch 12 Summary -> Avg. Loss: 0.6494, Accuracy: 78.34%
Epoch 13 Summary -> Avg. Loss: 0.6411, Accuracy: 78.39%
Epoch 14 Summary -> Avg. Loss: 0.6168, Accuracy: 78.68%
Epoch 15 Summary -> Avg. Loss: 0.6082, Accuracy: 79.01%
Epoch 16 Summary -> Avg. Loss: 0.5902, Accuracy: 79.40%
Epoch 17 Summary -> Avg. Loss: 0.5608, Accuracy: 79.78%
Epoch 18 Summary -> Avg. Loss: 0.5550, Accuracy: 80.06%
E

Evaluating (MH_Bi_LSTM_hs256_l3_h4):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9864    0.8908    0.9362      1878
     C-Class     0.4793    0.8286    0.6073       210
     M-Class     0.4516    0.7179    0.5545        39
     X-Class     0.4000    1.0000    0.5714         4

    accuracy                         0.8817      2131
   macro avg     0.5793    0.8593    0.6674      2131
weighted avg     0.9256    0.8817    0.8961      2131


Normalized Confusion Matrix:
 [[0.89084132 0.09584665 0.01224707 0.00106496]
 [0.1047619  0.82857143 0.05238095 0.01428571]
 [0.02564103 0.23076923 0.71794872 0.02564103]
 [0.         0.         0.         1.        ]]

Key Metrics Summary:
  balanced_accuracy   : 0.8593
  roc_auc_weighted    : 0.9561
  mcc                 : 0.6084
  f1_macro            : 0.6674
  f1_weighted         : 0.8961
MH_Bi_LSTM_hs256_l3_h4.zip saved at /kaggle/working/MH_Bi_LSTM_hs256_l3_h4.zip | Size: 16.86 MB

Using 2 GPUs via DataParallel!
Training model:

Training Progress:   0%|          | 0/40 [00:00<?, ?epoch/s]

Epoch 1 Summary -> Avg. Loss: 0.8437, Accuracy: 75.39%
Epoch 2 Summary -> Avg. Loss: 0.8058, Accuracy: 75.87%
Epoch 3 Summary -> Avg. Loss: 0.7956, Accuracy: 76.07%
Epoch 4 Summary -> Avg. Loss: 0.7585, Accuracy: 76.18%
Epoch 5 Summary -> Avg. Loss: 0.7309, Accuracy: 76.96%
Epoch 6 Summary -> Avg. Loss: 0.7294, Accuracy: 77.12%
Epoch 7 Summary -> Avg. Loss: 0.7146, Accuracy: 77.53%
Epoch 8 Summary -> Avg. Loss: 0.7026, Accuracy: 77.85%
Epoch 9 Summary -> Avg. Loss: 0.6824, Accuracy: 78.03%
Epoch 10 Summary -> Avg. Loss: 0.6674, Accuracy: 78.24%
Epoch 11 Summary -> Avg. Loss: 0.6504, Accuracy: 78.72%
Epoch 12 Summary -> Avg. Loss: 0.6666, Accuracy: 78.64%
Epoch 13 Summary -> Avg. Loss: 0.6438, Accuracy: 78.56%
Epoch 14 Summary -> Avg. Loss: 0.6285, Accuracy: 79.07%
Epoch 15 Summary -> Avg. Loss: 0.6261, Accuracy: 78.67%
Epoch 16 Summary -> Avg. Loss: 0.6062, Accuracy: 79.34%
Epoch 17 Summary -> Avg. Loss: 0.5959, Accuracy: 79.21%
Epoch 18 Summary -> Avg. Loss: 0.5815, Accuracy: 79.56%
E

Evaluating (MH_Bi_LSTM_hs128_l1_h8):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9744    0.8099    0.8846      1878
     C-Class     0.2900    0.6476    0.4006       210
     M-Class     0.1860    0.4103    0.2560        39
     X-Class     0.2000    0.7500    0.3158         4

    accuracy                         0.7865      2131
   macro avg     0.4126    0.6544    0.4642      2131
weighted avg     0.8911    0.7865    0.8243      2131


Normalized Confusion Matrix:
 [[0.80990415 0.16719915 0.02129925 0.00159744]
 [0.18571429 0.64761905 0.13809524 0.02857143]
 [0.02564103 0.48717949 0.41025641 0.07692308]
 [0.         0.         0.25       0.75      ]]

Key Metrics Summary:
  balanced_accuracy   : 0.6544
  roc_auc_weighted    : 0.9059
  mcc                 : 0.3991
  f1_macro            : 0.4642
  f1_weighted         : 0.8243
MH_Bi_LSTM_hs128_l1_h8.zip saved at /kaggle/working/MH_Bi_LSTM_hs128_l1_h8.zip | Size: 1.45 MB

Total experiment time: 530.83 minutes


Unnamed: 0,run_name,num_layers,hidden_size,num_heads,balanced_accuracy,roc_auc_weighted,mcc,f1_macro,f1_weighted,training_time_min
0,MH_Bi_LSTM_hs128_l2_h8,2,128,8,0.8537,0.9635,0.643,0.7136,0.8999,67.1872
1,MH_Bi_LSTM_hs256_l3_h4,3,256,4,0.8593,0.9561,0.6084,0.6674,0.8961,243.2759
2,MH_Bi_LSTM_hs256_l2_h4,2,256,4,0.8302,0.9251,0.4653,0.55,0.8374,173.4759
3,MH_Bi_LSTM_hs128_l1_h8,1,128,8,0.6544,0.9059,0.3991,0.4642,0.8243,46.8569


# Final Evaluation of Best Model

In [50]:
model = AttentiveLSTM(
            input_size=len(features),
            hidden_size=128,
            num_classes=len(class_names),
            num_layers=2,
            attention="dot",
            bidirectional=True
        )
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.nn.DataParallel(model) 
model_path = "/kaggle/input/lstm-bi-dot/pytorch/default/1/LSTM_Bi_dot.pth"
model.load_state_dict(torch.load(model_path, map_location='cpu'))
model.to(device)
eval_metrics = eval_detailed(model, device, test, report_title="LSTM_Bi_dot")

Evaluating (LSTM_Bi_dot):   0%|          | 0/34 [00:00<?, ?it/s]


Classification Report:

              precision    recall  f1-score   support

    No Flare     0.9902    0.9169    0.9522      1878
     C-Class     0.5770    0.9095    0.7061       210
     M-Class     0.6296    0.8718    0.7312        39
     X-Class     0.5714    1.0000    0.7273         4

    accuracy                         0.9155      2131
   macro avg     0.6921    0.9246    0.7792      2131
weighted avg     0.9421    0.9155    0.9235      2131


Normalized Confusion Matrix:
 [[0.91693291 0.07348243 0.00958466 0.        ]
 [0.07619048 0.90952381 0.00952381 0.0047619 ]
 [0.02564103 0.05128205 0.87179487 0.05128205]
 [0.         0.         0.         1.        ]]

Key Metrics Summary:
  balanced_accuracy   : 0.9246
  roc_auc_weighted    : 0.9730
  mcc                 : 0.7031
  f1_macro            : 0.7792
  f1_weighted         : 0.9235
