In [11]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [39]:
# Load from local /content or from Drive
X = np.load('X_2012.npy')   # shape (501318, 180, 2)
y = np.load('y_2012.npy')   # shape (501318,)

# Replace non-finite values with 0
X[~np.isfinite(X)] = 0.0

# Clip negatives to 0
X = np.clip(X, 0.0, None)

X = np.log10(X + 1e-10)

mean = X.mean(axis=(0, 1), keepdims=True)
std  = X.std(axis=(0, 1), keepdims=True) + 1e-6

X = (X - mean) / std

print(X.shape, y.shape)
pd.Series(y).value_counts()

(501318, 180, 2) (501318,)


Unnamed: 0,count
A,348001
C,102287
B,39801
M,10619
X,610


In [40]:
# Encode string labels -> integers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

X_train, X_temp, y_train, y_temp = train_test_split(
    X, y_encoded, test_size=0.30, random_state=42, stratify=y_encoded
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp
)

classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = torch.tensor(class_weights, dtype=torch.float32)

In [27]:
from sklearn.ensemble import RandomForestClassifier

# Flatten
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_val_flat   = X_val.reshape(X_val.shape[0], -1)
X_test_flat  = X_test.reshape(X_test.shape[0], -1)

# Optional: subsample for speed
n_sub = 80000
idx = np.random.choice(len(X_train_flat), size=n_sub, replace=False)
X_train_sub = X_train_flat[idx]
y_train_sub = y_train[idx]

rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    n_jobs=-1,
    random_state=42,
    class_weight='balanced_subsample'
)

rf.fit(X_train_sub, y_train_sub)

y_pred_val = rf.predict(X_val_flat)
print("Random Forest - Validation report:")
print(classification_report(y_val, y_pred_val, target_names=le.classes_))

Random Forest - Validation report:
              precision    recall  f1-score   support

           A       0.76      0.99      0.86     52201
           B       0.93      0.10      0.17      5970
           C       0.94      0.35      0.51     15343
           M       0.86      0.28      0.42      1593
           X       1.00      0.41      0.58        91

    accuracy                           0.78     75198
   macro avg       0.90      0.43      0.51     75198
weighted avg       0.81      0.78      0.73     75198



In [41]:
class GOESDataset(Dataset):
    def __init__(self, X, y):
        self.X = X.astype(np.float32)
        self.y = y.astype(np.int64)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # X: (seq_len=180, features=2) -> (channels=2, seq_len=180)
        x = self.X[idx].transpose(1, 0)
        return x, self.y[idx]

train_ds = GOESDataset(X_train, y_train)
val_ds   = GOESDataset(X_val, y_val)
test_ds  = GOESDataset(X_test, y_test)

train_loader = DataLoader(train_ds, batch_size=256, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=512, shuffle=False)
test_loader  = DataLoader(test_ds, batch_size=512, shuffle=False)

In [42]:
class CNN1D(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.conv1 = nn.Conv1d(2, 32, kernel_size=5, padding=2)
        self.bn1   = nn.BatchNorm1d(32)

        self.conv2 = nn.Conv1d(32, 64, kernel_size=5, padding=2)
        self.bn2   = nn.BatchNorm1d(64)

        self.conv3 = nn.Conv1d(64, 128, kernel_size=5, padding=2)
        self.bn3   = nn.BatchNorm1d(128)

        self.pool = nn.MaxPool1d(2)
        self.dropout = nn.Dropout(0.3)

        # global pooling over time dimension
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        self.fc = nn.Linear(128, n_classes)

    def forward(self, x):
        # x: (batch, 2, 180)
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))  # -> (32, 90)
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))  # -> (64, 45)
        x = self.pool(torch.relu(self.bn3(self.conv3(x))))  # -> (128, 22 or 23)
        x = self.global_pool(x)   # -> (batch, 128, 1)
        x = x.squeeze(-1)         # -> (batch, 128)
        x = self.dropout(x)
        x = self.fc(x)
        return x

In [43]:
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        """
        alpha: tensor of shape [num_classes] with class weights
        gamma: focusing parameter
        """
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        # logits: (batch, num_classes)
        # targets: (batch,)
        ce_loss = F.cross_entropy(
            logits, targets,
            weight=self.alpha,
            reduction='none'  # we handle reduction ourselves
        )  # shape (batch,)

        # pt is the probability of the true class
        pt = torch.exp(-ce_loss)  # high for well-classified, low for misclassified

        focal_term = (1 - pt) ** self.gamma
        loss = focal_term * ce_loss

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:
            return loss

In [44]:
print("Using focal loss:", type(criterion))
print("Class weights:", class_weights)

Using focal loss: <class '__main__.FocalLoss'>
Class weights: tensor([  0.2881,   2.5191,   0.9802,   9.4423, 164.3663])


In [45]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

n_classes = len(le.classes_)
model = CNN1D(n_classes).to(device)

raw = class_weights.numpy()
scaled = np.sqrt(raw)
scaled = scaled / scaled.min()
class_weights_tamed = torch.tensor(scaled, dtype=torch.float32)
print("Tamed weights:", class_weights_tamed)

criterion = FocalLoss(alpha=class_weights_tamed.to(device), gamma=1.0)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

def run_epoch(loader, train=True):
    if train:
        model.train()
    else:
        model.eval()
    running_loss, correct, total = 0.0, 0, 0

    for Xb, yb in loader:
        Xb, yb = Xb.to(device), yb.to(device)

        if train:
            optimizer.zero_grad()

        with torch.set_grad_enabled(train):
            outputs = model(Xb)
            loss = criterion(outputs, yb)
            if train:
                loss.backward()
                optimizer.step()

        running_loss += loss.item() * yb.size(0)
        preds = outputs.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total += yb.size(0)

    return running_loss / total, correct / total

EPOCHS = 20
for epoch in range(1, EPOCHS+1):
    train_loss, train_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc     = run_epoch(val_loader, train=False)
    print(f"Epoch {epoch:02d} | "
          f"Train loss {train_loss:.4f}, acc {train_acc:.3f} | "
          f"Val loss {val_loss:.4f}, acc {val_acc:.3f}")

Using device: cpu
Tamed weights: tensor([ 1.0000,  2.9569,  1.8445,  5.7248, 23.8850])
Epoch 01 | Train loss 1.1538, acc 0.597 | Val loss 1.1700, acc 0.498
Epoch 02 | Train loss 1.0821, acc 0.611 | Val loss 1.2890, acc 0.395
Epoch 03 | Train loss 1.0439, acc 0.616 | Val loss 1.2970, acc 0.647
Epoch 04 | Train loss 1.0116, acc 0.622 | Val loss 1.0390, acc 0.650
Epoch 05 | Train loss 0.9797, acc 0.627 | Val loss 1.0973, acc 0.678
Epoch 06 | Train loss 0.9485, acc 0.631 | Val loss 1.0052, acc 0.701
Epoch 07 | Train loss 0.9161, acc 0.638 | Val loss 1.4685, acc 0.591
Epoch 08 | Train loss 0.8817, acc 0.645 | Val loss 0.9939, acc 0.658
Epoch 09 | Train loss 0.8493, acc 0.652 | Val loss 0.9685, acc 0.665
Epoch 10 | Train loss 0.8253, acc 0.659 | Val loss 1.2489, acc 0.629
Epoch 11 | Train loss 0.8014, acc 0.665 | Val loss 1.0287, acc 0.640
Epoch 12 | Train loss 0.7800, acc 0.671 | Val loss 0.8101, acc 0.675
Epoch 13 | Train loss 0.7576, acc 0.677 | Val loss 0.9496, acc 0.706
Epoch 14 | Train

In [46]:
model.eval()
all_preds = []
all_true  = []

with torch.no_grad():
    for Xb, yb in test_loader:
        Xb, yb = Xb.to(device), yb.to(device)
        outputs = model(Xb)
        preds = outputs.argmax(dim=1)
        all_preds.append(preds.cpu().numpy())
        all_true.append(yb.cpu().numpy())

all_preds = np.concatenate(all_preds)
all_true  = np.concatenate(all_true)

print("CNN Test classification report:")
print(classification_report(all_true, all_preds, target_names=le.classes_))

print("Confusion matrix:")
print(confusion_matrix(all_true, all_preds))

CNN Test classification report:
              precision    recall  f1-score   support

           A       0.87      0.70      0.77     52200
           B       0.46      0.67      0.55      5970
           C       0.48      0.72      0.57     15343
           M       0.67      0.59      0.62      1593
           X       0.58      0.99      0.73        92

    accuracy                           0.70     75198
   macro avg       0.61      0.73      0.65     75198
weighted avg       0.75      0.70      0.71     75198

Confusion matrix:
[[36428  4311 11164   242    55]
 [ 1506  4016   441     7     0]
 [ 3711   365 11046   213     8]
 [  164    10   484   933     2]
 [    1     0     0     0    91]]


1Ô∏è‚É£ What these new results really mean

RF (for comparison, still similar):

Accuracy: 0.78

C recall: 0.35

M recall: 0.28

X recall: 0.41

So RF = high accuracy, but meh at catching real flares.

CNN with focal loss + deeper net + log scaling:

              precision    recall   support
A              0.92        0.01     52200
B              0.16        0.96      5970
C              0.32        0.57     15343
M              0.13        0.96      1593
X              0.24        1.00        92

accuracy: 0.22
macro avg recall: 0.70


Interpretation:

It almost completely stopped predicting A (only 1% of A are recognised as A).

It is extremely good at recognising flares:

B recall: 0.96

C recall: 0.57

M recall: 0.96

X recall: 1.00

So it‚Äôs basically saying:

‚ÄúIf there is any chance of a flare, I scream FLARE, I don‚Äôt care about being wrong on quiet periods.‚Äù

In operational space-weather terms:
This is a hyper-sensitive ‚Äúnever miss a flare‚Äù mode.
Tons of false alarms, very few missed flares.

That‚Äôs actually a legit behaviour and something you can use in your thesis as one extreme of the trade-off.

2Ô∏è‚É£ Why this is happening (intuition)

Your class weights:

A:  0.29
B:  2.52
C:  0.98
M:  9.44
X: 164.37


And then we used focal loss with gamma=2:

Rare classes (M, X) are hugely upweighted.

Focal loss further boosts the contribution from misclassified hard examples.

So the optimisation problem becomes:

‚ÄúI would rather be wrong on thousands of A-class windows than miss even a few M/X flares.‚Äù

The model listened. Hard. üòÖ

So:

Mathematically: the loss from misclassifying flares is massive compared to misclassifying A.

So the network sacrifices almost all A accuracy to nail flares.

In other words, we overcorrected the imbalance.