In [None]:
import numpy as np
import h5py
from sklearn.semi_supervised import LabelSpreading
from sklearn.metrics import classification_report, confusion_matrix

# --- load the per-sample feature vectors from the HDF5 file ---
# Number of sample slots per class; samples are stored under the string keys
# "0", "1", ... inside each class group.
N_NORMAL_SAMPLES = 49997
N_RELAY_SAMPLES = 49998


def _load_rows(group, n_rows, n_features, label):
    """Read datasets "0".."n_rows-1" from *group* into an (n_rows, n_features) array.

    Indices that have no dataset in the file keep an all-zero row (the array
    is still returned at full size), but they are reported on stdout instead
    of being skipped silently, because zero rows are indistinguishable from
    real feature vectors further down the pipeline.
    """
    rows = np.zeros((n_rows, n_features))
    missing = []
    for i in range(n_rows):
        try:
            dataset = group[str(i)]
        except KeyError:
            missing.append(i)
            continue
        rows[i] = dataset[:]
    if missing:
        print(
            f"WARNING: {len(missing)} of {n_rows} '{label}' samples are missing "
            f"from the file; their rows are left as zeros "
            f"(first missing index: {missing[0]})"
        )
    return rows


with h5py.File("datasets/features_S01_combined.h5", "r") as f:
    norm_grp = f["features/S01/1/Normal/c_data"]
    relay_grp = f["features/S01/1/Relay/c_data"]

    # Feature length is taken from sample "0" of each class.
    n_norm = norm_grp["0"].shape[0]
    n_relay = relay_grp["0"].shape[0]

    data_norm = _load_rows(norm_grp, N_NORMAL_SAMPLES, n_norm, "Normal")
    data_relay = _load_rows(relay_grp, N_RELAY_SAMPLES, n_relay, "Relay")

# Split point 47500: rows before it are used for training, rows from it
# onwards are held out for evaluation.
X_train, X_test_norm = np.split(data_norm, [47500])
X_test_relay = data_relay[47500:]

# Evaluation set: held-out normals first, then held-out relays.
X_test_combined = np.concatenate((X_test_norm, X_test_relay), axis=0)

# Ground truth in the same order: 0 = normal, 1 = relay.
y_true = np.repeat([0, 1], [len(X_test_norm), len(X_test_relay)])

# --- prepare semi-supervised training set ---
# Number of samples per class whose true label is revealed to the model.
N_LABELED_PER_CLASS = 100

rng = np.random.RandomState(42)

# Relay rows used for training must NOT overlap the held-out relay test rows,
# otherwise the evaluation scores samples the model has already seen (some of
# them with their true label). X_test_relay is the tail of data_relay, so the
# training pool is everything before that tail.
relay_train_pool = data_relay[:len(data_relay) - len(X_test_relay)]

# Keep the number of relay training rows the same as before (one test-set's
# worth), drawn at random from the training pool. This keeps X_ssl at its
# previous size, which matters because an RBF-kernel label-spreading graph is
# a dense n x n matrix (see scikit-learn's semi-supervised user guide).
n_relay_train = min(len(X_test_relay), len(relay_train_pool))
relay_rows = np.sort(rng.choice(len(relay_train_pool), n_relay_train, replace=False))
X_train_relay = relay_train_pool[relay_rows]

# combine all training candidates: normals + relays
X_ssl = np.vstack((X_train, X_train_relay))
y_ssl = np.hstack((np.zeros(len(X_train)), np.ones(len(X_train_relay))))

# initialize every sample as "unlabeled" (-1)
y_labels = np.full(len(y_ssl), -1, dtype=int)

# reveal the label of a few randomly chosen samples per class; never ask for
# more samples than the class actually has
norm_pool = np.flatnonzero(y_ssl == 0)
relay_pool = np.flatnonzero(y_ssl == 1)
norm_idx = rng.choice(norm_pool, min(N_LABELED_PER_CLASS, len(norm_pool)), replace=False)
relay_idx = rng.choice(relay_pool, min(N_LABELED_PER_CLASS, len(relay_pool)), replace=False)
y_labels[norm_idx] = 0
y_labels[relay_idx] = 1

# --- fit LabelSpreading ---
# Rows whose entry in y_labels is -1 are treated as unlabeled; the remaining
# rows carry their class label (0 or 1).
model = LabelSpreading(kernel="rbf", max_iter=1000, alpha=0.2).fit(X_ssl, y_labels)

# --- predict on the evaluation set and report ---
y_pred = model.predict(X_test_combined)

# Each heading is followed by its table on the next line.
print("Confusion Matrix:", confusion_matrix(y_true, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_true, y_pred), sep="\n")
