In [None]:
# Install or upgrade the third-party packages listed below (-U upgrades already-installed ones).
# NOTE(review): tensorflow is imported by the following cells but is not in this list — confirm it is installed separately.
%pip install -U pandas numpy scikit-learn matplotlib openpyxl scikeras

In [None]:
import sys, tensorflow as tf
# Report the interpreter path, the TensorFlow version, and the GPUs TensorFlow can see.
print(sys.executable)
tf_version = tf.__version__
print("TF:", tf_version)
visible_gpus = tf.config.list_physical_devices("GPU")
print("GPUs:", visible_gpus)

In [None]:

import os, random, numpy as np, pandas as pd, matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers


# Seed Python's, NumPy's and TensorFlow's random number generators with the same fixed value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Create the export directory; exist_ok=True keeps re-runs from failing when it already exists.
OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("TensorFlow:", tf.__version__)

In [None]:
# Name of the regression target column (the string contains two consecutive spaces).
# NOTE(review): confirm this matches the spreadsheet header exactly.
TARGET = "Grid  Power"
# When True, rows whose target equals 0 are removed.
FILTER_Y_EQ_ZERO = True

# Load the workbook and drop the 'times' column if it is present.
raw_data = pd.read_excel('raw data.xlsx')
raw_data = raw_data.drop(columns=['times'], errors='ignore').copy()

# Fail early with a readable message instead of a bare KeyError further down.
if TARGET not in raw_data.columns:
    raise KeyError(
        f"Target column {TARGET!r} not found; available columns: {raw_data.columns.tolist()}"
    )

# Coerce EVERY column, the target included, to numeric. Unparseable cells become NaN and
# their rows are dropped here, rather than surviving until the later float32 conversion
# of the target raises.
raw_data = raw_data.apply(pd.to_numeric, errors='coerce')
raw_data = raw_data.dropna().reset_index(drop=True)

# Apply the zero filter after coercion so the comparison runs on numeric values.
if FILTER_Y_EQ_ZERO:
    raw_data = raw_data[raw_data[TARGET] != 0].reset_index(drop=True)

# Every column other than the target.
num_cols = [c for c in raw_data.columns if c != TARGET]

print("Data shape:", raw_data.shape)
display(raw_data.head(3))


In [None]:


# 80% of the rows (seeded sample) form the training pool; the remaining rows are the test set.
train_dataset = raw_data.sample(frac=0.8, random_state=0)
in_train_pool = raw_data.index.isin(train_dataset.index)
test_dataset = raw_data.loc[~in_train_pool]

# Test features and target as separate objects.
test_y_data = test_dataset[TARGET].copy()
test_x_data = test_dataset.drop(columns=[TARGET]).copy()

# 60% of the training pool (seeded sample) keeps its labels; the rest is treated as unlabeled.
labeled_train_data = train_dataset.sample(frac=0.6, random_state=0).copy()
in_labeled_part = train_dataset.index.isin(labeled_train_data.index)
unlabeled_train_data = train_dataset.loc[~in_labeled_part].copy()

# Detach the target column from both parts; the unlabeled part's true values are kept
# aside under a separate name.
unlabeled_train_data_actual = unlabeled_train_data[TARGET].copy()
unlabeled_train_data = unlabeled_train_data.drop(columns=[TARGET])
labeled_data_labels = labeled_train_data[TARGET].copy()
labeled_train_data = labeled_train_data.drop(columns=[TARGET])

# Row counts: training pool, labeled part, unlabeled part, test set.
split_sizes = (
    len(train_dataset),
    len(labeled_train_data),
    len(unlabeled_train_data),
    len(test_dataset),
)
split_sizes


In [None]:


from sklearn.preprocessing import StandardScaler


print("Columns:", train_dataset.columns.tolist())

# Feature columns are all training-pool columns except the target.
feature_cols = [col for col in train_dataset.columns if col != TARGET]

# Fit the scaler on the labeled training features only; it is reused for every other split.
scaler_fs = StandardScaler()
scaler_fs.fit(labeled_train_data[feature_cols])


def _standardize(frame):
    """Return the feature columns of `frame`, scaled by `scaler_fs`, as a float32 array."""
    scaled = scaler_fs.transform(frame[feature_cols])
    return scaled.astype("float32")


# Labeled training features and targets.
X_fs_tr = _standardize(labeled_train_data)
y_fs_tr = labeled_data_labels.to_numpy().astype("float32")

# Test features and targets.
X_fs_te = _standardize(test_dataset)
y_fs_te = test_dataset[TARGET].to_numpy().astype("float32")

INPUT_DIM_FS = X_fs_tr.shape[1]
print("Supervised shapes → X_tr:", X_fs_tr.shape, " X_te:", X_fs_te.shape, "  INPUT_DIM_FS:", INPUT_DIM_FS)


In [None]:
# === data prep ===

# Scale the unlabeled features with the scaler fitted on the labeled features.
unlabeled_scaled = scaler_fs.transform(unlabeled_train_data[feature_cols])
X_u = unlabeled_scaled.astype("float32")

# The trailing 20% of the labeled rows become the validation part; the leading rows stay
# as the labeled training part.
n_all = len(X_fs_tr)
n_val = int(round(0.20 * n_all))
n_fit = n_all - n_val
X_l_tr, X_l_va = X_fs_tr[:n_fit], X_fs_tr[n_fit:]
y_l_tr, y_l_va = y_fs_tr[:n_fit], y_fs_tr[n_fit:]

X_u.shape, X_l_tr.shape, X_l_va.shape

In [None]:
# === Label Propagation (continuous) — build graph & propagate ===

from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix, diags, identity
from scipy.sparse.linalg import spsolve


K_NEIGHBORS = 15        # neighbours per node in the kNN graph
ALPHA       = 0.9       # weight applied to the propagated signal in the linear system below
SIGMA_MODE  = "median"  # "median" derives the RBF bandwidth from the data; anything else is parsed as a float


# Stack labeled rows first: indices [0, n_l) are labeled, [n_l, n) are unlabeled.
X_all = np.vstack([X_l_tr, X_u])
n_l   = X_l_tr.shape[0]
n_u   = X_u.shape[0]
n     = n_l + n_u
assert n_l > 0 and n_u > 0, "labeled or unlabeled is empty，Please check the split ratio.。"


# RBF bandwidth. The fitted points are passed back in as queries, so column 0 of the
# result is each point's own zero-distance match; it is dropped before taking the median.
nbr_tmp = NearestNeighbors(n_neighbors=min(10, max(2, X_all.shape[0]-1)), metric='euclidean').fit(X_all)
dists_tmp, _ = nbr_tmp.kneighbors(X_all)
if SIGMA_MODE == "median":
    sigma = np.median(dists_tmp[:, 1:]) + 1e-12  # epsilon keeps sigma strictly positive
else:
    sigma = float(SIGMA_MODE)
print(f"[LP] RBF sigma = {sigma:.6f}")


# kNN graph with RBF edge weights.
# FIX: kneighbors() is called WITHOUT a query so a point is never returned as its own
# neighbour. Passing X_all put a weight-1 self-loop on every node, which wasted one of the
# K_NEIGHBORS slots and let each node's own (unknown) value dominate its row of S.
nbrs = NearestNeighbors(n_neighbors=min(K_NEIGHBORS, n-1), metric='euclidean').fit(X_all)
dists, inds = nbrs.kneighbors()  # [n, k], self excluded
rows = np.repeat(np.arange(n), inds.shape[1])
cols = inds.ravel()
w    = np.exp(-(dists**2) / (2.0 * sigma**2)).ravel()
W    = csr_matrix((w, (rows, cols)), shape=(n, n))
W    = W.maximum(W.T)  # symmetrise: keep an edge if either endpoint selected the other


# Row-normalise: S = D^-1 W, so each row of S sums to (approximately) 1.
d = np.asarray(W.sum(axis=1)).ravel() + 1e-12  # epsilon guards against division by zero
D_inv = diags(1.0 / d)
S = (D_inv @ W).tocsr()


# Blocks of S restricted to unlabeled rows: links to labeled (S_ul) and unlabeled (S_uu) nodes.
S_ul = S[n_l:n, 0:n_l]
S_uu = S[n_l:n, n_l:n]
Iuu  = identity(n_u, format="csc")

# FIX: propagate deviations from the labeled mean, not raw targets. Rows of S sum to 1 and
# ALPHA < 1, so solving on raw targets scales every pseudo-label toward 0 (a constant
# target would not be reproduced). After centring, the shrinkage pulls toward the labeled
# mean, which is added back once the system is solved.
y_l_mean = float(np.mean(y_l_tr))
rhs  = (ALPHA * S_ul) @ (y_l_tr - y_l_mean)
A    = Iuu - (ALPHA * S_uu).tocsc()


# Solve (I - ALPHA * S_uu) z = ALPHA * S_ul (y_l - mean), then undo the centring.
y_u_hat = np.asarray(spsolve(A, rhs)).ravel() + y_l_mean

print(f"[LP] propagated unlabeled = {y_u_hat.shape}, mean={float(y_u_hat.mean()):.4f}, std={float(y_u_hat.std()):.4f}")


In [None]:
# === Inductive prediction (RBF smoother, no test leakage) + export ===
import os, pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import r2_score, mean_absolute_percentage_error, mean_absolute_error, mean_squared_error

def rbf_knn_predict(X_train, y_train, X_query, k=15, sigma=1.0):
    """Predict a target for each query row as an RBF-weighted mean of its nearest training targets.

    Parameters
    ----------
    X_train : array-like, one row per training sample.
    y_train : array-like with one target per row of X_train (list, ndarray or pandas Series).
    X_query : array-like, one row per point to predict.
    k : int, number of neighbours to average; capped at the number of training rows.
    sigma : float, RBF bandwidth; must be positive.

    Returns
    -------
    numpy.ndarray with one prediction per query row.

    Raises
    ------
    ValueError
        If X_train is empty, y_train's length differs from X_train's row count, k < 1,
        or sigma is not positive.
    """
    # Plain arrays guarantee the positional indexing used below; a pandas Series would be
    # indexed by label and a list cannot be indexed with an index array at all.
    X_train = np.asarray(X_train)
    X_query = np.asarray(X_query)
    y_train = np.asarray(y_train)

    n_train = X_train.shape[0]
    if n_train == 0:
        raise ValueError("X_train must contain at least one sample")
    if y_train.shape[0] != n_train:
        raise ValueError(f"X_train has {n_train} rows but y_train has {y_train.shape[0]} entries")
    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")
    if not sigma > 0:
        # sigma == 0 would divide by zero and silently produce NaN weights.
        raise ValueError(f"sigma must be positive, got {sigma}")

    nbr = NearestNeighbors(n_neighbors=min(k, n_train), metric='euclidean').fit(X_train)
    d, ind = nbr.kneighbors(X_query)  # d: [m,k], ind: [m,k]
    # The small constant keeps the denominator non-zero when every RBF weight underflows.
    w = np.exp(-(d**2) / (2.0 * sigma**2)) + 1e-12
    y_neighbors = y_train[ind]        # [m,k]
    return (w * y_neighbors).sum(axis=1) / w.sum(axis=1)


# Reference set for the smoother: labeled training rows with their true targets, followed
# by the unlabeled rows with their propagated pseudo-labels.
X_tr_aug = np.concatenate((X_l_tr, X_u), axis=0)
y_tr_aug = np.hstack((y_l_tr, y_u_hat))

# Predict the labeled-train, validation and test rows from that same reference set.
yhat_tr, yhat_va, yhat_te = (
    rbf_knn_predict(X_tr_aug, y_tr_aug, X_query, k=K_NEIGHBORS, sigma=sigma)
    for X_query in (X_l_tr, X_l_va, X_fs_te)
)

def mb(y_true, y_pred):
    """Collect regression metrics for one set of predictions.

    Returns a dict with the keys "Loss", "MAE", "MSE", "RMSE", "R2" and "MAPE", in that
    order. "Loss" holds the same value as "MAE"; "RMSE" is the square root of "MSE";
    "MAPE" is the value returned by mean_absolute_percentage_error.
    """
    abs_err = mean_absolute_error(y_true, y_pred)
    sq_err = mean_squared_error(y_true, y_pred)
    metrics = {
        "Loss": abs_err,
        "MAE": abs_err,
        "MSE": sq_err,
        "RMSE": np.sqrt(sq_err),
    }
    metrics["R2"] = r2_score(y_true, y_pred)
    metrics["MAPE"] = mean_absolute_percentage_error(y_true, y_pred)
    return metrics

# Score each split with the same metric helper.
m_tr, m_va, m_te = (
    mb(truth, pred)
    for truth, pred in ((y_l_tr, yhat_tr), (y_l_va, yhat_va), (y_fs_te, yhat_te))
)

# One row per split, with the metric columns in a fixed order.
metric_columns = ["Loss", "MAE", "MSE", "RMSE", "R2", "MAPE"]
metrics_by_split = dict(Train=m_tr, Val=m_va, Test=m_te)
summary_df = pd.DataFrame.from_dict(metrics_by_split, orient="index").loc[:, metric_columns]
print("[Label Propagation (continuous)] Summary:\n", summary_df)


In [None]:

def _residual_frame(y_true, y_pred):
    """Tabulate targets, predictions and their difference (y_true - y_pred)."""
    return pd.DataFrame({"y_true": y_true, "y_pred": y_pred, "residual": y_true - y_pred})


# Make sure the export directory exists; exist_ok=True keeps re-runs from failing.
os.makedirs("outputs", exist_ok=True)

# Standalone workbook with the test-set targets and predictions only.
pd.DataFrame({"y_true": y_fs_te, "y_pred": yhat_te}).to_excel("outputs/labelprop_test_preds.xlsx", index=False)

# Multi-sheet workbook: metric summary, per-split residuals, and the propagated pseudo-labels.
# The handle is named `writer` so it does not overwrite the edge-weight array `w` built in
# the label-propagation cell.
with pd.ExcelWriter("outputs/labelprop_errors.xlsx", engine="openpyxl") as writer:
    summary_df.to_excel(writer, sheet_name="summary")
    _residual_frame(y_l_tr, yhat_tr).to_excel(writer, sheet_name="pred_train", index=False)
    _residual_frame(y_l_va, yhat_va).to_excel(writer, sheet_name="pred_val", index=False)
    _residual_frame(y_fs_te, yhat_te).to_excel(writer, sheet_name="pred_test", index=False)
    pd.DataFrame({"lp_pseudo_y_unlabeled": y_u_hat}).to_excel(writer, sheet_name="lp_pseudo_unlabeled", index=False)

print("Saved → outputs/labelprop_errors.xlsx, outputs/labelprop_test_preds.xlsx")
