# Manifold Learning with Forecast

In [None]:
from data_loader import load_data
from preprocess import PreprocConfig, preprocess_profiles
from features_manifold import (compute_side_features, scale_features,
                               embed_umap_euclid, embed_umap_fastdtw, ManifoldConfig)
from clustering import (ClusterConfig, cluster_hdbscan, build_cluster_prototypes,
                        assign_anomaly_scores, summarize_clusters)
from visualization import (save_pbm_map, save_cluster_distribution_plot,
                           save_cluster_prototype_plots, export_csv_summaries,
                           build_html_report)
from forecasting import (build_prefix_scaled_channel, make_matrices,
                         knn_forecast, multioutput_forecast, evaluate_forecasts)
import numpy as np


In [None]:
# Load the raw input through the project's load_data helper.
# NOTE(review): "data" is presumably a directory relative to the notebook's
# working directory - confirm against data_loader.load_data.
df = load_data("data")

In [None]:
# Preprocess with the default PreprocConfig; cfg.T is reused by later cells.
cfg = PreprocConfig()
# `out` is indexed by string keys further down: "panel_long", "X",
# "tensor_channels" and "wells_used".
out = preprocess_profiles(df, cfg)

In [None]:
# side features and manifold embedding
# NOTE(review): feats, feats_scaled and scaler are not referenced again in the
# cells shown here; the embedding below is computed from out["X"] instead.
feats = compute_side_features(out["panel_long"], T=cfg.T)
feats_scaled, scaler = scale_features(feats)
# Embed using only the "r_oil_norm" and "wc" channels with a default
# ManifoldConfig. sub_idx is used in the clustering cell to pick the matching
# entries of out["wells_used"]; D and info are not used in the cells shown here.
Z_dtw, sub_idx, D, info = embed_umap_fastdtw(
    out["X"], tensor_channels=out["tensor_channels"],
    channels=["r_oil_norm", "wc"], cfg=ManifoldConfig())


In [None]:
# clustering on the manifold
# Select the wells picked out by sub_idx and convert back to a plain list.
# NOTE(review): presumably this keeps the well list aligned row-for-row with
# Z_dtw - confirm against embed_umap_fastdtw.
wells_sub = np.array(out["wells_used"])[sub_idx].tolist()
# Cluster the embedding with a default ClusterConfig; res["df_map"] is the
# piece of the result consumed by the prototype, plotting and export steps.
res = cluster_hdbscan(Z_dtw, wells_sub, ClusterConfig())
protos = build_cluster_prototypes(out["panel_long"], res["df_map"], T=cfg.T)


In [None]:
# simple visual outputs
# Every artefact below is handed the same out_dir.
# NOTE(review): whether the helpers create out_dir when it is missing cannot
# be seen from this notebook - confirm in the visualization module.
out_dir = "pbm_report_exports"
map_png = save_pbm_map(Z_dtw, res["df_map"], out_dir)
sizes_png = save_cluster_distribution_plot(res["df_map"], out_dir)
summary = summarize_clusters(res["df_map"])
# Prototype plots are requested for four channels of the long panel.
proto_pngs = save_cluster_prototype_plots(out["panel_long"], res["df_map"], protos,
                                         channels=("r_oil_s","wc","gor","r_oil_norm"),
                                         T=cfg.T, out_dir=out_dir)
export_csv_summaries(res["df_map"], summary, out_dir)
# The report builder receives the values returned by the save_* calls above
# together with the cluster map and its summary.
build_html_report(out_dir, map_png, sizes_png, proto_pngs, res["df_map"], summary)


In [None]:
# forecasting example
# Single source of truth for the prefix length. The prefix-scaled channel, the
# design matrices and both forecasters must all use the same value, so it is
# defined once instead of being repeated as a literal in every call.
T_PREF = 20

# Add the "r_oil_pref_norm" channel, derived from "r_oil_s", to the long panel.
panel_long = build_prefix_scaled_channel(
    out["panel_long"], out["wells_used"],
    T=cfg.T, T_pref=T_PREF,
    rate_col="r_oil_s", out_col="r_oil_pref_norm")

# Build the prefix inputs and the suffix / full-length targets.
X_pref, Y_suffix, Y_full = make_matrices(
    panel_long, out["wells_used"],
    T=cfg.T, T_pref=T_PREF,
    channel="r_oil_pref_norm", target_col="r_oil_s")

# Two forecasters over the same prefix; the second return value of each call
# is discarded.
Y_pred_knn, _ = knn_forecast(X_pref, Y_full, T_pref=T_PREF, K=15)
Y_pred_lr, _ = multioutput_forecast(
    panel_long, out["wells_used"],
    T=cfg.T, T_pref=T_PREF, Y_full=Y_full)

# NOTE(review): both predictions are scored against Y_suffix, which assumes
# the forecasters return suffix-shaped arrays - confirm in forecasting.py.
metrics_knn = evaluate_forecasts(Y_suffix, Y_pred_knn)
metrics_lr = evaluate_forecasts(Y_suffix, Y_pred_lr)
