In [None]:

import glob
import os
import warnings
from datetime import datetime, timezone

import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")
os.environ["SCIPY_ARRAY_API"] = "1"

from pbm.preprocess import PreprocConfig, preprocess_profiles
from pbm.manifold import compute_side_features, scale_features, embed_umap_euclid, ManifoldConfig, embed_umap_fastdtw
from pbm.segmentation import ClusterConfig, cluster_hdbscan, assign_anomaly_scores, build_cluster_prototypes, summarize_clusters
from pbm.report import save_pbm_map, save_cluster_distribution_plot, save_cluster_prototype_plots, export_csv_summaries, build_html_report
from pbm.forecast import build_prefix_scaled_channel, make_matrices, knn_forecast, multioutput_forecast, evaluate_forecasts, save_predictions_csv, plot_example


In [None]:

# Load every CSV in `folder` into one table, normalise the column names and
# keep only rows whose oil, gas and water volumes are all non-negative.
folder = "data"
# glob returns paths in arbitrary, OS-dependent order; sorting the list makes
# the concatenated row order reproducible from run to run.
all_csv = sorted(glob.glob(os.path.join(folder, "*.csv")))
if not all_csv:
    # Without this guard pd.concat fails with the opaque
    # "No objects to concatenate" message.
    raise FileNotFoundError(f"No CSV files found in folder {folder!r}")
df_0 = pd.concat((pd.read_csv(f) for f in all_csv), ignore_index=True)
print(f"Считано файлов: {len(all_csv)}")

# Short, uniform column names used by the rest of the notebook.
df_0 = df_0.rename(columns={"BBLS_OIL_COND":"oil", "MCF_GAS": 'gas', "BBLS_WTR":"water", "API_WellNo":"well_name", "RptDate":"date", "DAYS_PROD":"days_prod"})
df_0["date"] = pd.to_datetime(df_0["date"])

df = df_0.drop(columns=["Lease_Unit", "Formation"])
print(df.shape)  # shape before the non-negativity filter

df = df.sort_values(by=["well_name", "date"])
# Comparisons against NaN evaluate to False, so rows with a missing volume are
# dropped together with the negative ones.
non_negative = (df["oil"] >= 0) & (df["gas"] >= 0) & (df["water"] >= 0)
# Reset the index only after filtering so the final frame has a contiguous
# 0..n-1 index instead of gaps where rows were removed.
df = df[non_negative].reset_index(drop=True)
df


In [None]:

# Run the project preprocessing step on the cleaned production table `df`
# using a PreprocConfig built with its default arguments.
# Later cells read the keys "panel_long", "X", "wells_used", "tensor_channels"
# and "config" from the returned `out` object.
cfg = PreprocConfig()
out = preprocess_profiles(df, cfg)


In [None]:

# Unpack the preprocessing outputs that the embedding steps below consume.
panel_long, X, wells, tensor_channels = (
    out[key] for key in ("panel_long", "X", "wells_used", "tensor_channels")
)
# Fall back to the second dimension of X when the config carries no "T".
T = out["config"].get("T", X.shape[1])

# Per-well side features and their scaled version.
feats = compute_side_features(panel_long, T=T)
feats_scaled, scaler = scale_features(feats)

# UMAP hyper-parameters shared by the Euclidean and the FastDTW embeddings.
umap_params = dict(n_neighbors=30, min_dist=0.05, n_components=2, random_state=42)

# Embedding based on Euclidean distance over the selected channels.
Z_euclid, umap_e = embed_umap_euclid(
    X,
    tensor_channels=tensor_channels,
    channels=["r_oil_norm", "wc"],
    **umap_params,
)

# Embedding based on FastDTW distances, computed on a sample of 500 profiles.
cfg_m = ManifoldConfig(
    channels=("r_oil_norm", "wc"),
    fastdtw_radius=6,
    k_refine=40,
    weights=(0.7, 0.3),
    **umap_params,
)
Z_dtw, sub_idx, D, info = embed_umap_fastdtw(
    X,
    tensor_channels=tensor_channels,
    channels=cfg_m.channels,
    cfg=cfg_m,
    sample_size=500,
)

# Pair the wells selected by sub_idx with their two embedding coordinates.
sampled_wells = np.array(wells)[sub_idx]
df_map = pd.DataFrame(
    {"well_name": sampled_wells, "x": Z_dtw[:, 0], "y": Z_dtw[:, 1]}
)


In [None]:

# Cluster the FastDTW embedding with HDBSCAN; the well names passed alongside
# are the ones selected by sub_idx.
cfg_c = ClusterConfig(min_cluster_size=50, min_samples=12)
embedded_wells = np.array(wells)[sub_idx].tolist()
res = cluster_hdbscan(Z_dtw, embedded_wells, cfg_c)

# Attach anomaly scores to the map returned by the clustering step.
df_map = assign_anomaly_scores(res["df_map"], Z_dtw, res["labels"])

# Channels for which a prototype is built per cluster.
proto_channels = ("r_oil_s", "wc", "gor", "r_oil_norm")
protos = build_cluster_prototypes(
    panel_long, df_map, channels=proto_channels, T=T, method="auto"
)
summary = summarize_clusters(df_map)


In [None]:

# Export the PBM map, cluster plots, CSV summaries and the HTML report.
out_dir = "./pbm_report_exports"
report_channels = ("r_oil_s", "wc", "gor", "r_oil_norm")

map_png = save_pbm_map(Z_dtw, df_map, out_dir)
sizes_png = save_cluster_distribution_plot(df_map, out_dir)
proto_pngs = save_cluster_prototype_plots(
    panel_long,
    df_map,
    protos,
    channels=report_channels,
    T=T,
    out_dir=out_dir,
)
csvs = export_csv_summaries(df_map, summary, out_dir, top_anoms=50)

report = build_html_report(
    out_dir,
    map_png,
    sizes_png,
    proto_pngs,
    df_map,
    summary,
    title="PBM Report",
)
print("Отчёт:", report)


In [None]:

# Forecast evaluation: build prefix/suffix matrices from the preprocessed
# panel, run the KNN and multi-output forecasters, score both, and export the
# arrays, CSVs, example plots and an HTML summary into ./forecast_exports.
panel_long = out["panel_long"].copy()  # work on a copy of the preprocessed panel
wells_used = out["wells_used"]
# Use the same fallback as the embedding cell, so a config without "T" does
# not raise KeyError here while it is tolerated there.
T = int(out["config"].get("T", out["X"].shape[1]))
T_pref = 20  # number of leading steps passed to the forecasters as the prefix

panel_long = build_prefix_scaled_channel(panel_long, wells_used, T=T, T_pref=T_pref,
                                         q=0.90, rate_col="r_oil_s", out_col="r_oil_pref_norm")

X_pref, Y_suffix_true, Y_full = make_matrices(panel_long, wells_used, T=T, T_pref=T_pref,
                                              channel="r_oil_pref_norm", target_col="r_oil_s")

Y_pred_knn, knn_info = knn_forecast(X_pref, Y_full, T_pref=T_pref, K=15)
Y_pred_lr, lr_info = multioutput_forecast(panel_long, wells_used, T=T, T_pref=T_pref, Y_full=Y_full, random_state=43)

m_knn = evaluate_forecasts(Y_suffix_true, Y_pred_knn)
m_lr  = evaluate_forecasts(Y_suffix_true, Y_pred_lr)
print("KNN   → RMSE={rmse:.4f}, sMAPE={smape:.4f}, N={n_eval}".format(**m_knn))
print("ENet  → RMSE={rmse:.4f}, sMAPE={smape:.4f}, N={n_eval}".format(**m_lr))

out_dir = "./forecast_exports"
os.makedirs(out_dir, exist_ok=True)

# Raw arrays, saved for later inspection.
np.save(os.path.join(out_dir, "Y_suffix_true.npy"), Y_suffix_true)
np.save(os.path.join(out_dir, "Y_pred_knn.npy"), Y_pred_knn)
np.save(os.path.join(out_dir, "Y_pred_enet.npy"), Y_pred_lr)

# One metrics row per model, in the same column order as before.
metrics_df = pd.DataFrame([
    {"model": name, "rmse": m["rmse"], "smape": m["smape"], "n_eval": m["n_eval"]}
    for name, m in (("knn", m_knn), ("elasticnet", m_lr))
])
metrics_csv = os.path.join(out_dir, "metrics.csv")
metrics_df.to_csv(metrics_csv, index=False)

save_predictions_csv(Y_pred_knn, wells_used, "knn", out_dir, T_pref, T)
save_predictions_csv(Y_pred_lr, wells_used, "elasticnet", out_dir, T_pref, T)

# Pick up to three rows whose KNN prediction is fully finite and plot both
# models for each; the fixed seed keeps the selection reproducible.
rng = np.random.default_rng(43)
I = np.where(np.isfinite(Y_pred_knn).all(axis=1))[0]
show = rng.choice(I, size=min(3, len(I)), replace=False) if len(I) else []
example_imgs = []
for i in show:
    example_imgs.append(plot_example(i, "knn_example", Y_suffix_true, Y_pred_knn, T_pref, T, out_dir))
    example_imgs.append(plot_example(i, "enet_example", Y_suffix_true, Y_pred_lr, T_pref, T, out_dir))

# datetime.utcnow() is deprecated since Python 3.12; take an aware UTC time
# and render its "+00:00" offset as the same trailing "Z" used previously.
generated_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
# Image tags are built outside the template to avoid a nested f-string.
example_tags = "".join(
    f"<img src='{os.path.basename(p)}' style='max-width:640px;display:block;margin-bottom:10px;'/>"
    for p in example_imgs
)

html = f"""
<html><head><meta charset='utf-8'><title>Forecast Report</title></head><body>
<h2>Forecast evaluation (prefix {T_pref} → total {T})</h2>
<p>Generated: {generated_at}</p>
<table border='1' cellspacing='0' cellpadding='6'>
<tr><th>Model</th><th>RMSE</th><th>sMAPE</th><th>N eval wells</th></tr>
<tr><td>KNN</td><td>{m_knn['rmse']:.4f}</td><td>{m_knn['smape']:.4f}</td><td>{m_knn['n_eval']}</td></tr>
<tr><td>ElasticNet</td><td>{m_lr['rmse']:.4f}</td><td>{m_lr['smape']:.4f}</td><td>{m_lr['n_eval']}</td></tr>
</table>
<h3>Files</h3>
<ul>
  <li>metrics.csv</li>
  <li>pred_knn.csv</li>
  <li>pred_elasticnet.csv</li>
</ul>
<h3>Examples</h3>
{example_tags}
</body></html>
"""
report_path = os.path.join(out_dir, "forecast_report.html")
with open(report_path, "w", encoding="utf-8") as f:
    f.write(html)

print("Saved:", metrics_csv, "and", report_path)
