In [1]:
# === Cell 1: Imports & Config (pv-split version) ===
import os, time
import numpy as np
import pandas as pd
import xgboost as xgb
from pandas.api.types import is_datetime64_any_dtype
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from sklearn.metrics import mean_absolute_error

pd.set_option("display.width", 160)
pd.set_option("display.max_columns", 200)

TIME_COL   = "time"
PV_COL     = "pv_id"
TARGET     = "nins"
RANDOM_SEED = 42

# ‚úÖ Í≥µÏãù ÏµúÏ¢Ö ÌîºÏ≤ò 20Í∞ú (51.35.pyÏôÄ ÎèôÏùº)
FINAL_FEATS = [
 'humidity','wind_gust_spd','hour','doy','wind_spd_b',
 'ceiling','uv_idx','appr_temp','uv_cloud_adj','dow',
 'hour_sin','doy_sin','is_rain','rain','hour_cos',
 'doy_cos','snow','coord1','coord2','haze'
]

MAX_GAP   = 12     # 5Î∂Ñ Í∞ÑÍ≤© Í∏∞Ï§Ä 12Ïπ∏ ‚âà 1ÏãúÍ∞Ñ
DAY_HOURS = (6,18) # Ï£ºÍ∞Ñ ÌîÑÎ°ùÏãú(Î≥¥Í∞Ñ/ÌÅ¥Îû®ÌîÑÏóê ÏÇ¨Ïö©)

np.random.seed(RANDOM_SEED)
print("‚úÖ FINAL_FEATS:", len(FINAL_FEATS))


‚úÖ FINAL_FEATS: 20


In [2]:
# === Cell 2: Time utils + time features ===
def ensure_naive_datetime(s: pd.Series, tz="Asia/Seoul") -> pd.Series:
    if not is_datetime64_any_dtype(s):
        s = pd.to_datetime(s, errors="coerce", utc=True)
    if isinstance(s.dtype, DatetimeTZDtype):
        try:
            s = s.dt.tz_convert(tz).dt.tz_localize(None)
        except Exception:
            s = s.dt.tz_localize(None)
    return s

def cyclical_encode(series: pd.Series, period: int):
    angle = 2*np.pi*(series.astype("float32") % period)/period
    return np.sin(angle).astype("float32"), np.cos(angle).astype("float32")

def add_time_features(df: pd.DataFrame, time_col=TIME_COL) -> pd.DataFrame:
    out = df.copy()
    out[time_col] = ensure_naive_datetime(out[time_col])
    out["hour"] = out[time_col].dt.hour.astype("int16")
    out["dow"]  = out[time_col].dt.dayofweek.astype("int8")
    out["doy"]  = out[time_col].dt.dayofyear.astype("int16")
    out["hour_sin"], out["hour_cos"] = cyclical_encode(out["hour"], 24)
    out["doy_sin"],  out["doy_cos"]  = cyclical_encode(out["doy"], 365)
    return out



In [3]:
# === Cell 3: Feature wrapper (ensure 20 feats exist) ===
def add_features(df: pd.DataFrame) -> pd.DataFrame:
    out = add_time_features(df)

    # is_rain
    if "is_rain" not in out.columns:
        src = "rain" if "rain" in out.columns else None
        out["is_rain"] = (pd.to_numeric(out[src], errors="coerce").fillna(0) > 0).astype("int8") if src else np.int8(0)

    # haze (Í∞ÄÏãúÍ±∞Î¶¨ Ïó≠Ïàò ÌòïÌÉúÍ∞Ä ÏóÜÏúºÎ©¥ 0)
    if "haze" not in out.columns:
        out["haze"] = np.float32(0.0)

    # uv_cloud_adj (ÌÅ¥ÎùºÏö∞Îìú ÏóÜÏúºÎ©¥ uv_idxÎ°ú ÎåÄÏ≤¥)
    if "uv_cloud_adj" not in out.columns:
        out["uv_cloud_adj"] = pd.to_numeric(out.get("uv_idx", 0.0), errors="coerce").astype("float32")

    # pv_id Ïπ¥ÌÖåÍ≥†Î¶¨(Î©îÎ™®Î¶¨‚Üì)
    if PV_COL in out.columns and not pd.api.types.is_categorical_dtype(out[PV_COL]):
        out[PV_COL] = out[PV_COL].astype("category")

    return out


In [4]:
# === Cell 4: Interpolation (physics-aware, memory-light) ===
def interpolate_weather(df: pd.DataFrame, max_gap: int=MAX_GAP) -> pd.DataFrame:
    out = df.copy()
    day_mask = out["hour"].between(DAY_HOURS[0], DAY_HOURS[1])

    # Ïù¥Î≤§Ìä∏Ìòï: is_rain (Î≥¥Í∞Ñ X, ÏßßÏùÄ Í≤∞ÏÜêÎßå Î≥¥ÏôÑ)
    if "is_rain" in out.columns:
        s = pd.to_numeric(out["is_rain"], errors="coerce")
        s = out.groupby(PV_COL, observed=True)["is_rain"].transform(lambda x: x.ffill().bfill()).astype("float32")
        for lev in ([PV_COL,"hour"], [PV_COL]):
            if s.isna().any():
                s = s.fillna(out.groupby(lev, observed=True)["is_rain"].transform("median"))
        out["is_rain"] = s.fillna(0).astype("float32")

    # Ïó∞ÏÜçÌòï: ÏÑ†ÌòïÎ≥¥Í∞Ñ(uvÎäî Ï£ºÍ∞ÑÎßå), ÏûîÏó¨Îäî (pv,hour)->pv->Ï†ÑÏ≤¥ Ï§ëÏïôÍ∞í
    cont_cols = [c for c in ["humidity","wind_spd_b","wind_gust_spd","ceiling","appr_temp","uv_idx","haze"] if c in out.columns]
    for c in cont_cols:
        s = pd.to_numeric(out[c], errors="coerce").astype("float32")
        if c == "uv_idx":
            s = s.where(day_mask, 0.0)
        s_lin = out.groupby(PV_COL, observed=True)[c].transform(
            lambda x: x.interpolate(method="linear", limit=max_gap, limit_direction="both")
        ).astype("float32")
        if s_lin.isna().any():
            s_lin = s_lin.fillna(out.groupby([PV_COL,"hour"], observed=True)[c].transform("median"))
        if s_lin.isna().any():
            s_lin = s_lin.fillna(out.groupby(PV_COL, observed=True)[c].transform("median"))
        out[c] = s_lin.fillna(s_lin.median()).astype("float32")

    # rain/snow: Î≥¥Í∞Ñ ÏóÜÏù¥ 0 Ï±ÑÏõÄ + Î¨ºÎ¶¨Ï†Å ÌÅ¥Î¶Ω
    for c in ["rain","snow"]:
        if c in out.columns:
            out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).clip(lower=0).astype("float32")

    # Î¨ºÎ¶¨ Î≤îÏúÑ ÌÅ¥Î¶Ω
    clip_map = {"humidity":(0,100), "uv_idx":(0,None), "wind_spd_b":(0,None), "wind_gust_spd":(0,None),
                "ceiling":(0,None), "appr_temp":(None,None), "haze":(0,None)}
    for c,(lo,hi) in clip_map.items():
        if c in out.columns:
            x = out[c].astype("float32")
            if lo is not None: x = np.maximum(x, lo)
            if hi is not None: x = np.minimum(x, hi)
            out[c] = x.astype("float32")

    # uv_cloud_adj Í∞±Ïã†(ÌÅ¥ÎùºÏö∞Îìú ÏóÜÏúºÎ©¥ Í∑∏ÎåÄÎ°ú)
    if "uv_idx" in out.columns:
        out["uv_cloud_adj"] = pd.to_numeric(out.get("uv_cloud_adj", out["uv_idx"]), errors="coerce").astype("float32")

    return out


In [5]:
# === Cell 5: Load CSVs + basic FE ===
train_raw = pd.read_csv("train.csv", low_memory=True, memory_map=True)
print("Train raw shape:", train_raw.shape)

train_raw = add_features(train_raw)

# ÌÉÄÍπÉ Ï†ïÏ†ú
train_raw[TARGET] = pd.to_numeric(train_raw[TARGET], errors="coerce").astype("float32")
before = len(train_raw)
train_raw = train_raw.dropna(subset=[TARGET])
print(f"Dropped NaN target rows: {before - len(train_raw)}")


Train raw shape: (19236948, 33)


  if PV_COL in out.columns and not pd.api.types.is_categorical_dtype(out[PV_COL]):


Dropped NaN target rows: 0


In [6]:
# === Cell 6: PV-based split ‚Üí Interp per set (no leakage) ===

def split_by_pv(df: pd.DataFrame, valid_frac: float = 0.2, random_state: int = RANDOM_SEED):
    """pv_id Îã®ÏúÑÎ°ú 8:2 Î∂ÑÌï†"""
    df = df.copy()
    pv_ids = df[PV_COL].dropna().unique()
    rng = np.random.RandomState(random_state)
    rng.shuffle(pv_ids)

    n_va = max(1, int(len(pv_ids) * valid_frac))
    va_pv = set(pv_ids[:n_va])
    tr_pv = set(pv_ids[n_va:])

    train_df = df[df[PV_COL].isin(tr_pv)].copy()
    valid_df = df[df[PV_COL].isin(va_pv)].copy()

    print(f"[PV SPLIT] #pv train={len(tr_pv)}  valid={len(va_pv)}")
    print(f"           rows train={len(train_df)}  valid={len(valid_df)}")
    return train_df, valid_df

# üîÅ pv split Ïã§Ìñâ
train_df, valid_df = split_by_pv(train_raw, valid_frac=0.2, random_state=RANDOM_SEED)

# ÏÑ∏Ìä∏Î≥Ñ Î≥¥Í∞Ñ
train_df = interpolate_weather(train_df, max_gap=MAX_GAP)
valid_df = interpolate_weather(valid_df, max_gap=MAX_GAP)

# ÌïÑÏöîÌïú Ïó¥Îßå ÏñáÍ≤å + NaN 0 ÎåÄÏ≤¥
use_cols_tr = [c for c in (FINAL_FEATS + [TARGET]) if c in train_df.columns]
use_cols_va = [c for c in (FINAL_FEATS + [TARGET]) if c in valid_df.columns]

train_df = train_df[use_cols_tr].copy()
valid_df = valid_df[use_cols_va].copy()

train_df[FINAL_FEATS] = train_df[FINAL_FEATS].astype("float32").fillna(0)
valid_df[FINAL_FEATS] = valid_df[FINAL_FEATS].astype("float32").fillna(0)

X_tr, y_tr = train_df[FINAL_FEATS], train_df[TARGET].values
X_va, y_va = valid_df[FINAL_FEATS], valid_df[TARGET].values

print("Split ‚Üí", X_tr.shape, X_va.shape)


  rng.shuffle(pv_ids)


[PV SPLIT] #pv train=147  valid=36
           rows train=15452628  valid=3784320
Split ‚Üí (15452628, 20) (3784320, 20)


In [7]:
# === Cell 7: Train with early stopping + VALID MAE (pv-split) ===
dtr = xgb.DMatrix(X_tr, label=y_tr, feature_names=FINAL_FEATS)
dva = xgb.DMatrix(X_va, label=y_va, feature_names=FINAL_FEATS)

# 51.35.pyÏóêÏÑú Ïì∞Îçò ÌïòÏù¥ÌçºÌååÎùºÎØ∏ÌÑ∞ Í∑∏ÎåÄÎ°ú ÏÇ¨Ïö©
params = {
    "objective": "reg:squarederror",
    "eval_metric": "mae",
    "learning_rate": 0.06,
    "max_depth": 9,
    "min_child_weight": 4,
    "subsample": 0.75,
    "colsample_bytree": 0.85,
    "reg_alpha": 0.3,
    "reg_lambda": 0.6,
    "random_state": RANDOM_SEED,
    # ÌïÑÏöîÏãú "tree_method": "gpu_hist"
}

print("üöÄ Training (pv-split)...")
bst = xgb.train(
    params,
    dtr,
    num_boost_round=3000,
    evals=[(dtr,"train"), (dva,"valid")],
    early_stopping_rounds=200,
    verbose_eval=200
)

best_n = bst.best_iteration + 1
pred_va = bst.predict(dva, iteration_range=(0, bst.best_iteration+1)).astype("float32")
val_mae = mean_absolute_error(y_va, pred_va)
print(f"‚úÖ Best iter={best_n}  |  VALID MAE(pv)={val_mae:.4f}")


üöÄ Training (pv-split)...
[0]	train-mae:232.73010	valid-mae:235.20144
[200]	train-mae:45.24286	valid-mae:50.04910
[400]	train-mae:41.82376	valid-mae:48.05577
[600]	train-mae:39.72670	valid-mae:46.60492
[800]	train-mae:38.32322	valid-mae:45.66859
[1000]	train-mae:37.13002	valid-mae:44.84501
[1200]	train-mae:36.17707	valid-mae:44.15453
[1400]	train-mae:35.38490	valid-mae:43.62530
[1600]	train-mae:34.71578	valid-mae:43.15866
[1800]	train-mae:34.12281	valid-mae:42.74580
[2000]	train-mae:33.53366	valid-mae:42.32984
[2200]	train-mae:33.06114	valid-mae:41.99356
[2400]	train-mae:32.60883	valid-mae:41.68624
[2600]	train-mae:32.18076	valid-mae:41.38463
[2800]	train-mae:31.76874	valid-mae:41.10036
[2999]	train-mae:31.42271	valid-mae:40.86899
‚úÖ Best iter=3000  |  VALID MAE(pv)=40.8690


In [8]:
import gc
import numpy as np
import pandas as pd

def sample_by_pv(df: pd.DataFrame, frac_pv: float = 0.35, random_state: int = 0):
    df = df.copy()
    pv_ids = df[PV_COL].dropna().unique()
    rng = np.random.RandomState(random_state)
    rng.shuffle(pv_ids)

    n_keep = max(1, int(len(pv_ids) * frac_pv))
    keep_pv = set(pv_ids[:n_keep])

    out = df[df[PV_COL].isin(keep_pv)].copy()
    print(f"[sample_by_pv] frac_pv={frac_pv} ‚Üí #pv={n_keep}, rows={len(out)}")
    return out



In [9]:
from sklearn.metrics import mean_absolute_error
import xgboost as xgb

def run_pv_split_once(seed: int, base_df: pd.DataFrame):
    print(f"\n==============================")
    print(f"‚ñ∂ PV-SPLIT EXPERIMENT  seed={seed}")
    print(f"==============================")

    train_df, valid_df = split_by_pv(base_df, valid_frac=0.2, random_state=seed)

    train_df = interpolate_weather(train_df, max_gap=MAX_GAP)
    valid_df = interpolate_weather(valid_df, max_gap=MAX_GAP)

    use_cols_tr = [c for c in (FINAL_FEATS + [TARGET]) if c in train_df.columns]
    use_cols_va = [c for c in (FINAL_FEATS + [TARGET]) if c in valid_df.columns]

    train_df = train_df[use_cols_tr].copy()
    valid_df = valid_df[use_cols_va].copy()

    train_df[FINAL_FEATS] = train_df[FINAL_FEATS].astype("float32").fillna(0)
    valid_df[FINAL_FEATS] = valid_df[FINAL_FEATS].astype("float32").fillna(0)

    X_tr, y_tr = train_df[FINAL_FEATS], train_df[TARGET].values.astype("float32")
    X_va, y_va = valid_df[FINAL_FEATS], valid_df[TARGET].values.astype("float32")

    print(f"  - X_tr: {X_tr.shape}, X_va: {X_va.shape}")

    dtr = xgb.DMatrix(X_tr, label=y_tr, feature_names=FINAL_FEATS)
    dva = xgb.DMatrix(X_va, label=y_va, feature_names=FINAL_FEATS)

    if "params" in globals():
        p = params.copy()
    else:
        p = {
            "objective": "reg:squarederror",
            "eval_metric": "mae",
            "learning_rate": 0.06,
            "max_depth": 9,
            "min_child_weight": 4,
            "subsample": 0.75,
            "colsample_bytree": 0.85,
            "reg_alpha": 0.3,
            "reg_lambda": 0.6,
            "random_state": seed,
            "tree_method": "hist",
        }

    print("  üöÄ Training XGBoost (sampled pv set)...")
    bst = xgb.train(
        p,
        dtr,
        num_boost_round=2000,
        evals=[(dtr, "train"), (dva, "valid")],
        early_stopping_rounds=150,
        verbose_eval=300,
    )

    best_n = bst.best_iteration + 1

    pred_tr = bst.predict(dtr, iteration_range=(0, bst.best_iteration+1)).astype("float32")
    pred_va = bst.predict(dva, iteration_range=(0, bst.best_iteration+1)).astype("float32")

    mae_tr = mean_absolute_error(y_tr, pred_tr)
    mae_va = mean_absolute_error(y_va, pred_va)

    print(f"  ‚úÖ seed={seed} | best_iter={best_n}")
    print(f"     train MAE = {mae_tr:.4f}")
    print(f"     valid MAE = {mae_va:.4f}")
    print(f"     gap       = {mae_va - mae_tr:.4f}")

    del train_df, valid_df, X_tr, X_va, y_tr, y_va, dtr, dva, bst, pred_tr, pred_va
    gc.collect()

    return {
        "seed": seed,
        "best_iter": best_n,
        "train_mae": mae_tr,
        "valid_mae": mae_va,
        "gap": mae_va - mae_tr,
    }




In [10]:
# 1) train_raw ÏùºÎ∂ÄÎßå ÏÉòÌîåÎßÅ (Ïòà: pvÏùò 35%)
train_sample = sample_by_pv(train_raw, frac_pv=0.35, random_state=0)

# 2) seed=42, 7 Í∞ÅÍ∞Å ÎèåÎ†§Î≥¥Í∏∞
results = []
for sd in [42, 7]:
    res = run_pv_split_once(sd, base_df=train_sample)
    results.append(res)

# 3) Í≤∞Í≥º ÏöîÏïΩ
import pandas as pd
res_df = pd.DataFrame(results)
print("\n===== PV-SPLIT SEED COMPARISON (on sampled PVs) =====")
print(res_df)


  rng.shuffle(pv_ids)


[sample_by_pv] frac_pv=0.35 ‚Üí #pv=64, rows=6727680

‚ñ∂ PV-SPLIT EXPERIMENT  seed=42


  rng.shuffle(pv_ids)


[PV SPLIT] #pv train=52  valid=12
           rows train=5466240  valid=1261440
  - X_tr: (5466240, 20), X_va: (1261440, 20)
  üöÄ Training XGBoost (sampled pv set)...
[0]	train-mae:233.12773	valid-mae:230.93798
[300]	train-mae:39.36338	valid-mae:52.47251
[600]	train-mae:35.62052	valid-mae:50.55576
[900]	train-mae:33.34644	valid-mae:49.38522
[1200]	train-mae:31.67432	valid-mae:48.54135
[1500]	train-mae:30.39731	valid-mae:47.89674
[1800]	train-mae:29.34301	valid-mae:47.37417
[1999]	train-mae:28.69869	valid-mae:47.06550
  ‚úÖ seed=42 | best_iter=2000
     train MAE = 28.6987
     valid MAE = 47.0655
     gap       = 18.3668

‚ñ∂ PV-SPLIT EXPERIMENT  seed=7


  rng.shuffle(pv_ids)


[PV SPLIT] #pv train=52  valid=12
           rows train=5466240  valid=1261440
  - X_tr: (5466240, 20), X_va: (1261440, 20)
  üöÄ Training XGBoost (sampled pv set)...
[0]	train-mae:232.34753	valid-mae:233.64794
[300]	train-mae:40.76926	valid-mae:46.42538
[600]	train-mae:36.89537	valid-mae:44.28922
[900]	train-mae:34.61241	valid-mae:43.10720
[1200]	train-mae:32.95466	valid-mae:42.26582
[1500]	train-mae:31.61984	valid-mae:41.64640
[1800]	train-mae:30.52525	valid-mae:41.15068
[1999]	train-mae:29.90407	valid-mae:40.89233
  ‚úÖ seed=7 | best_iter=2000
     train MAE = 29.9041
     valid MAE = 40.8923
     gap       = 10.9883

===== PV-SPLIT SEED COMPARISON (on sampled PVs) =====
   seed  best_iter  train_mae  valid_mae        gap
0    42       2000  28.698689  47.065495  18.366806
1     7       2000  29.904074  40.892326  10.988253


In [11]:
# === Cell F1: Full-data train (use best_n*1.05, clipped) ===
# train_raw, FINAL_FEATS, interpolate_weather, params Í∞Ä Ïïû ÏÖÄÏóêÏÑú Ïù¥ÎØ∏ Ï†ïÏùòÎê®

# 1) full-data Î≥¥Í∞Ñ
train_full = interpolate_weather(train_raw, max_gap=MAX_GAP)
use_cols = [c for c in (FINAL_FEATS + [TARGET]) if c in train_full.columns]
train_full = train_full[use_cols].copy()
train_full[FINAL_FEATS] = train_full[FINAL_FEATS].astype("float32").fillna(0)

dfull = xgb.DMatrix(train_full[FINAL_FEATS], label=train_full[TARGET].values, feature_names=FINAL_FEATS)

# 2) ÎùºÏö¥Îìú Ïàò: best_n * 1.05, [1800, 4200]Î°ú ÌÅ¥Î¶¨Ìïë
num_round = int(best_n * 1.05)
num_round = max(1800, min(4200, num_round))

print(f"üöÄ Full-data training with num_boost_round={num_round} ...")
final_bst = xgb.train(params, dfull, num_boost_round=num_round, verbose_eval=300)

# train MAE Ìïú Î≤à Ï∞çÏñ¥Î≥¥Í∏∞ (Í≥ºÏ†ÅÌï© Ï†ïÎèÑ Í∞ê)
pred_full = final_bst.predict(dfull).astype("float32")
mae_full = mean_absolute_error(train_full[TARGET].values, pred_full)
print(f"‚úÖ FULL Train MAE = {mae_full:.4f}  |  (ref pv-valid MAE={val_mae:.4f})")

final_bst.save_model("xgb_full_final_pv.json")
print("üíæ Saved ‚Üí xgb_full_final_pv.json")


üöÄ Full-data training with num_boost_round=3150 ...
‚úÖ FULL Train MAE = 31.9649  |  (ref pv-valid MAE=40.8690)
üíæ Saved ‚Üí xgb_full_final_pv.json


In [12]:
# === Cell F2: Predict test + save submission (pv model) ===
TEST_CSV = "test.csv"
SUB_CSV  = "submission_sample.csv" if os.path.exists("submission_sample.csv") else "sample_submission.csv"

test_raw = pd.read_csv(TEST_CSV, low_memory=True, memory_map=True)
test_raw = add_features(test_raw)
test_itp = interpolate_weather(test_raw, max_gap=MAX_GAP)

# ÏûÖÎ†• Ï†ïÎ¶¨
feat_exist = [c for c in FINAL_FEATS if c in test_itp.columns]
test_mat = test_itp[feat_exist].astype("float32").fillna(0)
dte = xgb.DMatrix(test_mat, feature_names=feat_exist)

# ÏòàÏ∏° + ÌÅ¥Î¶¨Ìïë
pred = final_bst.predict(dte).astype("float32")
pred = np.clip(pred, 0, None)

# (ÏÑ†ÌÉù) ÏïºÍ∞Ñ Î≥¥ÏàòÏ†Å ÌÅ¥Îû®ÌîÑ: uv_idx<=0 ÎòêÎäî ÏãúÍ∞Ñ not in [6,18] ‚Üí 0
if "uv_idx" in test_itp.columns and "hour" in test_raw.columns:
    mask_night = (pd.to_numeric(test_itp["uv_idx"], errors="coerce") <= 0) | ~test_raw["hour"].between(6,18)
    pred[mask_night.values] = 0.0

# Ï†úÏ∂ú Ìè¨Îß∑ ÎßûÏ∂îÍ∏∞
sub = pd.read_csv(SUB_CSV)
if len(sub) == len(pred):
    sub["nins"] = pred
else:
    merge_keys = [k for k in ["time","pv_id","type"] if k in sub.columns and k in test_raw.columns]
    for df_ in (sub, test_raw):
        if "time" in df_.columns:
            df_["time"] = pd.to_datetime(df_["time"], errors="coerce")
    ref = test_raw[merge_keys].copy()
    ref["nins"] = pred
    sub = sub.drop(columns=["nins"], errors="ignore").merge(ref, on=merge_keys, how="left")

sub["nins"] = sub["nins"].fillna(0).astype("float32")
sub.to_csv("result_submission_pvsplit.csv", index=False)
print("‚úÖ Saved: result_submission_pvsplit.csv")


  if PV_COL in out.columns and not pd.api.types.is_categorical_dtype(out[PV_COL]):


‚úÖ Saved: result_submission_pvsplit.csv
