## Submission Code (Submission Notebook)

### Overview
- **Objective**: Load the trained models produced by the published training notebook, make predictions on the test data, and create a submission file.
- **Outcome**: `/kaggle/working/submission.parquet`

### Steps
1. **Data Load**
- Load `test.csv` and apply the same preprocessing as during training.

2. **Load Meta Information**
- Load `meta.json` and recreate `feat_cols` and `label_cols`.

3. **Model Load & Inference**
- Loop through `label_cols` and load the saved CatBoost model (`.pkl`) for each target.
- Calculate predictions for the corresponding target.

4. **Constructing submission data**
- `submission = pd.DataFrame({"date_id": test["date_id"], ...})`

In [1]:
import os, sys, json, warnings, joblib
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import polars as pl
import json
import kaggle_evaluation.mitsui_inference_server

In [2]:
# Read the training metadata once so the cell output shows which feature and
# label columns the saved models expect.
with open('/kaggle/input/baseline-catboost-train/models/meta.json', "r") as f_json:
    f_j = json.loads(f_json.read())

f_j

{'feat_cols': ['FX_AUDCAD',
  'FX_AUDCHF',
  'FX_AUDJPY',
  'FX_AUDNZD',
  'FX_AUDUSD',
  'FX_CADCHF',
  'FX_CADJPY',
  'FX_CADUSD',
  'FX_CHFJPY',
  'FX_EURAUD',
  'FX_EURCAD',
  'FX_EURCHF',
  'FX_EURGBP',
  'FX_EURJPY',
  'FX_EURNZD',
  'FX_EURUSD',
  'FX_GBPAUD',
  'FX_GBPCAD',
  'FX_GBPCHF',
  'FX_GBPJPY',
  'FX_GBPNZD',
  'FX_GBPUSD',
  'FX_NOKCHF',
  'FX_NOKEUR',
  'FX_NOKGBP',
  'FX_NOKJPY',
  'FX_NOKUSD',
  'FX_NZDCAD',
  'FX_NZDCHF',
  'FX_NZDJPY',
  'FX_NZDUSD',
  'FX_USDCHF',
  'FX_USDJPY',
  'FX_ZARCHF',
  'FX_ZAREUR',
  'FX_ZARGBP',
  'FX_ZARJPY',
  'FX_ZARUSD',
  'JPX_Gold_Mini_Futures_Close',
  'JPX_Gold_Mini_Futures_High',
  'JPX_Gold_Mini_Futures_Low',
  'JPX_Gold_Mini_Futures_Open',
  'JPX_Gold_Mini_Futures_Volume',
  'JPX_Gold_Mini_Futures_open_interest',
  'JPX_Gold_Mini_Futures_settlement_price',
  'JPX_Gold_Rolling-Spot_Futures_Close',
  'JPX_Gold_Rolling-Spot_Futures_High',
  'JPX_Gold_Rolling-Spot_Futures_Low',
  'JPX_Gold_Rolling-Spot_Futures_Open',
  'JPX_Gol

In [3]:
# m_json = pd.read_json('/kaggle/input/mitsui-lgbm-models-v1/models/meta.json')

In [4]:
# Competition dataset root; passed to the local gateway when running outside a
# competition rerun.
DATA_PATH = "/kaggle/input/mitsui-commodity-prediction-challenge"
# Directory containing meta.json and the per-target model artifacts
# (<target>.pkl / <target>.skip).
MODEL_INPUT_DIR = "/kaggle/input/baseline-catboost-train/models/" # Replace with the path of the dataset you attached via "Add data"

def preprocess_for_lgbm(df: pd.DataFrame) -> pd.DataFrame:
    """Return a numeric-only copy of *df* suitable for model input.

    Object-dtype columns are parsed with ``pd.to_numeric``; values that cannot
    be parsed become NaN. Categorical columns are replaced by their integer
    category codes. The input frame is left untouched.
    """
    frame = df.copy()

    # Convert all object columns in one pass; skip the assignment entirely
    # when there are none.
    text_cols = frame.select_dtypes(include=["object"]).columns
    if not text_cols.empty:
        converted = frame[text_cols].apply(pd.to_numeric, errors="coerce")
        frame[text_cols] = converted

    # Categoricals are swapped for their integer codes.
    category_cols = frame.select_dtypes(include=["category"]).columns
    for name in category_cols:
        codes = frame[name].cat.codes
        frame[name] = codes

    return frame

# Load the training metadata and the per-target models.
with open(os.path.join(MODEL_INPUT_DIR, "meta.json"), "r", encoding="utf-8") as f:
    meta = json.load(f)
feat_cols  = meta["feat_cols"]
label_cols = meta["label_cols"]

# Maps every target to its fitted model, or to None when no model is
# available; predict() emits 0.0 for targets mapped to None.
trained_models = {}
missing_targets = []
for tgt in label_cols:
    pkl = os.path.join(MODEL_INPUT_DIR, f"{tgt}.pkl")
    skp = os.path.join(MODEL_INPUT_DIR, f"{tgt}.skip")
    if os.path.exists(pkl):
        # NOTE(review): joblib.load unpickles arbitrary objects; only point
        # MODEL_INPUT_DIR at artifacts you trust.
        trained_models[tgt] = joblib.load(pkl)
        continue
    trained_models[tgt] = None
    # A .skip marker presumably means the target was skipped on purpose at
    # training time (TODO confirm). Neither file being present is more likely
    # a wrong MODEL_INPUT_DIR or an incomplete upload, so report it instead
    # of silently predicting a constant.
    if not os.path.exists(skp):
        missing_targets.append(tgt)

if missing_targets:
    print(
        f"[WARN] {len(missing_targets)} target(s) have neither a .pkl nor a .skip file "
        f"in {MODEL_INPUT_DIR}; they will be predicted as 0.0: {missing_targets[:10]}"
    )

def predict(test_batch: pl.DataFrame | pd.DataFrame, lag1, lag2, lag3, lag4) -> pd.DataFrame:
    """Produce one row of predictions, one column per entry in ``label_cols``.

    Args:
        test_batch: Feature batch, as either a polars or a pandas frame. It
            must contain every column listed in ``feat_cols``.
        lag1, lag2, lag3, lag4: Additional positional arguments kept in the
            signature because the inference server calls ``predict`` with
            them; they are not used here. (Presumably lagged-label frames
            supplied by the gateway — TODO confirm.)

    Returns:
        A single-row ``pd.DataFrame`` whose columns are ``label_cols`` in
        order. Targets without a loaded model, and targets whose prediction
        is not a finite number, are filled with 0.0.

    Raises:
        KeyError: If ``test_batch`` lacks any column in ``feat_cols``.
    """
    if isinstance(test_batch, pl.DataFrame):
        batch = test_batch.to_pandas()
    else:
        batch = test_batch
    features = preprocess_for_lgbm(batch)[feat_cols]

    row = {}
    for tgt in label_cols:
        mdl = trained_models.get(tgt)
        if mdl is None:
            row[tgt] = 0.0
            continue
        # Collapse the batch to one value per target by averaging the
        # per-row predictions.
        value = float(np.asarray(mdl.predict(features)).mean())
        # The mean of an empty array (or a NaN/inf prediction) is not a
        # usable submission value; fall back to the same neutral 0.0 used for
        # targets without a model.
        row[tgt] = value if np.isfinite(value) else 0.0
    return pd.DataFrame([row], columns=label_cols)

# ===== Generate submission via gateway =====
# Wrap predict() in the competition inference server. Outside a competition
# rerun the local gateway is run against DATA_PATH for verification; during a
# rerun the server is started with serve() instead.
server = kaggle_evaluation.mitsui_inference_server.MitsuiInferenceServer(predict)
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    server.run_local_gateway((DATA_PATH,))  # Local verification
else:
    server.serve()

In [5]:
# # ===== Inspect submission artifact (Parquet -> CSV for inspection) =====
# # Run if necessary to check
# pq_path  = "/kaggle/working/submission.parquet"
# csv_out  = "/kaggle/working/submission_from_parquet.csv"

# sub_pl = pl.read_parquet(pq_path)
# print(f"[OK] Loaded Parquet: {pq_path} shape={sub_pl.shape}")

# # Export CSV for confirmation (not for submission)
# sub_pl.write_csv(csv_out)
# print(f"[OK] Wrote CSV for inspection: {csv_out}")

# sub = sub_pl.to_pandas()
# pd.set_option("display.max_columns", 30)
# sub.head()