In [1]:
# 03_predict.ipynb

import os
from pathlib import Path

import mlflow
import pandas as pd

In [2]:
# 📌 Paths: input feature files and output predictions (relative to the notebook folder)
FEATURES_DIR, PREDICTIONS_DIR = map(
    Path, ("../data/features", "../data/predictions")
)
# Create the output folder up front so the final save cannot fail on a missing directory
PREDICTIONS_DIR.mkdir(exist_ok=True, parents=True)

In [3]:
# 🎯 Parameters
# Name interpolated into the `models:/<name>/latest` URI when the model is loaded below.
EXPERIMENT_NAME = "ETF_PEA_MLOpsZoomcamp"

In [4]:
# ✅ Load the data: one parquet file per ticker, stacked into a single frame
FEATURES_SUFFIX = "_features"

# sorted() keeps the row order reproducible; raw glob order depends on the filesystem
feature_files = sorted(FEATURES_DIR.glob("*_features.parquet"))
if not feature_files:
    # Fail early with an actionable message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError(
        f"No '*_features.parquet' files found in {FEATURES_DIR.resolve()}"
    )

dfs = []
for file in feature_files:
    df = pd.read_parquet(file)
    # Strip only the trailing "_features" so tickers that contain "_" are kept whole
    # (splitting on the first "_" would truncate them and could merge distinct tickers)
    df['ticker'] = file.stem[: -len(FEATURES_SUFFIX)]
    dfs.append(df)

# ignore_index=True gives a fresh 0..n-1 index, same as concat + reset_index(drop=True)
df_all = pd.concat(dfs, ignore_index=True)
print(f"✅ Loaded {len(df_all)} rows")

✅ Loaded 3802 rows


In [5]:
# ✅ Feature selection: every column except the target and the ticker label
# Column labels can be plain strings or tuples such as ('target', '') when the frame
# carries MultiIndex columns, so both spellings of each excluded name are recognised.
NON_FEATURE_NAMES = {'target', 'ticker'}


def _is_non_feature(col):
    """Return True when `col` is the target or ticker column, in flat or tuple form.

    A tuple label counts only if its first level is an excluded name and every
    remaining level is the empty string, e.g. ('target', '') or ('ticker', '').
    """
    if isinstance(col, tuple):
        return col[0] in NON_FEATURE_NAMES and all(level == '' for level in col[1:])
    return col in NON_FEATURE_NAMES


feature_cols = [col for col in df_all.columns if not _is_non_feature(col)]

In [6]:
# 💡 Flatten column labels to plain names (done for LightGBM compatibility)
def _flat_name(label):
    """Turn a column label into a flat name.

    Strings are returned unchanged; a tuple (a, '') becomes a; a tuple (a, b)
    becomes "a_b".
    """
    if isinstance(label, str):
        return label
    if label[1] == '':
        return label[0]
    return f"{label[0]}_{label[1]}"


# Same flattening for the selected features and for the frame itself,
# so the flattened names can be used to index the frame.
clean_feature_cols = list(map(_flat_name, feature_cols))
df_all.columns = list(map(_flat_name, df_all.columns))

X = df_all[clean_feature_cols]

In [7]:
# ✅ Load the model from MLflow
# The tracking store location is machine-specific. It can be overridden with the
# MLFLOW_TRACKING_URI environment variable; the original local path is the fallback.
DEFAULT_TRACKING_URI = "file:///G:/Mon Drive/DataTalksClub/MLOps-ETF-PEA/mlruns"
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI") or DEFAULT_TRACKING_URI)

# NOTE(review): EXPERIMENT_NAME is used here as the registered-model name —
# confirm a model is registered under exactly that name.
model_uri = f"models:/{EXPERIMENT_NAME}/latest"

# NOTE(review): presumably the artifacts are resolved from the location recorded when the
# model was logged, so a moved/renamed mlruns folder can fail here — verify if this raises.
model = mlflow.pyfunc.load_model(model_uri)
print("✅ Model loaded from MLflow")

OSError: No such file or directory: 'G:\Mon Drive\DataTalksClub\MLOps-ETF-PEA\mlruns\279597552178412770\71bb2a63ce314583a6cffaff0b95166b\artifacts\model\.'

In [None]:












# ✅ Predictions: score the feature matrix and attach the result to the full frame
predicted_values = model.predict(X)
df_all['prediction'] = predicted_values
print("✅ Predictions completed")

# ✅ Save the frame (with the new prediction column) as a single parquet file
output_path = PREDICTIONS_DIR.joinpath("predictions.parquet")
df_all.to_parquet(output_path, index=False)
print(f"✅ Predictions saved to {output_path}")
