In [12]:
import os
from pathlib import Path

import polars as pl
import polars.selectors as cs
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import xgboost as xgb

In [13]:
# Project root. Set the WIDS_PROJECT_DIR environment variable to run this notebook
# from a different checkout; when it is unset (or empty) the original location is used.
DEFAULT_COMPETITION_PATH = "/Users/zacklarsen/Documents/Projects/kaggle-wids-datathon-2020/"
competition_path = Path(os.environ.get("WIDS_PROJECT_DIR") or DEFAULT_COMPETITION_PATH)

# Sub-directories derived from the project root.
mlflow_path = competition_path / "mlruns"  # local MLflow tracking store
data_path = competition_path / "data"  # parquet train/test splits

In [14]:
# Read the four pre-split parquet tables (features and targets for train and test)
# from the data directory, in the same order as the names on the left-hand side.
X_train, X_test, y_train, y_test = map(
    pl.read_parquet,
    (
        data_path / "X_train.parquet",
        data_path / "X_test.parquet",
        data_path / "y_train.parquet",
        data_path / "y_test.parquet",
    ),
)

In [15]:
# Record runs in the project's local mlruns directory. Alternatives tried earlier
# (a tracking server at http://localhost:5000, or the same directory written out as a
# literal string) are intentionally not used.
mlflow.set_tracking_uri(mlflow_path)

# Turn on MLflow's XGBoost autologging so that training is logged without explicit calls.
mlflow.xgboost.autolog()

In [16]:
with mlflow.start_run() as run:
    # Capture the run's identifiers first; the registry URI below is derived from run_id.
    run_id = run.info.run_id
    experiment_id = run.info.experiment_id

    # Fit a default-configured classifier on the numeric feature columns only.
    numeric_features = X_train.select(cs.numeric())
    model = xgb.XGBClassifier()
    model.fit(numeric_features, y_train)

    # Register the model stored under this run's "model" artifact path.
    # Re-running this cell adds a new version under the same registered name.
    model_name = "XGBoost_1.json"
    model_uri = f"runs:/{run_id}/model"
    registered_model_version = mlflow.register_model(model_uri=model_uri, name=model_name)

Registered model 'XGBoost_1.json' already exists. Creating a new version of this model...
2023/10/08 15:16:18 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: XGBoost_1.json, version 2
Created version '2' of model 'XGBoost_1.json'.


In [17]:
# Create an MLflow client for querying runs, experiments and artifacts.
# No tracking URI is passed, so this presumably uses the URI configured earlier in the
# notebook via mlflow.set_tracking_uri() — re-create the client if that URI changes.
client = mlflow.tracking.MlflowClient()

In [18]:
# List the experiments visible to the client (bare expression, so the notebook displays it).
client.search_experiments()

[<Experiment: artifact_location='/Users/zacklarsen/Documents/Projects/kaggle-wids-datathon-2020/mlruns/0', creation_time=1696796075446, experiment_id='0', last_update_time=1696796075446, lifecycle_stage='active', name='Default', tags={}>]

In [19]:
# List the top-level artifacts (files and directories) logged for the training run.
# run_id comes from the training cell, so that cell must have been executed first.
artifacts = client.list_artifacts(run_id)

In [20]:
# Display the artifact listing returned by the client.
artifacts

[<FileInfo: file_size=102, is_dir=False, path='feature_importance_weight.json'>,
 <FileInfo: file_size=14721, is_dir=False, path='feature_importance_weight.png'>,
 <FileInfo: file_size=None, is_dir=True, path='model'>]

In [21]:
# Show only the artifact paths, one per line.
for artifact_path in (file_info.path for file_info in artifacts):
    print(artifact_path)

feature_importance_weight.json
feature_importance_weight.png
model


In [23]:
# Download the run's artifacts to a run-specific local directory.
# The destination is deliberately outside the mlruns tracking store: the file-based store
# treats sub-directories of its root as experiments, so dropping "model/" and loose files
# there pollutes it. MLflow also requires the destination to exist, hence the mkdir.
download_dir = competition_path / "artifacts" / run_id
download_dir.mkdir(parents=True, exist_ok=True)

for artifact in artifacts:
    # Each top-level artifact (file or directory) is fetched under download_dir.
    client.download_artifacts(run_id, artifact.path, str(download_dir))

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]