In [None]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Point the MLflow client at the tracking server (expected to be running in
# Docker and reachable on localhost:5000).
mlflow.set_tracking_uri("http://localhost:5000")

# Run configuration. Each value is defined once so that what gets logged to
# MLflow is guaranteed to be what was actually used for training.
DATA_PATH = "data/data.csv"
TARGET_COLUMN = "Global Innovation Score"
ARTIFACT_PATH = "model"
REGISTERED_MODEL_NAME = "GlobalInnovationModel"
TEST_SIZE = 0.2
SEED = 42
N_ESTIMATORS = 50

# Bind the run handle so the run id comes from this run directly rather than
# from a global `mlflow.active_run()` lookup.
with mlflow.start_run() as run:
    # Log the configuration first: if training fails later, the run still
    # records which parameters were attempted.
    mlflow.log_param("n_estimators", N_ESTIMATORS)
    mlflow.log_param("random_state", SEED)

    # Load data and separate the features from the regression target.
    data = pd.read_csv(DATA_PATH)
    X = data.drop(columns=[TARGET_COLUMN])
    y = data[TARGET_COLUMN]

    # Hold out a seeded test split so the evaluation set is stable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=SEED
    )

    # Train the model. Seeding the forest (in addition to the split) makes
    # the logged metric reproducible from the logged parameters.
    model = RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=SEED)
    model.fit(X_train, y_train)

    # Evaluate on the held-out split and record the error.
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mlflow.log_metric("mse", mse)

    # Store the fitted model as a run artifact.
    mlflow.sklearn.log_model(model, ARTIFACT_PATH)

    # Register the logged artifact in the Model Registry; the URI must point
    # at the same artifact path used in `log_model` above.
    model_uri = f"runs:/{run.info.run_id}/{ARTIFACT_PATH}"
    result = mlflow.register_model(model_uri, REGISTERED_MODEL_NAME)

    print(f"Model registered as: {result.name} - Version {result.version}")
