# Getting Started with MLflow

This notebook demonstrates basic MLflow usage in the containerized environment.

In [None]:
import mlflow
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Show which MLflow build is installed and which tracking server this
# kernel will send runs to.
print(f"MLflow version: {mlflow.__version__}")
print(f"MLflow tracking URI: {mlflow.get_tracking_uri()}")

In [None]:
# Synthetic regression problem: 1000 rows of 5 standard-normal features,
# with a target equal to the row-wise feature sum plus Gaussian noise
# scaled by 0.1. The global NumPy seed is fixed so the data is reproducible.
np.random.seed(42)
X = np.random.randn(1000, 5)
y = np.sum(X, axis=1) + 0.1 * np.random.randn(1000)

# Hold out 20% of the rows for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(
    f"Training set size: {X_train.shape}",
    f"Test set size: {X_test.shape}",
    sep="\n",
)

In [None]:
# Train a random-forest regressor and record its parameters, RMSE metric and
# fitted model as a single MLflow run in the "getting-started" experiment.
mlflow.set_experiment("getting-started")

# Bind the run object here so the run ID can be read from it directly instead
# of looking the run up again through mlflow.active_run().
with mlflow.start_run() as run:
    # Hyperparameters for this run
    n_estimators = 100
    max_depth = 5

    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)

    # Train model (random_state fixed so repeated runs give the same forest)
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42,
    )
    model.fit(X_train, y_train)

    # Evaluate on the held-out test split
    y_pred = model.predict(X_test)
    # Take the square root of the MSE explicitly: the `squared=False` keyword
    # was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it
    # raises TypeError on current releases. This form works on every version.
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))

    # Log metrics
    mlflow.log_metric("rmse", rmse)

    # Log model
    mlflow.sklearn.log_model(model, "model")

    print(f"RMSE: {rmse:.4f}")
    print(f"Run ID: {run.info.run_id}")

## Next Steps

1. Visit http://localhost:5000 to see your experiment in the MLflow UI
2. Try different parameters and compare results
3. Load your own data into the `data/` directory
4. Explore the factory-systems integration