In [23]:
import os

import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split

In [20]:
# Point MLflow at a SQLite-backed tracking store in the working directory and
# select the experiment that subsequent runs are recorded under.
tracking_uri = "sqlite:///mlflow.db"
experiment_name = "Regression-Experiment"

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

# Echo the URI as MLflow reports it, rather than the value we requested.
print("Connected to:", mlflow.get_tracking_uri())

2026/02/21 10:38:56 INFO mlflow.tracking.fluent: Experiment with name 'Regression-Experiment' does not exist. Creating a new experiment.


Connected to: sqlite:///mlflow.db


In [21]:
# Dataset: scikit-learn's bundled diabetes data, returned as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

# Quick sanity check: dimensions plus the leading samples of each array.
n_preview = 5
print("Dataset shape:", X.shape, y.shape)
print("First 5 rows of X:\n", X[:n_preview])
print("First 5 values of y:\n", y[:n_preview])

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Dataset shape: (442, 10) (442,)
First 5 rows of X:
 [[ 0.03807591  0.05068012  0.06169621  0.02187239 -0.0442235  -0.03482076
  -0.04340085 -0.00259226  0.01990749 -0.01764613]
 [-0.00188202 -0.04464164 -0.05147406 -0.02632753 -0.00844872 -0.01916334
   0.07441156 -0.03949338 -0.06833155 -0.09220405]
 [ 0.08529891  0.05068012  0.04445121 -0.00567042 -0.04559945 -0.03419447
  -0.03235593 -0.00259226  0.00286131 -0.02593034]
 [-0.08906294 -0.04464164 -0.01159501 -0.03665608  0.01219057  0.02499059
  -0.03603757  0.03430886  0.02268774 -0.00936191]
 [ 0.00538306 -0.04464164 -0.03638469  0.02187239  0.00393485  0.01559614
   0.00814208 -0.00259226 -0.03198764 -0.04664087]]
First 5 values of y:
 [151.  75. 141. 206. 135.]


In [22]:
# This is the core of the script. Everything indented inside this `with` block is
# recorded as a single MLflow "Run"; the context manager ends the run when the block exits.
with mlflow.start_run(run_name="linear_regression_baseline"):
    # Model: fit a plain linear-regression baseline on the training split and
    # predict on the held-out split.
    simple_model = LinearRegression()
    simple_model.fit(X_train, y_train)
    y_pred = simple_model.predict(X_test)

    # Metrics: RMSE is the square root of the mean squared error; R2 comes from r2_score.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    r2 = r2_score(y_test, y_pred)

    # Params (include requested fields), logged in one batch call.
    # "solver" and "regularization" are placeholder values for this baseline.
    # NOTE(review): "random_state" presumably records the seed passed to
    # train_test_split earlier in the notebook; keep the two in sync.
    mlflow.log_params(
        {
            "model_type": "LinearRegression",
            "solver": "N/A",
            "regularization": "none",
            "random_state": 42,
        }
    )

    # Metrics, logged in one batch call.
    mlflow.log_metrics({"rmse": rmse, "r2": r2})

    # Artifact: residual plot (residual = actual - predicted, plotted against predicted).
    os.makedirs("artifacts", exist_ok=True)
    residual_plot_path = "artifacts/linear_residual_plot.png"
    residuals = y_test - y_pred

    # Use an explicit Figure/Axes instead of pyplot's implicit "current figure", and
    # close the figure in `finally` so a failure while drawing or saving does not
    # leave an open figure behind in the kernel.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.scatter(y_pred, residuals, alpha=0.7)
        ax.axhline(0, color="red", linestyle="--")  # zero-error reference line
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Residual")
        ax.set_title("Linear Regression Residual Plot")
        fig.savefig(residual_plot_path, bbox_inches="tight")
    finally:
        plt.close(fig)

    # Copies the local file ("artifacts/linear_residual_plot.png") into this run's
    # artifact location so it is stored alongside the run's params and metrics.
    # The image can then be viewed in the MLflow UI for this run.
    mlflow.log_artifact(residual_plot_path)

    # Log model
    # Serializes the scikit-learn model and stores it with the run, so the exact fitted
    # model can be loaded later for inference (e.g. via mlflow.pyfunc.load_model())
    # without retraining.
    # NOTE(review): newer MLflow releases appear to prefer `name=` over `artifact_path=`
    # for log_model; confirm against the installed version before switching.
    mlflow.sklearn.log_model(simple_model, artifact_path="simple_model")

    print("Linear run logged. RMSE:", rmse, "R2:", r2)



Linear run logged. RMSE: 53.85344583676591 R2: 0.4526027629719198
