In [2]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.21.3-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.21.3 (from mlflow)
  Downloading mlflow_skinny-2.21.3-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.21.3->mlflow)
  Downloading databricks_sdk-0.50.0-py3-none-any.whl.metadata (38 kB)
Collecting fastapi<1 (from mlflow-skinny==2.21.3->mlflow)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn<1 (from mlflow-skinny==2.21.3->mlflow)
  Downloading uvicorn-0.34.1-py3-none-any.whl.metadata (6.5 kB)

In [9]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from abc import ABC, abstractmethod
import mlflow
import mlflow.sklearn
import pandas as pd

class Evaluation(ABC):
    """Abstract base class for evaluation metrics.

    Subclasses must override ``calculate_scores``; the class itself cannot be
    instantiated because that method is abstract.
    """

    @abstractmethod
    def calculate_scores(self, y_true, y_pred):
        """Compute a score for ``y_pred`` measured against ``y_true``.

        Args:
            y_true: Reference (ground-truth) values.
            y_pred: Predicted values to be scored.

        Returns:
            Whatever the concrete subclass returns; this base implementation
            does nothing and returns ``None``.
        """
        return None

class MSE(Evaluation):
    """Evaluation metric backed by ``mean_squared_error``."""

    def calculate_scores(self, y_true, y_pred):
        """Return the mean squared error between ``y_true`` and ``y_pred``.

        Args:
            y_true: Reference (ground-truth) values.
            y_pred: Predicted values to be scored.

        Returns:
            The result of ``mean_squared_error(y_true, y_pred)``.
        """
        score = mean_squared_error(y_true=y_true, y_pred=y_pred)
        return score

class R2(Evaluation):
    """Evaluation metric backed by ``r2_score``."""

    def calculate_scores(self, y_true, y_pred):
        """Return the R^2 score of ``y_pred`` measured against ``y_true``.

        Args:
            y_true: Reference (ground-truth) values.
            y_pred: Predicted values to be scored.

        Returns:
            The result of ``r2_score(y_true, y_pred)``.
        """
        score = r2_score(y_true=y_true, y_pred=y_pred)
        return score

# Load data: wrap the feature matrix in a DataFrame so column names are kept
data = load_diabetes()
df = pd.DataFrame(data.data, columns=data.feature_names)
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df, data.target, test_size=0.2, random_state=42
)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate using the metric classes defined above
mse = MSE()
r2 = R2()
mse_score = mse.calculate_scores(y_test, y_pred)
r2_score_value = r2.calculate_scores(y_test, y_pred)

# MLflow logging
# A one-row DataFrame (list indexer keeps it 2-D) serves as the input example.
input_example = X_test.iloc[[0]]
mlflow.set_experiment("diabetes_evaluation")
mlflow.end_run()  # In case a run is already active

# Log metrics and the fitted model exactly once; a second identical
# start_run() block would create a duplicate run for the same results.
with mlflow.start_run():
    mlflow.log_metric("mse", mse_score)
    mlflow.log_metric("r2", r2_score_value)
    mlflow.sklearn.log_model(model, "model", input_example=input_example)

print("MSE Score:", mse_score)
print("R2 Score:", r2_score_value)



MSE Score: 2900.193628493482
R2 Score: 0.4526027629719195
