In [1]:
# Install MLflow into the environment of the running kernel. `%pip` targets the
# kernel's interpreter, whereas `!pip` runs whichever pip the shell finds first.
# TODO: pin the version (mlflow==<version>) once the target release is decided.
%pip install --quiet mlflow

In [1]:
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
class WineModelTrainer:
    """Train and evaluate a random-forest classifier on scikit-learn's wine dataset.

    The intended call order is ``load_data()`` -> ``train_model()`` ->
    ``evaluate_model()``. Each step stores its result on the instance, and a
    step called before its prerequisite raises ``RuntimeError``.
    """

    def __init__(self):
        # Fitted estimator; stays None until train_model() has run.
        self.model = None
        # Train/test partitions; stay None until load_data() has run.
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        # Accuracy on the test partition; stays None until evaluate_model() has run.
        self.accuracy = None

    def load_data(self, test_size=0.2, random_state=42):
        """Load the wine dataset and split it into train and test partitions.

        Args:
            test_size: Forwarded to ``train_test_split`` as ``test_size``.
            random_state: Seed forwarded to ``train_test_split`` so the split
                is reproducible.
        """
        data = load_wine()
        X = pd.DataFrame(data.data, columns=data.feature_names)
        y = pd.Series(data.target)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

    def train_model(self, n_estimators=100, random_state=42):
        """Fit a ``RandomForestClassifier`` on the training partition.

        This is example code: no feature engineering or data cleaning is done
        here. In a real project those steps would run before fitting.

        Args:
            n_estimators: Number of trees, forwarded to the classifier.
            random_state: Seed forwarded to the classifier.

        Raises:
            RuntimeError: If ``load_data()`` has not been called yet.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError(
                "No training data available: call load_data() before train_model()."
            )
        self.model = RandomForestClassifier(
            n_estimators=n_estimators, random_state=random_state
        )
        self.model.fit(self.X_train, self.y_train)

    def evaluate_model(self):
        """Score the fitted model on the test partition and print the result.

        The metric is accuracy; swap it for whatever suits the problem at hand.
        The value is stored in ``self.accuracy``.

        Raises:
            RuntimeError: If the model has not been trained or no test data
                has been loaded.
        """
        if self.model is None:
            raise RuntimeError(
                "Model is not trained: call train_model() before evaluate_model()."
            )
        if self.X_test is None or self.y_test is None:
            raise RuntimeError(
                "No test data available: call load_data() before evaluate_model()."
            )
        y_pred = self.model.predict(self.X_test)
        self.accuracy = accuracy_score(self.y_test, y_pred)
        print(f'Accuracy: {self.accuracy}')

In [3]:
class MLflowLogger:
    """Thin wrapper around MLflow's fluent tracking API for a single run.

    Creating an instance starts a run immediately. Close it with ``end_run()``,
    or use the instance as a context manager so the run is closed even when the
    body raises::

        with MLflowLogger(run_name="WineModelRun") as logger:
            logger.log_metric("accuracy", 0.97)
    """

    def __init__(self, run_name="WineModelRun"):
        """Start an MLflow run named ``run_name``."""
        mlflow.start_run(run_name=run_name)

    def __enter__(self):
        """Return the logger; the run was already started in ``__init__``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """End the run, marking it FAILED if the ``with`` body raised."""
        self.end_run(status="FINISHED" if exc_type is None else "FAILED")
        # Never swallow the exception: the caller still needs to see it.
        return False

    def log_params(self, params):
        """Log a dict of parameters to the active run."""
        mlflow.log_params(params)

    def log_metric(self, name, value):
        """Log a single metric ``name`` with ``value`` to the active run."""
        mlflow.log_metric(name, value)

    def log_model(self, model, model_name="model"):
        """Log ``model`` with ``mlflow.sklearn.log_model`` under ``model_name``."""
        mlflow.sklearn.log_model(model, model_name)

    def end_run(self, status="FINISHED"):
        """End the active run.

        Args:
            status: Final run status forwarded to ``mlflow.end_run``.
        """
        mlflow.end_run(status=status)

In [4]:
class WineModelPredictor:
    """Serve predictions from a model stored in MLflow."""

    def __init__(self, logged_model):
        """Load the model at the MLflow URI ``logged_model``.

        Args:
            logged_model: Model URI passed to ``mlflow.pyfunc.load_model``,
                e.g. ``'runs:/<run_id>/model'``.
        """
        self.model = mlflow.pyfunc.load_model(logged_model)

    def predict(self, new_data):
        """Return the model's predictions for ``new_data``.

        Prints a message and returns ``None`` when no model is attached.
        """
        # Compare against None explicitly: truthiness would misreport a loaded
        # model whose class defines __len__ or __bool__ as "not loaded".
        if self.model is None:
            print("Model not loaded. Cannot make predictions.")
            return None
        return self.model.predict(new_data)

def evaluate_predictions(true_labels, predicted_labels):
    """Return the accuracy of ``predicted_labels`` against ``true_labels``.

    Both arguments may be scalars or sequences. They are converted to arrays
    first so that a one-element list and a bare scalar compare correctly
    (in plain Python ``[1] == 1`` is ``False``).

    Args:
        true_labels: Ground-truth label(s).
        predicted_labels: Predicted label(s), one per true label.

    Returns:
        The value of ``accuracy_score`` when there is more than one sample;
        ``1.0`` or ``0.0`` for a single sample.

    Raises:
        ValueError: If no labels are given, or the two inputs contain a
            different number of samples.
    """
    true_arr = np.atleast_1d(np.asarray(true_labels))
    pred_arr = np.atleast_1d(np.asarray(predicted_labels))

    n_samples = true_arr.shape[0]
    if n_samples == 0:
        raise ValueError("Cannot compute accuracy without any labels.")
    if pred_arr.shape[0] != n_samples:
        raise ValueError(
            f"Got {n_samples} true label(s) but {pred_arr.shape[0]} prediction(s)."
        )

    if n_samples > 1:
        return accuracy_score(true_arr, pred_arr)
    # Single sample: the prediction is either right or wrong.
    return float(np.array_equal(true_arr, pred_arr))

In [5]:
# Train and evaluate the wine classifier, then record the result in MLflow.
wine_trainer = WineModelTrainer()
wine_trainer.load_data()
n_estimators = 200
wine_trainer.train_model(n_estimators=n_estimators)
wine_trainer.evaluate_model()

mlflow_logger = MLflowLogger()
try:
    mlflow_logger.log_params({"n_estimators": n_estimators})
    mlflow_logger.log_metric("accuracy", wine_trainer.accuracy)
    mlflow_logger.log_model(wine_trainer.model)
finally:
    # Always close the run: a run left active after a failed logging call
    # makes the next mlflow.start_run() in this kernel raise.
    mlflow_logger.end_run()

Accuracy: 1.0


In [11]:
# Score one held-out sample with the model logged by the training run, then
# record the outcome in a separate "ProductionRun".
SAMPLE_POSITION = 13  # positional index of the sample within the test split

# Double brackets keep a one-row DataFrame, so the feature names the model was
# trained with are passed along instead of a bare array.
test_data = wine_trainer.X_test.iloc[[SAMPLE_POSITION]]

# Look up the most recent finished training run by name instead of hard-coding
# a run ID, which only exists in the tracking store that produced it.
training_runs = mlflow.search_runs(
    filter_string="tags.mlflow.runName = 'WineModelRun' and attributes.status = 'FINISHED'",
    order_by=["attributes.start_time DESC"],
    max_results=1,
)
if training_runs.empty:
    raise RuntimeError(
        "No finished 'WineModelRun' run found; run the training cell first."
    )
logged_model_uri = f"runs:/{training_runs.iloc[0]['run_id']}/model"

model_predictor = WineModelPredictor(logged_model=logged_model_uri)

predicted_label = model_predictor.predict(test_data)[0]

# Read the ground truth from the test split rather than assuming its value.
true_label = [int(wine_trainer.y_test.iloc[SAMPLE_POSITION])]
accuracy = evaluate_predictions(true_label, predicted_label)

predictions_df = pd.DataFrame({"True_Labels": true_label, "Predicted_Labels": [predicted_label]})
predictions_artifact_path = "predictions.csv"

# The context manager closes the run even if logging fails part-way.
with mlflow.start_run(run_name="ProductionRun"):
    mlflow.log_metric("accuracy", accuracy)
    predictions_df.to_csv(predictions_artifact_path, index=False)
    mlflow.log_artifact(predictions_artifact_path)

