In [1]:
from pathlib import Path
from typing import List, Tuple

import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.pipeline import Pipeline

from src.models.exponential.train import ExponentialModel


In [2]:
# Address of the MLflow tracking server that receives every run logged below.
TRACKING_URI = "http://localhost:5000"
mlflow.set_tracking_uri(TRACKING_URI)


In [3]:
def eval_metrics(actual, pred):
    """Score predictions against ground truth.

    Args:
        actual: Observed target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and coefficient of determination, in that order.
    """
    # RMSE is the square root of scikit-learn's mean squared error.
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)


def get_train_test(
    train_name: str, test_name: str
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the train and test parquet files.

    Files are looked up in the ``data`` directory located beside the current
    working directory (``<cwd>/../data``), so the result depends on where the
    kernel was started.

    Args:
        train_name: File name of the training split inside the data folder.
        test_name: File name of the test split inside the data folder.

    Returns:
        A ``(train, test)`` tuple of DataFrames.
    """
    data_dir = Path().resolve().parent / "data"
    # Training file is read first, then the test file.
    return tuple(
        pd.read_parquet(path=data_dir / file_name)
        for file_name in (train_name, test_name)
    )

In [4]:
class ColumnDropperTransformer(TransformerMixin, BaseEstimator):
    """Pipeline step that removes a fixed set of columns from a DataFrame.

    Inherits from ``BaseEstimator`` (listed after the mixin, as scikit-learn
    recommends) so the step exposes ``get_params``/``set_params`` and can be
    cloned; without it, utilities that clone the enclosing pipeline fail.

    Args:
        columns: Names of the columns to remove. Stored unmodified so that
            ``get_params`` can read the value back.
    """

    def __init__(self, columns: List[str]):
        self.columns = columns

    def transform(self, X: pd.DataFrame, y=None):
        """Return a copy of ``X`` without the configured columns.

        Columns that are not present in ``X`` are skipped silently
        (``errors="ignore"``), so the same step works on frames that have
        already lost some of them.
        """
        return X.drop(columns=self.columns, errors="ignore")

    def fit(self, X: pd.DataFrame, y=None):
        """Do nothing and return ``self``; this step learns no state."""
        return self


In [5]:
def get_variables(df: pd.DataFrame, target: str) -> Tuple[pd.DataFrame, pd.Series]:
    """Split a frame into features and target.

    Args:
        df: Input data containing the target column.
        target: Name of the target column.

    Returns:
        ``(X, y)`` where ``X`` is ``df`` without the target column and ``y``
        is the target column itself. ``df`` is left unmodified.
    """
    features = df.drop(labels=target, axis="columns")
    return features, df[target]


In [6]:
def plot_predictions(X_test: pd.DataFrame, y_test: pd.Series, model: Pipeline):
    """Scatter observed targets and model predictions against ``distancia``.

    Args:
        X_test: Feature frame; must expose a ``distancia`` column, which is
            used as the x axis for both series.
        y_test: Observed target values, plotted as the "data" series.
        model: Fitted pipeline whose ``predict(X_test)`` output is plotted as
            the "model" series.
    """
    # Both series share the same marker and transparency.
    point_style = {"marker": ".", "alpha": 0.3}

    _, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(x=X_test.distancia, y=y_test, label="data", **point_style)
    ax.scatter(
        x=X_test.distancia,
        y=model.predict(X_test),
        label="model",
        **point_style,
    )
    ax.legend()
    plt.show()

In [7]:
# Read both splits from the project's data folder.
train, test = get_train_test(
    train_name="train.parquet",
    test_name="test.parquet",
)


In [8]:
# Fixed seed so the randomly drawn initial parameters are reproducible.
SEED = 42
rnd = np.random.RandomState(SEED)

In [9]:
# Separate the "coste" target from the features, train split first, then test.
(X_train, y_train), (X_test, y_test) = (
    get_variables(df=split, target="coste") for split in (train, test)
)


In [10]:
# Fit, score and track one exponential-model experiment as a single MLflow run.
with mlflow.start_run():
    # Pipeline stages: discard the listed columns, then fit the model.
    dropper = ColumnDropperTransformer(columns=["consumo_medio", "coste"])
    # Four starting parameters drawn uniformly from [-1, 1) with the seeded generator.
    model = ExponentialModel(initial_params=rnd.uniform(-1, 1, size=4))
    pipe = Pipeline([("dropper", dropper), ("model", model)])

    # Train on the training split, then score on the held-out split.
    y_pred = pipe.fit(X_train, y_train).predict(X_test)
    rmse, mae, r2 = eval_metrics(y_test, y_pred)

    # Record the starting point and the evaluation metrics for this run.
    mlflow.log_param("initial_params", model.initial_params)
    for metric_name, metric_value in (("rmse", rmse), ("r2", r2), ("mae", mae)):
        mlflow.log_metric(metric_name, metric_value)

    # Persist the fitted pipeline as an artifact of the run.
    mlflow.sklearn.log_model(pipe, "exponential-pipeline")

  consumption = w0 + w1 * np.exp(-w2 * distance + w3)
