In [1]:
import pandas as pd
import datetime
import mlflow
import pickle

In [2]:
from training_class import TrainingPipeline

<b> Previous knowledge: </b>
- num_bed and num_bath have 12 and 7 zero values.
- condition does not have zero values
- When the renovation date is 0 is understood that the house doesn't have a renovation

In [3]:
df = pd.read_csv(".././data/house_sales.csv")

# Feature engineering

In [4]:
df.loc[(df["num_bed"] == 0), "num_bed"] = 1
df.loc[(df["num_bath"] == 0), "num_bed"] = 0.5
df["avg_total_size_neighbor"] = df["avg_size_neighbor_houses"]+df["avg_size_neighbor_lot"]
df["total_size"] = df["size_house"]+df["size_lot"]
df["size_over_bed"] = df["total_size"]/df["num_bed"]
df["size_over_bath"] = df["total_size"]/df["num_bath"]
df["numfloors_over_bed"] = df["num_floors"]/df["num_bed"]
df["numfloors_over_bath"] = df["num_floors"]/df["num_bath"]
df["house_age"] = int(datetime.date.today().strftime("%Y")) - df["year_built"]
df["age_over_condition"] = df["house_age"]/df["condition"]
df["had_renovation"] = ~(df["renovation_date"] == 0)

# Training algorithm

In [5]:
features_name =  list(df.columns)
features_name.remove("price")
MODEL_NAME = "Price_prediction"

## ElasticNet modeling

In [6]:
PricePrediction = TrainingPipeline()
EXPERIMENT_NAME = 'elasticnet_price_pred'
MODEL_PATH = 'models/elasticnet/elasticnet_price_pred.pkl'
with mlflow.start_run(run_name= EXPERIMENT_NAME):
    train_param = {"scale": True,
                "test_size": 0.20,
                "features" :features_name,
                "target": ["price"],
                "feature_selection": True,
                "hypotesis_model": "elasticnet",
                "hyperparam": {}}

    PricePrediction.fit(df, train_param)
    PricePrediction.Model_metrics()
    fig1, fig2 = PricePrediction.shap_explain_plot(PricePrediction.X_test)
    if fig1 is not None:
        fig1.show()
    if fig2 is not None:
        fig2.show()
    # Log model
    mlflow.pyfunc.log_model(EXPERIMENT_NAME, python_model = PricePrediction, signature=PricePrediction.signature)
    # Log metrics
    mlflow.log_metric('mae_train', PricePrediction.mae_score_train)
    mlflow.log_metric('mae_test', PricePrediction.mae_score_test)
    mlflow.log_metric('mape_train', PricePrediction.mae_score_train)
    mlflow.log_metric('mape_test', PricePrediction.mae_score_test)
    # Log model parameters
    mlflow.log_params(PricePrediction.hyperparam)
    # storing pickle model file
    outfile = open(MODEL_PATH,'wb')
    pickle.dump({'training_params':train_param,'model':PricePrediction},outfile)
    outfile.close()
    mlflow.log_artifact(MODEL_PATH)

MlflowException: Could not create run under non-active experiment with ID 0.

## Decision trees

In [44]:
PricePrediction = TrainingPipeline()
EXPERIMENT_NAME = 'tree_price_pred'
MODEL_PATH = 'models/trees/lgbm_price_pred.pkl'
with mlflow.start_run(run_name= EXPERIMENT_NAME):
    train_param = {"scale": True,
                "test_size": 0.20,
                "features" :features_name,
                "target": ["price"],
                "feature_selection": True,
                "hypotesis_model": "trees",
                "hyperparam": {}}

    PricePrediction.fit(df, train_param)
    PricePrediction.Model_metrics()
    fig1, fig2 = PricePrediction.shap_explain_plot(PricePrediction.X_test)
    if fig1 is not None:
        fig1.show()
    if fig2 is not None:
        fig2.show()
    # Log model
    mlflow.pyfunc.log_model(EXPERIMENT_NAME, python_model = PricePrediction, signature=PricePrediction.signature)
    # Log metrics
    mlflow.log_metric('mae_train', PricePrediction.mae_score_train)
    mlflow.log_metric('mae_test', PricePrediction.mae_score_test)
    mlflow.log_metric('mape_train', PricePrediction.mae_score_train)
    mlflow.log_metric('mape_test', PricePrediction.mae_score_test)
    # Log model parameters
    mlflow.log_params(PricePrediction.hyperparam)
    # storing pickle model file
    outfile = open(MODEL_PATH,'wb')
    pickle.dump({'training_params':train_param,'model':PricePrediction},outfile)
    outfile.close()
    mlflow.log_artifact(MODEL_PATH)

MlflowException: Could not create run under non-active experiment with ID 0.

In [None]:
run_id = mlflow.search_runs(filter_string = f'tags.mlflow.runName = "{EXPERIMENT_NAME}"').iloc[0].run_id
print(run_id)
mlflow.register_model(f"runs:/{run_id}/{MODEL_NAME}", MODEL_NAME)

# Selecting best experiment

In [8]:
import mlflow
import pandas as pd

from mlflow.tracking import MlflowClient


EXPERIMENT_NAME = "trees_price_pred"

client = MlflowClient()

# Retrieve Experiment information
EXPERIMENT_ID = client.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

# Retrieve Runs information (parameter 'depth', metric 'accuracy')
ALL_RUNS_INFO = client.list_run_infos(EXPERIMENT_ID)
ALL_RUNS_ID = [run.run_id for run in ALL_RUNS_INFO]
#ALL_PARAM = [client.get_run(run_id).data.params["depth"] for run_id in ALL_RUNS_ID]
#ALL_METRIC = [client.get_run(run_id).data.metrics["accuracy"] for run_id in ALL_RUNS_ID]

# View Runs information
#DF = pd.DataFrame({"Run ID": ALL_RUNS_ID, "Params": ALL_PARAM, "Metrics": ALL_METRIC})

# Retrieve Artifact from best run
#best_run_id = DF.sort_values("Metrics", ascending=False).iloc[0]["Run ID"]
#best_model_path = client.download_artifacts(best_run_id, "classifier")
#best_model = mlflow.sklearn.load_model(best_model_path)

# Delete runs (DO NOT USE UNLESS CERTAIN)
#for run_id in ALL_RUNS_ID:
#    client.delete_run(run_id)

# Delete experiment (DO NOT USE UNLESS CERTAIN)
#client.delete_experiment(EXPERIMENT_ID)

AttributeError: 'NoneType' object has no attribute 'experiment_id'

In [28]:
run_id = client.search_runs(experiment_ids=['0'])

In [29]:
run_id

[]

In [30]:
all_experiments = [exp.experiment_id for exp in MlflowClient().list_experiments()]
all_experiments

  all_experiments = [exp.experiment_id for exp in MlflowClient().list_experiments()]


[]

In [9]:
client.delete_experiment("0")

MlflowException: Could not find experiment with ID 0