In [14]:
import pandas as pd
import mlflow
import math


from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRFRegressor, XGBRegressor

In [15]:
df = pd.read_csv('../data/processed/casas.csv')
print('vol: ',df.shape[0])
df.head(3)

vol:  1460


Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500


In [16]:
X = df.drop('preco',axis=1)
y = df['preco'].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Experiment 1 - Linear Regression

In [17]:
mlflow.set_experiment('house-prices-eda')
mlflow.start_run()

lr = LinearRegression()
lr.fit(X_train, y_train)

mlflow.sklearn.log_model(lr,'lr')
lr_predicted = lr.predict(X_test)

mse = mean_squared_error(y_test, lr_predicted)
rmse =  math.sqrt(mse)
r2 = r2_score(y_test, lr_predicted)

print('mse: ',mse,'\nrmse: ',rmse,'\nr2: ',r2)
mlflow.log_metric('mse',mse)
mlflow.log_metric('rmse',rmse)
mlflow.log_metric('r2',r2)
mlflow.end_run()

2023/10/13 10:48:08 INFO mlflow.tracking.fluent: Experiment with name 'house-prices-eda' does not exist. Creating a new experiment.


mse:  2078666917.9289913 
rmse:  45592.399782518485 
r2:  0.7021153642898048




# Experiment 2 - XGB Regression

In [18]:
xgb_params = {
    'learning_rate':0.2,
    'n_estimators': 50,
    'random_state':42
}

with mlflow.start_run():
    xgb = XGBRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    mlflow.xgboost.log_model(xgb,'xgboost')

    xgb_predicted = xgb.predict(X_test)
    mse = mean_squared_error(y_test, xgb_predicted)
    rmse =  math.sqrt(mse)
    r2 = r2_score(y_test, xgb_predicted)
    
    print('mse: ',mse,'\nrmse: ',rmse,'\nr2: ',r2)
    mlflow.log_metric('mse',mse)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('r2',r2)

mse:  1386727460.1346002 
rmse:  37238.789724353286 
r2:  0.8012741720529797


<H1>Get experiments</H1>

In [29]:
mlflow.get_experiment_by_name('house-prices-eda')

<Experiment: artifact_location='file:///c:/Users/renan.vital/Documents/Pessoal/ml_flow/mlflow/notebooks/mlruns/154567500677710227', creation_time=1697204888510, experiment_id='154567500677710227', last_update_time=1697204888510, lifecycle_stage='active', name='house-prices-eda', tags={}>

In [75]:
mlflow.get_experiment('154567500677710227')

<Experiment: artifact_location='file:///c:/Users/renan.vital/Documents/Pessoal/ml_flow/mlflow/notebooks/mlruns/154567500677710227', creation_time=1697204888510, experiment_id='154567500677710227', last_update_time=1697204888510, lifecycle_stage='active', name='house-prices-eda', tags={}>

In [76]:
mlflow.search_runs()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.rmse,metrics.r2,metrics.mse,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.source.name,tags.mlflow.source.type,tags.mlflow.log-model.history
0,6af4b98f4a3f45d8bd342740878f61f2,154567500677710227,FINISHED,file:///c:/Users/renan.vital/Documents/Pessoal...,2023-10-13 13:57:52.181000+00:00,2023-10-13 13:58:18.156000+00:00,37238.789724,0.801274,1386727000.0,renan.vital,unique-toad-787,c:\Users\renan.vital\Anaconda3\envs\mlflow\lib...,LOCAL,"[{""run_id"": ""6af4b98f4a3f45d8bd342740878f61f2""..."
1,b41234fe5bab4e659168242897a9fd65,154567500677710227,FINISHED,file:///c:/Users/renan.vital/Documents/Pessoal...,2023-10-13 13:48:10.109000+00:00,2023-10-13 13:49:18.551000+00:00,45592.399783,0.702115,2078667000.0,renan.vital,abundant-gnu-647,c:\Users\renan.vital\Anaconda3\envs\mlflow\lib...,LOCAL,"[{""run_id"": ""b41234fe5bab4e659168242897a9fd65""..."


In [74]:
mlflow.get_run('b41234fe5bab4e659168242897a9fd65')

<Run: data=<RunData: metrics={'mse': 2078666917.9289913,
 'r2': 0.7021153642898048,
 'rmse': 45592.399782518485}, params={}, tags={'mlflow.log-model.history': '[{"run_id": "b41234fe5bab4e659168242897a9fd65", '
                             '"artifact_path": "lr", "utc_time_created": '
                             '"2023-10-13 13:48:10.306262", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"predict_fn": "predict", "loader_module": '
                             '"mlflow.sklearn", "python_version": "3.10.9", '
                             '"env": {"conda": "conda.yaml", "virtualenv": '
                             '"python_env.yaml"}}, "sklearn": '
                             '{"pickled_model": "model.pkl", '
                             '"sklearn_version": "1.2.1", '
                             '"serialization_format": "cloudpickle", "code": '
                             'null}}, "model_uuid": '
             

In [73]:
mlflow.get_run('6af4b98f4a3f45d8bd342740878f61f2')

<Run: data=<RunData: metrics={'mse': 1386727460.1346002,
 'r2': 0.8012741720529797,
 'rmse': 37238.789724353286}, params={}, tags={'mlflow.log-model.history': '[{"run_id": "6af4b98f4a3f45d8bd342740878f61f2", '
                             '"artifact_path": "xgboost", "utc_time_created": '
                             '"2023-10-13 13:57:52.620251", "flavors": '
                             '{"python_function": {"loader_module": '
                             '"mlflow.xgboost", "python_version": "3.10.9", '
                             '"data": "model.xgb", "env": {"conda": '
                             '"conda.yaml", "virtualenv": "python_env.yaml"}}, '
                             '"xgboost": {"xgb_version": "1.7.3", "data": '
                             '"model.xgb", "model_class": '
                             '"xgboost.sklearn.XGBRegressor", "model_format": '
                             '"xgb", "code": null}}, "model_uuid": '
                             '"bb34a964eed6420e94bf8a