In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('../data/processed/casas.csv')
df.head()

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [3]:
X = df.drop('preco', axis = 1)
y = df['preco'].copy()

In [4]:
X.head()

Unnamed: 0,tamanho,ano,garagem
0,159.0,2003,2
1,117.0,1976,2
2,166.0,2001,2
3,160.0,1915,3
4,204.0,2000,3


In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
X_train.shape

(1022, 3)

In [8]:
X_test.shape

(438, 3)

## Mlflow

In [9]:
import mlflow

In [10]:
mlflow.set_experiment('house-prices-eda')
# EDA = Análise Exploratória de Dados
# Estamos iniciando um experimento

<Experiment: artifact_location='file:///home/dgamorim/development/mlflow/notebooks/mlruns/629301400594929119', creation_time=1696353843836, experiment_id='629301400594929119', last_update_time=1696353843836, lifecycle_stage='active', name='house-prices-eda', tags={}>

## Linear Regresion

In [11]:
mlflow.start_run()
# ele vai começar a trackear o nosso experiment

<ActiveRun: >

In [12]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train, y_train)

In [13]:
mlflow.sklearn.log_model(lr, 'lr')
# aqui ele começa a logar o meu modelo para dentro da plataforma do mlflow

<mlflow.models.model.ModelInfo at 0x7f75a2835870>

In [14]:
lr_predict = lr.predict(X_test)
lr_predict[0:2]

array([119279.7701544 , 289022.71460536])

In [15]:
X_test.iloc[0]

tamanho      99.0
ano        1963.0
garagem       1.0
Name: 892, dtype: float64

In [16]:
y_test.head(1)

892    154500
Name: preco, dtype: int64

In [17]:
from sklearn.metrics import mean_squared_error, r2_score
import math

In [18]:
mse = mean_squared_error(y_test, lr_predict)
rmse = math.sqrt(mse)
r2 = r2_score(y_test, lr_predict)
metrics = {
    'mse' : mse,
    'rmse' : rmse,
    'r2' : r2
}
metrics

{'mse': 2078666917.9289908,
 'rmse': 45592.39978251848,
 'r2': 0.7021153642898048}

In [19]:
mlflow.log_metrics(metrics)
# logando a métrica do experimento da regressão linear

In [20]:
mlflow.end_run()
# parando a execução do experimento da regressão linear

## Xgboost

In [21]:
from xgboost import XGBRFRegressor, XGBRegressor

In [22]:
xgb_params = {
    'learning_rate' : 0.2,
    'n_estimators' : 50,
    'random_state' : 42
}

with mlflow.start_run():
    xgb = XGBRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    xgb_predict = xgb.predict(X_test)
    mse = mean_squared_error(y_test, xgb_predict)
    rmse = math.sqrt(mse)
    r2 = r2_score(y_test, xgb_predict)
    metrics = {
    'mse' : mse,
    'rmse' : rmse,
    'r2' : r2
    }
    mlflow.xgboost.log_model(xgb, 'xgboost')
    mlflow.log_metrics(metrics)

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


## Mlflow API

In [23]:
mlflow.get_experiment_by_name('house-prices-eda')

<Experiment: artifact_location='file:///home/dgamorim/development/mlflow/notebooks/mlruns/629301400594929119', creation_time=1696353843836, experiment_id='629301400594929119', last_update_time=1696353843836, lifecycle_stage='active', name='house-prices-eda', tags={}>

In [33]:
mlflow.search_runs()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.rmse,metrics.mse,metrics.r2,tags.mlflow.source.name,tags.mlflow.log-model.history,tags.mlflow.source.type,tags.mlflow.runName,tags.mlflow.source.git.commit,tags.mlflow.user
0,0b553aec49814fa6a764cc660cbc16e7,629301400594929119,FINISHED,file:///home/dgamorim/development/mlflow/noteb...,2023-10-03 17:29:21.494000+00:00,2023-10-03 17:29:25.683000+00:00,37496.442383,1405983000.0,0.798515,/home/dgamorim/development/mlflow/venv/lib/pyt...,"[{""run_id"": ""0b553aec49814fa6a764cc660cbc16e7""...",LOCAL,spiffy-crane-93,aa9dd64992617dff3b184fb216af47c1735ba5ce,dgamorim
1,f02dfd7c1e8e4e96a6ada4199e8c436c,629301400594929119,FINISHED,file:///home/dgamorim/development/mlflow/noteb...,2023-10-03 17:29:17.822000+00:00,2023-10-03 17:29:21.441000+00:00,45592.399783,2078667000.0,0.702115,/home/dgamorim/development/mlflow/venv/lib/pyt...,"[{""run_id"": ""f02dfd7c1e8e4e96a6ada4199e8c436c""...",LOCAL,respected-pig-22,aa9dd64992617dff3b184fb216af47c1735ba5ce,dgamorim
2,c76d1463ff9c443bb02bdf094aa1364c,629301400594929119,FINISHED,file:///home/dgamorim/development/mlflow/noteb...,2023-10-03 17:24:06.132000+00:00,2023-10-03 17:24:08.756000+00:00,37496.442383,1405983000.0,0.798515,/home/dgamorim/development/mlflow/venv/lib/pyt...,"[{""run_id"": ""c76d1463ff9c443bb02bdf094aa1364c""...",LOCAL,welcoming-ram-876,fd015afc15f74c9c48166c99ff1a76ed3b25374d,dgamorim
3,fcff0639700e4e8fb226f40eb7b1db69,629301400594929119,FINISHED,file:///home/dgamorim/development/mlflow/noteb...,2023-10-03 17:24:03.879000+00:00,2023-10-03 17:24:06.106000+00:00,45592.399783,2078667000.0,0.702115,/home/dgamorim/development/mlflow/venv/lib/pyt...,"[{""run_id"": ""fcff0639700e4e8fb226f40eb7b1db69""...",LOCAL,upbeat-wasp-330,fd015afc15f74c9c48166c99ff1a76ed3b25374d,dgamorim


In [34]:
mlflow.get_run('0b553aec49814fa6a764cc660cbc16e7')

<Run: data=<RunData: metrics={'mse': 1405983191.3810997, 'r2': 0.7985147176939275, 'rmse': 37496.44238299281}, params={}, tags={'mlflow.log-model.history': '[{"run_id": "0b553aec49814fa6a764cc660cbc16e7", '
                             '"artifact_path": "xgboost", "utc_time_created": '
                             '"2023-10-03 17:29:23.194340", "flavors": '
                             '{"python_function": {"loader_module": '
                             '"mlflow.xgboost", "python_version": "3.10.12", '
                             '"data": "model.xgb", "env": {"conda": '
                             '"conda.yaml", "virtualenv": "python_env.yaml"}}, '
                             '"xgboost": {"xgb_version": "2.0.0", "data": '
                             '"model.xgb", "model_class": '
                             '"xgboost.sklearn.XGBRegressor", "model_format": '
                             '"xgb", "code": null}}, "model_uuid": '
                             '"03adc69993e44ac1b1440fab