In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('../data/processed/casas.csv')

In [3]:
df.head()

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [4]:
X = df.drop('preco',axis=1)
y = df['preco'].copy()

In [5]:
X.head()

Unnamed: 0,tamanho,ano,garagem
0,159.0,2003,2
1,117.0,1976,2
2,166.0,2001,2
3,160.0,1915,3
4,204.0,2000,3


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [8]:
X_train.shape

(1022, 3)

In [10]:
X_test.shape

(438, 3)

In [39]:
import mlflow

In [40]:
mlflow.set_experiment('house-prices-eda')

INFO: 'house-prices-eda' does not exist. Creating a new experiment


# Linear Regression

In [41]:
mlflow.start_run()

<ActiveRun: >

In [42]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [46]:
mlflow.sklearn.log_model(lr,'lr')

In [26]:
lr_predicted = lr.predict(X_test)

In [27]:
len(lr_predicted)

438

In [15]:
X_test.iloc[0]

tamanho      99.0
ano        1963.0
garagem       1.0
Name: 892, dtype: float64

In [17]:
y_test

892     154500
1105    325000
413     115000
522     159000
1036    315500
         ...  
331     139000
323     126175
650     205950
439     110000
798     485000
Name: preco, Length: 438, dtype: int64

In [18]:
from sklearn.metrics import mean_squared_error, r2_score

In [47]:
import math

mse = mean_squared_error(y_test, lr_predicted)
rmse =  math.sqrt(mse)
r2 = r2_score(y_test, lr_predicted)
mlflow.log_metric('mse',mse)
mlflow.log_metric('rmse',rmse)
mlflow.log_metric('r2',r2)

In [48]:
mse

2078666917.9289908

In [49]:
rmse

45592.39978251848

In [50]:
r2

0.7021153642898048

In [52]:
mlflow.end_run()

In [None]:
!pip install xgboost

In [51]:
from xgboost import XGBRFRegressor, XGBRegressor

In [54]:
xgb_params = {
    'learning_rate':0.2,
    'n_estimators': 50,
    'random_state':42
}

with mlflow.start_run():
    xgb = XGBRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    mlflow.xgboost.log_model(xgb,'xgboost')
    xgb_predicted = xgb.predict(X_test)
    mse = mean_squared_error(y_test, xgb_predicted)
    rmse =  math.sqrt(mse)
    r2 = r2_score(y_test, xgb_predicted)
    mlflow.log_metric('mse',mse)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('r2',r2)

In [35]:
mse

1297231820.9346623

In [36]:
rmse

36017.10456067592

In [37]:
r2

0.8140993994382723

In [55]:
mlflow.get_experiment_by_name('house-prices-eda')

<Experiment: artifact_location='file:///mnt/d/Alura/2104-mlflowlyfecycle/codigo/mlflow/notebooks/mlruns/1', experiment_id='1', lifecycle_stage='active', name='house-prices-eda', tags={}>

In [58]:
mlflow.list_run_infos('1')

[<RunInfo: artifact_uri='file:///mnt/d/Alura/2104-mlflowlyfecycle/codigo/mlflow/notebooks/mlruns/1/22244b5029fa40dd9aed46c213dc71fa/artifacts', end_time=1614550960185, experiment_id='1', lifecycle_stage='active', run_id='22244b5029fa40dd9aed46c213dc71fa', run_uuid='22244b5029fa40dd9aed46c213dc71fa', start_time=1614550959433, status='FINISHED', user_id='julio'>,
 <RunInfo: artifact_uri='file:///mnt/d/Alura/2104-mlflowlyfecycle/codigo/mlflow/notebooks/mlruns/1/1099511baa174fd285052871e8f236b3/artifacts', end_time=1614550798522, experiment_id='1', lifecycle_stage='active', run_id='1099511baa174fd285052871e8f236b3', run_uuid='1099511baa174fd285052871e8f236b3', start_time=1614550797741, status='FINISHED', user_id='julio'>,
 <RunInfo: artifact_uri='file:///mnt/d/Alura/2104-mlflowlyfecycle/codigo/mlflow/notebooks/mlruns/1/b7aa5d5b2db4406585cebb3396336905/artifacts', end_time=1614550681702, experiment_id='1', lifecycle_stage='active', run_id='b7aa5d5b2db4406585cebb3396336905', run_uuid='b7aa5d

In [59]:
mlflow.get_run('22244b5029fa40dd9aed46c213dc71fa')

<Run: data=<RunData: metrics={'mse': 1386727460.1346002,
 'r2': 0.8012741720529797,
 'rmse': 37238.789724353286}, params={}, tags={'mlflow.log-model.history': '[{"run_id": "22244b5029fa40dd9aed46c213dc71fa", '
                             '"artifact_path": "xgboost", "utc_time_created": '
                             '"2021-02-28 22:22:39.805869", "flavors": '
                             '{"python_function": {"loader_module": '
                             '"mlflow.xgboost", "python_version": "3.8.5", '
                             '"data": "model.xgb", "env": "conda.yaml"}, '
                             '"xgboost": {"xgb_version": "1.3.3", "data": '
                             '"model.xgb"}}}]',
 'mlflow.source.name': '/home/julio/miniconda3/lib/python3.8/site-packages/ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'julio'}>, info=<RunInfo: artifact_uri='file:///mnt/d/Alura/2104-mlflowlyfecycle/codigo/mlflow/notebooks/mlruns/1/22244b5029fa40dd9aed46c213dc71