In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('../data/Housing.csv')
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7229300521,20141013T000000,231300.0,2,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [3]:
df.columns

Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long', 'sqft_living15', 'sqft_lot15'],
      dtype='object')

In [4]:
df_clean = df[['price', 'bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 
       'lat', 'long', 'sqft_living15', 'sqft_lot15']].copy()

In [5]:
X = df_clean.drop('price',axis=1)
y = df_clean['price'].copy()

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
import mlflow

In [8]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

In [9]:
mlflow.set_experiment('house-pott')

<Experiment: artifact_location='mlflow-artifacts:/265932537066601344', creation_time=1719358148645, experiment_id='265932537066601344', last_update_time=1719358148645, lifecycle_stage='active', name='house-pottencial', tags={}>

# Linear Regression

In [10]:
mlflow.start_run()

<ActiveRun: >

In [11]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)

In [12]:
mlflow.sklearn.log_model(lr,'lr')

<mlflow.models.model.ModelInfo at 0x20f74609880>

In [13]:
lr_predicted = lr.predict(X_test)

In [14]:
from sklearn.metrics import mean_squared_error, r2_score

In [15]:
import math

mse = mean_squared_error(y_test, lr_predicted)
rmse =  math.sqrt(mse)
r2 = r2_score(y_test, lr_predicted)
mlflow.log_metric('mse',mse)
mlflow.log_metric('rmse',rmse)
mlflow.log_metric('r2',r2)

In [16]:
mlflow.end_run()

# XGBoost

In [17]:
from xgboost import XGBRFRegressor, XGBRegressor

In [18]:

with mlflow.start_run():
    xgb = XGBRegressor()
    xgb.fit(X_train, y_train)
    mlflow.xgboost.log_model(xgb,'xgboost')
    xgb_predicted = xgb.predict(X_test)
    mse = mean_squared_error(y_test, xgb_predicted)
    rmse =  math.sqrt(mse)
    r2 = r2_score(y_test, xgb_predicted)
    mlflow.log_metric('mse',mse)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('r2',r2)



In [19]:
xgb_params = {
    'learning_rate':0.2,
    'n_estimators': 50,
    'random_state':42
}

with mlflow.start_run():
    xgb = XGBRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    mlflow.xgboost.log_model(xgb,'xgboost-tunning')
    xgb_predicted = xgb.predict(X_test)
    mse = mean_squared_error(y_test, xgb_predicted)
    rmse =  math.sqrt(mse)
    r2 = r2_score(y_test, xgb_predicted)
    mlflow.log_metric('mse',mse)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('r2',r2)
    mlflow.log_param('random_state',42)



In [20]:
xgb_params = {
    'learning_rate':0.2,
    'n_estimators': 50,
    'random_state':42
}

with mlflow.start_run():
    xgb = XGBRFRegressor(**xgb_params)
    xgb.fit(X_train, y_train)
    mlflow.xgboost.log_model(xgb,'xgbrfr-tunning')
    xgb_predicted = xgb.predict(X_test)
    mse = mean_squared_error(y_test, xgb_predicted)
    rmse =  math.sqrt(mse)
    r2 = r2_score(y_test, xgb_predicted)
    mlflow.log_metric('mse',mse)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('r2',r2)
    mlflow.log_params(xgb_params)



In [21]:
mlflow.get_experiment_by_name('house-pottencial')

<Experiment: artifact_location='mlflow-artifacts:/265932537066601344', creation_time=1719358148645, experiment_id='265932537066601344', last_update_time=1719358148645, lifecycle_stage='active', name='house-pottencial', tags={}>

In [22]:
mlflow.get_run('c7e2607ab3084fa18b4d87e55dcf1c65')

RestException: RESOURCE_DOES_NOT_EXIST: Run 'c7e2607ab3084fa18b4d87e55dcf1c65' not found