# House Price Prediction with LightGBM

## 1. Dependencies

This notebook has been tested with **Python 3.8.16** and the following package versions:

In [None]:
%%capture
!pip install cloudpickle==1.5.0
!pip install lightgbm==2.2.3
!pip install pandas==1.3.5
!pip install scikit-learn==1.0.2
!pip install verta==0.21.1
!pip install wget==3.2

## 2. Imports

In [None]:
import cloudpickle
import lightgbm as lgb
import os
import pandas as pd
import wget

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from verta import Client
from verta.environment import Python
from verta.registry import VertaModelBase, verify_io
from verta.utils import ModelAPI

## 3. Model Training

### 3.1 Load Training Data

In [None]:
file_name = 'melb-data.csv'
data_url = f"http://s3.amazonaws.com/verta-starter/{file_name}"

# Fetch the CSV only when it is not already present in the working directory,
# so re-running the cell does not download it again.
already_downloaded = os.path.isfile(file_name)
if not already_downloaded:
    wget.download(data_url)

df = pd.read_csv(file_name)

In [None]:
# Keep only complete rows: any row containing a missing value is dropped.
# NOTE: this rebinds `df`, so the raw frame is no longer available afterwards.
df = df.dropna(axis='index')

In [None]:
# Column to predict.
target_col = 'Price'

# Columns fed to the model as inputs; the order here is the order of the
# feature values in every row passed to the model.
feature_cols = [
    'Rooms',
    'Bathroom',
    'Landsize',
    'BuildingArea',
    'Distance',
    'YearBuilt',
    'Car',
    'Propertycount',
]

In [None]:
# Split the cleaned frame into model inputs (X) and the regression target (y).
X, y = df.loc[:, feature_cols], df.loc[:, target_col]

In [None]:
# A fixed random_state makes the split reproducible across runs. No test_size
# is passed, so scikit-learn's default split proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [None]:
# Wrap the training split in LightGBM's Dataset container.
lgb_train = lgb.Dataset(data=X_train, label=y_train)

In [None]:
# Validation Dataset built from the held-out split; `reference` ties it to the
# training Dataset.
lgb_eval = lgb.Dataset(data=X_test, label=y_test, reference=lgb_train)

### 3.2 Train/Test Code

In [None]:
# Training parameters passed to lgb.train.
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    # A list (not a set) keeps the metric order stable between kernel
    # restarts; set iteration order for strings is not reproducible.
    'metric': ['l2', 'l1'],
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

In [None]:
# Train for at most 20 boosting rounds, stopping early when the validation
# metrics have not improved for 5 consecutive rounds.
# NOTE(review): lgb_eval is built from X_test/y_test, the same split the
# metrics are later computed on, so those metrics may be optimistic.
early_stop = lgb.early_stopping(stopping_rounds=5)

gbm = lgb.train(
    params=params,
    train_set=lgb_train,
    num_boost_round=20,
    valid_sets=lgb_eval,
    callbacks=[early_stop],
)

In [None]:
# Predict prices for the held-out rows.
y_pred = gbm.predict(data=X_test)

In [None]:
# Error metrics on the held-out split; squared=False makes
# mean_squared_error return the root of the MSE.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
RMSE = mean_squared_error(y_true=y_test, y_pred=y_pred, squared=False)

In [None]:
# Report each metric on its own line, rounded to three decimals.
print(
    f"MAE: {round(MAE, 3)}",
    f"MSE: {round(MSE, 3)}",
    f"RMSE: {round(RMSE, 3)}",
    sep="\n",
)

## 4. Verta Setup

In [None]:
# Placeholders: fill in your Verta host, e-mail and developer key before
# running. Avoid committing the notebook with real credentials in this cell.
os.environ.update({
    'VERTA_HOST': '',
    'VERTA_EMAIL': '',
    'VERTA_DEV_KEY': '',
})

In [None]:
# Connect to the Verta instance named by the VERTA_HOST environment variable.
client = Client(host=os.environ['VERTA_HOST'])

## 5. Model Registration

In [None]:
# Serialize the trained model to disk; this file is the artifact that the
# Predictor class loads at start-up.
with open('model.pkl', mode='wb') as model_file:
    cloudpickle.dump(gbm, model_file)

In [None]:
class Predictor(VertaModelBase):
    """Verta model class serving predictions from a cloudpickled model artifact."""

    def __init__(self, artifacts):
        """Load the serialized model.

        Args:
            artifacts: Mapping containing the key ``'serialized_model'`` whose
                value is the path of the cloudpickle file to load.
        """
        # Context manager so the file handle is closed once loading finishes.
        with open(artifacts['serialized_model'], 'rb') as model_file:
            self.model = cloudpickle.load(model_file)

    @verify_io
    def predict(self, data):
        """Run the model on every batch in ``data``.

        Args:
            data: Iterable of batches; each batch is passed as-is to the
                model's ``predict`` (see ``example`` for the expected nesting).

        Returns:
            A list with one entry per batch, each entry being that batch's
            predictions converted with ``tolist()``. Empty input gives ``[]``.
        """
        return [self.model.predict(batch).tolist() for batch in data]

    def describe(self):
        """Return the metadata dictionary describing the ``predict`` method."""
        return {
            'method': 'predict',
            'args': 'Rooms,Bathroom,Landsize,BuildingArea,Distance,YearBuilt,Car,Propertycount',
            'returns': 'PricePrediction',
            'description': 'Predicts house prices based on LightGBM trained model.',
            'input_description': 'List of house information, such as number of rooms, building area, land size, etc.',
            'output_description': 'House price prediction.'
        }

    def example(self):
        """Return a sample ``predict`` input: one batch holding one row of eight feature values."""
        return [[[3.0, 1.0, 206.0, 110.0, 8.4, 1980.0, 1.0, 8801.0]]]

In [None]:
# Maps the artifact key read by Predictor.__init__ to the pickled model file.
artifacts = dict(serialized_model='model.pkl')

In [None]:
# Reuse the registered model called 'LightGBM' if it exists, otherwise create it.
registered_model = client.get_or_create_registered_model(
    name='LightGBM',
)

In [None]:
# Register version 'v0': the Predictor class, its Python requirements, an API
# description built from the training inputs and targets, and the model file.
model = registered_model.create_standard_model(
    name='v0',
    model_cls=Predictor,
    environment=Python(requirements=['scikit-learn', 'lightgbm']),
    model_api=ModelAPI(
        X_train,
        pd.DataFrame(data=y_train.tolist(), columns=['PricePrediction']),
    ),
    artifacts=artifacts,
)

## 6. Model Endpoint

In [None]:
# Reuse the endpoint at path 'light-gbm' if it exists, otherwise create it.
endpoint = client.get_or_create_endpoint(path='light-gbm')

In [None]:
# Deploy the registered model version to the endpoint; wait=True is passed so
# the call waits for the update rather than returning immediately.
endpoint.update(
    model,
    wait=True,
)

In [None]:
# Handle used below to send prediction requests to the endpoint.
deployed_model = endpoint.get_deployed_model()

In [None]:
# Smoke-test the endpoint with one batch made of the first five training rows.
# Slicing before converting avoids turning the whole training frame into
# Python lists just to keep five rows.
deployed_model.predict([X_train.head(5).values.tolist()])