## Training model

In [1]:
import numpy as np
import pandas as pd

from sklearn import datasets
from sklearn import linear_model

# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn — confirm the pinned version.
boston = datasets.load_boston()

# Features as a frame with one named column per feature; target as a one-column frame.
x = pd.DataFrame(data=boston.data, columns=boston.feature_names)
y = pd.DataFrame({"MEDV": boston.target})

# Keep the training column order: it is saved later and reused at prediction time.
column_order = x.columns

# Ordinary least-squares fit; `fit` returns the estimator itself.
lm = linear_model.LinearRegression()
model = lm.fit(x, y)

In [2]:
# Persist what the prediction section needs once the kernel has been restarted:
# the fitted model and the column order it was trained with.
import joblib

joblib.dump(value=model, filename='linear_model.joblib')
# NOTE: despite the .txt extension, this file is written by joblib, not as plain text.
joblib.dump(value=column_order, filename='column_order.txt')

['column_order.txt']

In [None]:
# Deliberate stop: `xyz` is not defined in any cell shown in this notebook, so running
# this cell raises NameError and halts a "Run All" before the prediction section.
xyz 
# (intentionally breaking the notebook flow)
# restart notebook at this point to clear memory. Start execution from the next cells

## Making predictions

Steps in serving predictions:

- Load the model and its metadata (e.g. column order).
- Data processing: JSON payload -> ordered DataFrame row -> feature vector.
- Impute any missing data.

In [3]:
import pandas as pd
import joblib

In [4]:
# Restore the fitted model and the training column order from disk.
# NOTE(review): joblib.load unpickles its input, so only load files you trust.
model = joblib.load(filename='linear_model.joblib')
column_order = joblib.load(filename='column_order.txt')

In [None]:
# let's recall what the features mean
# The notebook is restarted before this section, so the `boston` object created in the
# training section no longer exists; reload the dataset here so this cell runs on a
# fresh kernel instead of raising NameError.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the pinned version.
from sklearn import datasets

boston = datasets.load_boston()
print(boston.DESCR)

In [None]:
# Example request, keyed by feature name
request_payload = dict(
    AGE=65.2,
    B=396.9,
    CHAS=0,
    CRIM=0.00632,
    DIS=4.09,
    INDUS=2.31,
    LSTAT=4.98,
    NOX=0.538,
    PTRATIO=15.3,
    RAD=1.0,
    RM=6.575,
    TAX=296,
    ZN=18,
)

# One-row frame built directly from the payload; columns follow the payload's key order
input_features = pd.DataFrame([request_payload])
display(input_features)

# Re-select the columns so they line up with the order used at training time
input_features = input_features.loc[:, column_order]
display(input_features)

# predict (last expression, so the result is shown as the cell output)
model.predict(list(input_features.values))

In [None]:
# a better way of creating input features that handles missing data

request_payload = { 'AGE': 65.2,
                    'B': 396.9,
                    'CHAS': 0,
                    'CRIM': 0.00632,
                    'DIS': 4.09,
                    'INDUS': 2.31,
                    'LSTAT': 4.98,
                    'NOX': 0.538,
                    'PTRATIO': 15.3,
                    'RAD': 1.0,
                    'RM': 6.575,
                    'TAX': 296,
                    'ZN': 18 }

# Build a one-row frame from the payload, then reindex it to the training columns:
#   - columns come out in exactly the order given by column_order
#   - features missing from the payload become NaN and are then imputed with 0
#   - payload keys that are not training features are dropped
# `DataFrame.append` is avoided here: it was deprecated in pandas 1.4 and removed
# in pandas 2.0.
input_features = (
    pd.DataFrame([request_payload])
    .reindex(columns=column_order)
    .fillna(0)
)

model.predict(input_features.values.tolist())