# 225. Model Deployment Section Overview

# 226. Model Deployment Considerations

# 227. Model Persistence

In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [56]:
# Read the advertising CSV into a DataFrame (columns: TV, radio, newspaper, sales).
df = pd.read_csv('../DATA/Advertising.csv')

In [57]:
# Show the loaded frame; the notebook renders a truncated head/tail view.
df

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [58]:
# Summary statistics, one row per column for easier reading.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
TV,200.0,147.0425,85.854236,0.7,74.375,149.75,218.825,296.4
radio,200.0,23.264,14.846809,0.0,9.975,22.9,36.525,49.6
newspaper,200.0,30.554,21.778621,0.3,12.75,25.75,45.1,114.0
sales,200.0,14.0225,5.217457,1.6,10.375,12.9,17.4,27.0


In [59]:
# Feature matrix: every column except the target.
X = df.drop(columns='sales')

In [60]:
# Target vector: the 'sales' column.
y = df['sales']

In [61]:
from sklearn.model_selection import train_test_split

In [62]:
# First split: 70% of the rows for training, the other 30% set aside for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [63]:
# Second split: halve the set-aside 30% into a validation set and a final holdout set.
X_validation, X_holdout_test, y_validation, y_holdout_test = train_test_split(
    X_test,
    y_test,
    test_size=0.5,
    random_state=42,
)

In [64]:
# Total number of rows before any splitting.
len(X)

200

In [65]:
# Training rows: the 70% kept by the first split (test_size=0.30).
len(X_train)

140

In [66]:
# Validation rows: half of the 30% set aside by the first split.
len(X_validation)

30

In [67]:
# Holdout rows: the other half of the 30% set aside by the first split.
len(X_holdout_test)

30

# Model training

In [68]:
from sklearn.ensemble import RandomForestRegressor

In [69]:
# Random forest with 30 trees; the fixed seed makes the fit repeatable.
model = RandomForestRegressor(random_state=101, n_estimators=30)

In [70]:
# Fit on the training split only; validation and holdout rows stay unseen.
model.fit(X=X_train, y=y_train)

In [71]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [72]:
# Predict on the validation split to evaluate the model before the holdout set is used.
validation_predictions = model.predict(X_validation)

In [73]:
# Tabulate the predictions against the original row labels of the validation split.
pd.Series(
    validation_predictions,
    index=X_validation.index,
    name='Predictions',
).to_frame()

Unnamed: 0,Predictions
84,22.233333
56,4.94
65,9.93
73,11.343333
182,9.113333
55,22.686667
126,5.6
197,13.14
137,20.316667
132,4.63


In [74]:
# Validation MSE.
# NOTE(review): 0.7106296296296291 was previously noted on this line but differs from
# this run's recorded output; presumably from an earlier model configuration. Confirm.
mean_squared_error(y_validation,validation_predictions)

0.4531977777777767

In [75]:
# Validation MAE.
# NOTE(review): 0.6833333333333329 was previously noted on this line but differs from
# this run's recorded output; presumably from an earlier model configuration. Confirm.
mean_absolute_error(y_validation,validation_predictions)

0.5815555555555547

In [76]:
# Validation RMSE (square root of the MSE, so it is in the same units as 'sales').
# NOTE(review): 0.8429885109713116 was previously noted on this line but differs from
# this run's recorded output; presumably from an earlier model configuration. Confirm.
np.sqrt(mean_squared_error(y_validation,validation_predictions))

0.6731996566976076

In [77]:
# Distribution of the target, for judging the error metrics against its scale.
df['sales'].describe()

count    200.000000
mean      14.022500
std        5.217457
min        1.600000
25%       10.375000
50%       12.900000
75%       17.400000
max       27.000000
Name: sales, dtype: float64

# Final performance metrics (Holdout set)

In [78]:
# Predict on the holdout split, which was used neither for fitting nor for the validation metrics.
holdout_predictions = model.predict(X_holdout_test)

In [79]:
# Holdout MSE.
mean_squared_error(y_true=y_holdout_test, y_pred=holdout_predictions)

0.6564029629629633

In [80]:
# Holdout MAE.
mean_absolute_error(y_true=y_holdout_test, y_pred=holdout_predictions)

0.7224444444444451

In [82]:
# Holdout RMSE (square root of the holdout MSE).
np.sqrt(
    mean_squared_error(y_true=y_holdout_test, y_pred=holdout_predictions)
)

0.8101869925905768

In [83]:
# Fresh estimator for deployment, built with the same hyperparameters as the evaluated model.
final_model = RandomForestRegressor(random_state=101, n_estimators=30)

In [84]:
# Refit on all rows (X, y) rather than only the training split, before persisting.
final_model.fit(X,y)

In [85]:
import joblib

In [86]:
# Persist the fitted estimator to disk; joblib.dump returns the list of files it wrote.
joblib.dump(final_model, 'final_model.pkl')

['final_model.pkl']

In [88]:
# Feature names, in the order the model was trained on.
X.columns.tolist()

['TV', 'radio', 'newspaper']

In [89]:
# Save the feature names next to the model so later inputs can be labelled the same way.
joblib.dump(X.columns.tolist(), 'col_names.pkl')

['col_names.pkl']

# Loading the model

In [90]:
# Reload the saved feature names.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
new_columns = joblib.load('col_names.pkl')

In [91]:
# Check that the reloaded names match the training columns.
new_columns

['TV', 'radio', 'newspaper']

In [92]:
# Reload the persisted estimator.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
loaded_model = joblib.load('final_model.pkl')

In [94]:
# The model was fitted on a DataFrame, so it remembers its feature names. Passing a bare
# nested list makes scikit-learn (>= 1.0) warn that the input has no valid feature names
# and leaves column order unchecked. Labelling the row with the persisted column names
# keeps the input consistent with training; the values are unchanged.
loaded_model.predict(pd.DataFrame([[230.1, 37.8, 69.2]], columns=new_columns))



array([21.99])

# 228. Model Deployment as an API - General Overview

# 230. Model API - Creating the Script