In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Path is relative to the notebook's working directory (sibling lesson folder).
advertising_csv = '../08-linear-regression-models/Advertising.csv'
df = pd.read_csv(advertising_csv)

In [3]:
# Feature matrix: every column of df except the 'sales' target.
X = df.drop(columns='sales')

In [4]:
# Regression target: the 'sales' column.
y = df['sales']

In [5]:
from sklearn.preprocessing import PolynomialFeatures

In [6]:
# Degree-2 polynomial expansion of the features; include_bias=False leaves
# out the constant (all-ones) column.
polynomial_converter = PolynomialFeatures(degree=2, include_bias=False)

In [7]:
# Fit the converter on the full feature matrix; the actual expansion
# happens in the transform() call of the next cell.
polynomial_converter.fit(X)

PolynomialFeatures(include_bias=False)

In [8]:
# Expand X into its polynomial feature columns using the fitted converter.
poly_features = polynomial_converter.transform(X)

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.3,
    random_state=101,
)

In [11]:
from sklearn.linear_model import LinearRegression

In [12]:
# Ordinary least-squares linear model with default settings.
model = LinearRegression()

In [13]:
# Train on the training split only; the test split is kept for evaluation.
model.fit(X_train, y_train)

LinearRegression()

In [14]:
# Predict on the held-out test features; these feed the error metrics below.
test_prediction = model.predict(X_test)

In [15]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [16]:
# Mean absolute error of the test-set predictions.
MAE = mean_absolute_error(y_true=y_test, y_pred=test_prediction)

In [17]:
# Mean squared error of the test-set predictions.
MSE = mean_squared_error(y_true=y_test, y_pred=test_prediction)

In [18]:
# Root mean squared error: the square root puts the error back in the
# same units as y.
RMSE = np.sqrt(MSE)

In [19]:
# create polynomial features of different orders (degrees)
# split the polynomial features into train/test sets
# fit a model on the training data
# store the RMSE for both the train and test sets
# plot the resulting error vs. polynomial order

In [20]:
# Per-degree RMSE accumulators, filled by the degree sweep in the next cell.
train_rmse_errors, test_rmse_errors = [], []

In [21]:
# Start from empty lists so that re-running this cell does not append a
# second copy of the results to whatever an earlier run left behind.
train_rmse_errors = []
test_rmse_errors = []

# NOTE: the loop rebinds polynomial_converter, poly_features, X_train/X_test,
# y_train/y_test and model, so afterwards they refer to the degree-9 run,
# not to the degree-2 objects built in earlier cells.
for degree in range(1, 10):
    # Expand the raw features to the current polynomial degree.
    polynomial_converter = PolynomialFeatures(degree=degree, include_bias=False)
    poly_features = polynomial_converter.fit_transform(X)

    # Same test_size/random_state for every degree, so each degree is
    # scored on the same held-out rows.
    X_train, X_test, y_train, y_test = train_test_split(
        poly_features, y, test_size=0.3, random_state=101
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)

    # RMSE on both splits: the gap between them is what the
    # error-vs-degree comparison is meant to expose.
    train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
    test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

    train_rmse_errors.append(train_rmse)
    test_rmse_errors.append(test_rmse)

# This lecture starts here

In [22]:
# Degree-3 expansion for the final model; include_bias=False leaves out
# the constant (all-ones) column.
final_poly_converter = PolynomialFeatures(degree=3, include_bias=False)

In [23]:
# Fresh estimator for the final fit, separate from `model` used above.
final_model = LinearRegression()

In [24]:
# Fit the converter and expand the complete dataset; there is no
# train/test split here, the final model is trained on every row.
final_poly_converter.fit(X)
full_converted_X = final_poly_converter.transform(X)
final_model.fit(full_converted_X, y)

LinearRegression()

In [25]:
from joblib import dump, load

In [26]:
# Persist the fitted regression model to a file in the working directory.
dump(final_model, 'final_poly_model.joblib')

['final_poly_model.joblib']

In [27]:
# Persist the fitted converter as well: new inputs must go through the
# same polynomial expansion before they can be passed to the model.
dump(final_poly_converter, 'final_poly_converter.joblib')

['final_poly_converter.joblib']

In [28]:
# Reload the converter from disk. joblib.load unpickles the file, so only
# load files from a source you trust.
loaded_converter = load('final_poly_converter.joblib')

In [29]:
# Reload the regression model from disk. joblib.load unpickles the file,
# so only load files from a source you trust.
loaded_model = load('final_poly_model.joblib')

In [30]:
# One new campaign as a single-row 2-D list, one value per feature column.
# Values presumably follow the column order of X; confirm against the CSV.
campaign = [
    [149, 22, 12],
]

In [34]:
# Use transform(), not fit_transform(): the converter was already fitted
# before it was saved, and re-fitting it on the new sample would defeat the
# purpose of persisting it.
# The converter was fitted on a DataFrame, so the row is wrapped in a frame
# carrying the fitted column names; newer scikit-learn versions warn when a
# name-fitted transformer receives unnamed input. Older versions have no
# feature_names_in_ attribute, in which case default integer columns are used.
campaign_features = pd.DataFrame(
    campaign, columns=getattr(loaded_converter, 'feature_names_in_', None)
)
loaded_converter.transform(campaign_features)

array([[1.490000e+02, 2.200000e+01, 1.200000e+01, 2.220100e+04,
        3.278000e+03, 1.788000e+03, 4.840000e+02, 2.640000e+02,
        1.440000e+02, 3.307949e+06, 4.884220e+05, 2.664120e+05,
        7.211600e+04, 3.933600e+04, 2.145600e+04, 1.064800e+04,
        5.808000e+03, 3.168000e+03, 1.728000e+03]])

In [35]:
# Use transform(), not fit_transform(): the converter was already fitted
# before it was saved, and re-fitting it on the new sample would defeat the
# purpose of persisting it.
# The converter was fitted on a DataFrame, so the row is wrapped in a frame
# carrying the fitted column names; newer scikit-learn versions warn when a
# name-fitted transformer receives unnamed input. Older versions have no
# feature_names_in_ attribute, in which case default integer columns are used.
campaign_features = pd.DataFrame(
    campaign, columns=getattr(loaded_converter, 'feature_names_in_', None)
)
transformed_data = loaded_converter.transform(campaign_features)

In [36]:
# Predict sales for the campaign from its expanded polynomial features.
loaded_model.predict(transformed_data)

array([14.64501014])