# Polynomial regression

In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

## Why Polynomial Regression?

In [None]:
# Motivation: build a small synthetic dataset that follows a cubic trend with
# Gaussian noise, so a straight-line fit is visibly inadequate.
np.random.seed(0)  # fixed seed: the same 20 points are produced on every run
x = 2 - 3 * np.random.normal(loc=0, scale=1, size=20)
y = x - 2 * x ** 2 + 0.5 * x ** 3 + np.random.normal(loc=-3, scale=3, size=20)
plt.scatter(x, y, s=10)
plt.show()

In [None]:
# Baseline: fit an ordinary straight-line linear regression to this dataset.
from sklearn.linear_model import LinearRegression

# The estimator is given 2-D column arrays. reshape(-1, 1) produces a single
# column and, unlike indexing with np.newaxis, leaves an array that is already
# a single column unchanged, so this cell can be re-run without adding
# another axis each time.
x = x.reshape(-1, 1)
y = y.reshape(-1, 1)

model = LinearRegression()
model.fit(x, y)
y_pred = model.predict(x)

# Data points with the fitted straight line (red) drawn on top.
plt.scatter(x, y, s=10)
plt.plot(x, y_pred, color='r')
plt.show()

## What is Polynomial Regression?

- Polynomial regression is a form of regression analysis in which the relationship between the independent variable and the dependent variable is **modeled as an nth-degree polynomial**.


Here’s the general equation for a polynomial regression model.

$$f_{\mathbf{w},b}(x) = w_1x + w_2x^2 + \dots + w_nx^n + b \tag{1}$$

In [None]:
# Display the first four rows of the input array (the raw feature values).
x[:4]

In [None]:
import operator
from sklearn.preprocessing import PolynomialFeatures

# Expand the input into polynomial terms up to degree 2.
# include_bias=False leaves out the constant-1 column that the default adds.
polynomial_features = PolynomialFeatures(
    degree=2,
    include_bias=False,
)
x_poly = polynomial_features.fit_transform(x)

In [None]:
# The default "include_bias=True" would add a feature that's constantly 1.
# x_poly was built with include_bias=False, so no constant column is expected
# in the rows displayed here.
x_poly[:4]

In [None]:
# Fit a linear model on the polynomial features and predict on the same data.
model = LinearRegression()
model.fit(x_poly, y)
y_poly_pred = model.predict(x_poly)

plt.scatter(x, y, s=10)
# A line plot connects points in the order given, so draw the curve in
# ascending x order. Only a sort permutation is computed: x and y_poly_pred
# themselves are left untouched, which keeps them aligned with y and makes
# this cell safe to re-run.
order = np.argsort(x[:, 0], kind="stable")
plt.plot(x[order], y_poly_pred[order], color='m')
plt.show()

## Polynomial Regression with sklearn

In [None]:
from helpers.datasets import make_wave
from sklearn.preprocessing import PolynomialFeatures

# Load 100 samples from the project's make_wave helper.
X, y = make_wave(n_samples=100)
# 1000 evenly spaced query points in [-3, 3), shaped as a single column so
# they can be passed to the same transformers and estimators as X.
line = np.linspace(start=-3, stop=3, num=1000, endpoint=False).reshape(-1, 1)

# Show the first five rows of X.
X[:5]

In [None]:
# Build polynomial features up to x ** 10.
# The default "include_bias=True" would add a feature that's constantly 1,
# so it is switched off here.
poly = PolynomialFeatures(degree=10, include_bias=False)
X_poly = poly.fit_transform(X)

In [None]:
# Report the shape of the expanded feature matrix (samples x generated features).
print(f"X_poly.shape: {X_poly.shape}")

In [None]:
# Show the first five rows before and after the polynomial expansion.
print(
    f"Entries of X:\n{X[:5]}",
    f"Entries of X_poly:\n{X_poly[:5]}",
    sep="\n",
)

In [None]:
# List the names the transformer assigns to each generated feature column.
print(f"Polynomial feature names:\n{poly.get_feature_names_out()}")

In [None]:
# Fit a linear model on the polynomial features of the wave data.
reg = LinearRegression()
reg.fit(X_poly, y)

# Apply the same polynomial expansion to the dense grid, then draw the
# fitted curve with the original samples (black dots) on top.
line_poly = poly.transform(line)
plt.plot(line, reg.predict(line_poly), label='polynomial linear regression')
plt.plot(X[:, 0], y, 'o', c='k')
plt.xlabel("Input feature")
plt.ylabel("Regression output")
plt.legend(loc="best")
plt.show()

In [None]:
from ipywidgets import interact

def plot_ploynomal_interactive():
    """Show a dropdown-driven plot of polynomial regression fits.

    The dropdown offers degrees 1 through 19. For the chosen degree, a linear
    model is fitted on the polynomial features of the data and its
    predictions are drawn over the sample points.

    The notebook-level ``X``, ``y`` and ``line`` are captured at the moment
    this function is called, so the widget keeps using that data even if a
    later cell rebinds those names.

    Returns:
        The value returned by ``ipywidgets.interact`` for the callback.
    """
    # Snapshot the data now. The callback runs again on every dropdown change,
    # possibly long after other cells have reassigned X / y.
    X_data, y_data, grid = X, y, line

    def interactive_polynomal(degree):
        """Fit and plot a polynomial regression of the given degree."""
        poly = PolynomialFeatures(degree=degree, include_bias=False)
        X_poly = poly.fit_transform(X_data)

        reg = LinearRegression().fit(X_poly, y_data)
        line_poly = poly.transform(grid)
        plt.plot(grid, reg.predict(line_poly), label='polynomial linear regression')
        plt.plot(X_data[:, 0], y_data, 'o', c='k')
        plt.ylabel("Regression output")
        plt.xlabel("Input feature")
        plt.legend(loc="best")
        # Render the figure as part of this callback's own output.
        plt.show()

    return interact(interactive_polynomal, degree=list(range(1, 20)))

plot_ploynomal_interactive()
plt.show()

In [None]:
from sklearn.svm import SVR

# Fit one SVR per gamma value on the raw (non-expanded) feature and draw each
# model's predictions over the dense grid.
for gamma in (1, 10):
    svr = SVR(gamma=gamma)
    svr.fit(X, y)
    plt.plot(line, svr.predict(line), label='SVR gamma={}'.format(gamma))

# Original samples as black dots.
plt.plot(X[:, 0], y, 'o', c='k')
plt.xlabel("Input feature")
plt.ylabel("Regression output")
plt.legend(loc="best")
plt.show()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string: "\s" is not a valid escape in a normal Python string literal and
# triggers an invalid-escape-sequence warning; r"\s+" passes the regex intact.
# The first 22 lines of the file are skipped and no header row is assumed.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
# Each record spans two consecutive rows of the parsed file: all columns of
# the even-indexed row plus the first two columns of the following row form
# the features; the third column of that following row is the target.
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# rescale data: the scaler is fitted on the training split only and the same
# fitted scaling is then applied to the test split
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Degree-2 polynomial expansion of the scaled features. The transformer is
# fitted on the training split and then reused unchanged for the test split.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

# Compare the number of columns before and after the expansion.
print(f"X_train.shape: {X_train.shape}")
print(f"X_train_poly.shape: {X_train_poly.shape}")

In [None]:
# List the names the transformer assigns to each generated feature column.
print(f"Polynomial feature names:\n{poly.get_feature_names_out()}")

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression on the scaled features only.
ridge = Ridge()
ridge.fit(X_train_scaled, y_train)
print(f"Score without interactions: {ridge.score(X_test_scaled, y_test):.3f}")

# Same model type, now trained on the polynomial-expanded features.
ridge = Ridge()
ridge.fit(X_train_poly, y_train)
print(f"Score with interactions: {ridge.score(X_test_poly, y_test):.3f}")

## Disadvantages of polynomial regression