<a href="https://colab.research.google.com/github/jaw039/Google-Colab-Notebooks/blob/main/CSE151A_Constructing_Polynomial_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Constructing a Polynomial Regression: An Example

In [None]:
import operator
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures


## Creating a synthetic dataset

In [None]:
# Synthetic dataset: 100 noisy samples of a cubic polynomial in x.

np.random.seed(0)  # fixed seed so the same points are drawn on every run
# np.random.normal(loc, scale, size) draws `size` samples from a normal
# distribution with mean `loc` and standard deviation `scale`.
x = 2 - 3 * np.random.normal(loc=0, scale=1, size=100)
y = x - 2 * (x ** 2) + 0.5 * (x ** 3) + np.random.normal(loc=-3, scale=3, size=100)

# Turn the flat vectors into (n_samples, 1) column arrays.
x = x[:, None]
y = y[:, None]

plt.scatter(x=x, y=y, s=10)
plt.show()

## Linear Regression

In [None]:
# Split the data 80:20 without shuffling: every sample except the last 20
# is used for training, and the last 20 are held out for testing.
x_train = x[:-20]
y_train = y[:-20]

x_test = x[-20:]
y_test = y[-20:]

In [None]:
# Fit a plain linear model on the training split (fit() returns the estimator).
model = LinearRegression().fit(x_train, y_train)
print("Model weights: ")
print(model.coef_)

# Predict on the training split to see how well the model fits the data it
# was trained on, and on the test split for the held-out samples.
yhat_train_pred = model.predict(x_train)
yhat_test_pred = model.predict(x_test)

# Scatter only the training subset (80% of the data).
# Does it look the same as the scatter of the full dataset?
plt.scatter(x=x_train, y=y_train, s=10)
plt.show()


In [None]:
# Overlay the fitted regression line (red) on the training points.
# How good is the fit? The MSE can quantify it, but the picture alone
# already shows that we can do better.
plt.scatter(x=x_train, y=y_train, s=10)
plt.plot(x_train, yhat_train_pred, c='r')
plt.show()

In [None]:
# Error of the linear model on the training split.
mse = mean_squared_error(y_train, yhat_train_pred)
r2 = r2_score(y_train, yhat_train_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
print('Training MSE: %8.15f' % mse)
print('Training RMSE: %8.15f' % rmse)
print('Training R2: %8.15f' % r2)

In [None]:
# Error of the linear model on the held-out test split.
mse = mean_squared_error(y_test, yhat_test_pred)
r2 = r2_score(y_test, yhat_test_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
print('Test MSE: %8.15f' % mse)
print('Test RMSE: %8.15f' % rmse)
print('Test R2: %8.15f' % r2)

## Constructing a 2nd order Polynomial Regression

In [None]:
# Rebuild the 100-point synthetic dataset (same seed, so identical values).

np.random.seed(0)
# np.random.normal(loc, scale, size): mean, standard deviation, and the
# number of samples to draw.
x = 2 - 3 * np.random.normal(loc=0, scale=1, size=100)
y = x - 2 * (x ** 2) + 0.5 * (x ** 3) + np.random.normal(loc=-3, scale=3, size=100)

# Add a trailing axis so both arrays have shape (100, 1).
x = x[:, None]
y = y[:, None]


In [None]:
#x

In [None]:
# PolynomialFeatures builds a new feature matrix holding every polynomial
# combination of the input features up to the requested degree.
# Example: for a two-dimensional sample [a, b], the degree-2 expansion is
# [1, a, b, a^2, ab, b^2]. Here x has a single column, so each row becomes
# [1, x, x**2].

polynomial_features = PolynomialFeatures(degree=2)
x_poly = polynomial_features.fit_transform(x)
print('Polynomial Features: [1,x,x**2]')
print(x_poly[0])  # expanded features of the first sample

In [None]:
# 80:20 split without shuffling: the last 20 rows are held out for testing.
x_poly_train, x_poly_test = x_poly[:-20], x_poly[-20:]
y_train, y_test = y[:-20], y[-20:]

In [None]:
# Linear regression on the expanded features, i.e. a degree-2 polynomial fit.
model = LinearRegression().fit(x_poly_train, y_train)
yhat_train_pred = model.predict(x_poly_train)
yhat_test_pred = model.predict(x_poly_test)

# One weight per polynomial feature column.
print("Model weights: ")
print(model.coef_)

In [None]:
# Error of the degree-2 model on the training split.
mse = mean_squared_error(y_train, yhat_train_pred)
r2 = r2_score(y_train, yhat_train_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
print('Training MSE: %8.15f' % mse)
print('Training RMSE: %8.15f' % rmse)
print('Training R2: %8.15f' % r2)

In [None]:
# Error of the degree-2 model on the held-out test split.
mse = mean_squared_error(y_test, yhat_test_pred)
r2 = r2_score(y_test, yhat_test_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
print('Testing MSE: %8.15f' % mse)
print('Testing RMSE: %8.15f' % rmse)
print('Testing R2: %8.15f' % r2)

In [None]:
# Scatter the full dataset, then overlay the fitted curve (magenta) evaluated
# on the training inputs.
plt.scatter(x, y, s=10)

# The predictions were made for the training rows only, which are the leading
# rows of x. Slice x to that length explicitly instead of relying on zip()
# silently dropping the extra rows.
n_fit = len(yhat_train_pred)
x_fit = np.asarray(x[:n_fit]).ravel()
yhat_fit = np.asarray(yhat_train_pred).ravel()

# A line plot connects points in the order given, so order them by x first.
# Local names are used so that x_poly_train / yhat_train_pred are not
# overwritten; re-running this cell therefore gives the same plot.
order = np.argsort(x_fit, kind='stable')
plt.plot(x_fit[order], yhat_fit[order], color='m')
plt.show()

## Polynomial Regression 3rd order

In [None]:
# Synthetic dataset for the 3rd-order fit: 100 points, same seed as before.

np.random.seed(0)
# np.random.normal(loc, scale, size) -> `size` draws from a normal
# distribution with mean `loc` and standard deviation `scale`.
x = 2 - 3 * np.random.normal(loc=0, scale=1, size=100)
y = x - 2 * (x ** 2) + 0.5 * (x ** 3) + np.random.normal(loc=-3, scale=3, size=100)

# Column-vector layout: shape (100,) -> (100, 1).
x = x[:, None]
y = y[:, None]


In [None]:
# Expand x into all polynomial feature combinations up to degree 3, so each
# row becomes [1, x, x**2, x**3].
polynomial_features = PolynomialFeatures(degree=3)
x_poly = polynomial_features.fit_transform(x)
print('Polynomial Features: [1,x,x**2,x**3]')
print(x_poly[0])  # expanded features of the first sample

In [None]:
# 80:20 split without shuffling: the last 20 rows are held out for testing.
x_poly_train, x_poly_test = x_poly[:-20], x_poly[-20:]
y_train, y_test = y[:-20], y[-20:]

In [None]:
# Linear regression on the expanded features, i.e. a degree-3 polynomial fit.
model = LinearRegression().fit(x_poly_train, y_train)
yhat_train_pred = model.predict(x_poly_train)
yhat_test_pred = model.predict(x_poly_test)

# One weight per polynomial feature column.
print("Model weights: ")
print(model.coef_)

In [None]:
# Error of the degree-3 model on the training split.
mse = mean_squared_error(y_train, yhat_train_pred)
r2 = r2_score(y_train, yhat_train_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
print('Training MSE: %8.15f' % mse)
print('Training RMSE: %8.15f' % rmse)
print('Training R2: %8.15f' % r2)

In [None]:
# Error of the degree-3 model on the held-out test split.
mse = mean_squared_error(y_test, yhat_test_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
r2 = r2_score(y_test, yhat_test_pred)
print('Testing MSE: %8.15f' % mse)
print('Testing RMSE: %8.15f' % rmse)
print('Testing R2: %8.15f' % r2)

In [None]:
# Mean over every element of x (axis=None is the default: the array is flattened).
x.mean(axis=None)

In [None]:
# Dimensions of x as an (n_samples, n_features) tuple.
x.shape

In [None]:
# Scatter the full dataset, then overlay the fitted curve (magenta) evaluated
# on the training inputs.
plt.scatter(x, y, s=10)

# The predictions were made for the training rows only, which are the leading
# rows of x. Slice x to that length explicitly instead of relying on zip()
# silently dropping the extra rows.
n_fit = len(yhat_train_pred)
x_fit = np.asarray(x[:n_fit]).ravel()
yhat_fit = np.asarray(yhat_train_pred).ravel()

# A line plot connects points in the order given, so order them by x first.
# Local names are used so that x_poly_train / yhat_train_pred are not
# overwritten; re-running this cell therefore gives the same plot.
order = np.argsort(x_fit, kind='stable')
plt.plot(x_fit[order], yhat_fit[order], color='m')
plt.show()

## Multiorder polynomial

In [None]:
# Synthetic dataset for the high-order fit: 100 points, same seed as before.

np.random.seed(0)
# np.random.normal(loc, scale, size): mean, standard deviation, and the
# number of samples to draw.
x = 2 - 3 * np.random.normal(loc=0, scale=1, size=100)
y = x - 2 * (x ** 2) + 0.5 * (x ** 3) + np.random.normal(loc=-3, scale=3, size=100)

# Add a trailing axis so both arrays have shape (100, 1).
x = x[:, None]
y = y[:, None]


In [None]:
# Regenerate the same synthetic dataset (seed 0 reproduces identical values).
np.random.seed(0)
x = 2 - 3 * np.random.normal(loc=0, scale=1, size=100)
y = x - 2 * (x ** 2) + 0.5 * (x ** 3) + np.random.normal(loc=-3, scale=3, size=100)

# Column-vector layout: shape (100,) -> (100, 1).
x = x[:, None]
y = y[:, None]


In [None]:
# Expand x into all polynomial feature combinations up to degree 15.
polynomial_features = PolynomialFeatures(degree=15)
x_poly = polynomial_features.fit_transform(x)

# 80:20 split without shuffling: the last 20 rows are held out for testing.
x_poly_train, x_poly_test = x_poly[:-20], x_poly[-20:]
y_train, y_test = y[:-20], y[-20:]

In [None]:
# Linear regression on the expanded features, i.e. a degree-15 polynomial fit.
model = LinearRegression().fit(x_poly_train, y_train)
yhat_train_pred = model.predict(x_poly_train)
yhat_test_pred = model.predict(x_poly_test)

# One weight per polynomial feature column.
print("Model weights: ")
print(model.coef_)

In [None]:
# Error of the degree-15 model on the TRAINING split. The inputs are
# y_train / yhat_train_pred, so the output is labelled 'Training'.
mse = mean_squared_error(y_train, yhat_train_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
r2 = r2_score(y_train, yhat_train_pred)
print('Training MSE: %8.15f' % mse)
print('Training RMSE: %8.15f' % rmse)
print('Training R2: %8.15f' % r2)

In [None]:
# Error of the degree-15 model on the held-out TEST split. The inputs are
# y_test / yhat_test_pred, so the output is labelled 'Testing'.
mse = mean_squared_error(y_test, yhat_test_pred)  # plain MSE, no square root
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
r2 = r2_score(y_test, yhat_test_pred)
print('Testing MSE: %8.15f' % mse)
print('Testing RMSE: %8.15f' % rmse)
print('Testing R2: %8.15f' % r2)

In [None]:
# Scatter the full dataset, then overlay the fitted curve (magenta) evaluated
# on the training inputs.
plt.scatter(x, y, s=10)

# The predictions were made for the training rows only, which are the leading
# rows of x. Slice x to that length explicitly instead of relying on zip()
# silently dropping the extra rows.
n_fit = len(yhat_train_pred)
x_fit = np.asarray(x[:n_fit]).ravel()
yhat_fit = np.asarray(yhat_train_pred).ravel()

# A line plot connects points in the order given, so order them by x first.
# Local names are used so that x_poly_train / yhat_train_pred are not
# overwritten; re-running this cell therefore gives the same plot.
order = np.argsort(x_fit, kind='stable')
plt.plot(x_fit[order], yhat_fit[order], color='m')
plt.show()