# Sources

- [Master Polynomial Regression With Easy-to-Follow Tutorials](https://www.analyticsvidhya.com/blog/2021/07/all-you-need-to-know-about-polynomial-regression)

In [1]:
# Import all necessary libraries
import numpy as np

# Visualize the data in 3-d space
import plotly.express as px

# Draw the surface in 3-d space
import plotly.graph_objects as go

# Linear regression model
from sklearn.linear_model import LinearRegression

# Polynomial regression model
from sklearn.preprocessing import PolynomialFeatures

# Build a polynomial regression pipeline
from sklearn.pipeline import make_pipeline

# Split dataset into train & test datasets
from sklearn.model_selection import train_test_split

# Frame data
import pandas as pd

# Evaluate the model
from sklearn import metrics


In [2]:
# Create a dataset
x = 7 * np.random.rand(100, 1) - 2.8 # in range [-2.8, 4.2]
y = 7 * np.random.rand(100, 1) - 2.8 # in range [-2.8, 4.2]

# Data points follow the equation with random noise values
noise = np.random.randn(100, 1)
z = x**2 + y**2 + 0.2*x + 0.1*x*y + 2 + noise

In [3]:
# Shape of the data point
df = px.data.iris()

# .ravel() flatten a multi-dimensional array to 1D array
fig = px.scatter_3d(df, x=x.ravel(), y=y.ravel(), z=z.ravel())

# Visualize the data in 3-d space using a 3-D scatter plot
fig.show()

In [4]:
# Linear Regression with multiple variables

# Set up independent variables

X = np.concatenate((x, y), axis=1)
Y = z.flatten()

# Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Linear Regression
model_lr = LinearRegression()
model_lr.fit(X_train, Y_train)

# Make predictions on the test set
Y_pred = model_lr.predict(X_test)

# Evaluate the model
print('Mean Absolute Error:', metrics.mean_absolute_error(Y_test, Y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(Y_test, Y_pred))
print('Root Mean Squared Error:', metrics.mean_squared_error(Y_test, Y_pred, squared=False))

# Print the coefficients and intercept
print('Coefficients:', model_lr.coef_)
print('Intercept:', model_lr.intercept_)


Mean Absolute Error: 4.768128154601042
Mean Squared Error: 29.15307101926778
Root Mean Squared Error: 5.39935838959295
Coefficients: [1.98707707 1.15580642]
Intercept: 9.187819075535304



'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.



In [5]:
# Linear Regression with multiple variables (continue)

# color=np.empty(Y_train.size).fill(1)
# fig.scatter_3d(df, x=X_train[:, 0], y=X_train[:, 1], z=Y_train, color=color, title="Training Data")

# Visualize data
fig = go.Figure()
fig.add_scatter3d(x=X_train[:,0], y=X_train[:,1], z=Y_train, mode="markers", name="Training Data")
fig.add_scatter3d(x=X_test[:,0], y=X_test[:,1], z=Y_test, mode="markers", name="Test Data")
fig.add_scatter3d(x=X_test[:,0], y=X_test[:,1], z=Y_pred, mode="markers", name="Predicted Data")
fig.show()

In [6]:
# Polynomial Regression with multiple variables
model_poly = PolynomialFeatures(degree=3, include_bias=True)

# Build a polynomial regression pipeline
pipeline = make_pipeline(model_poly, model_lr)

# Use the pipeline to build the model
pipeline.fit(X_train, Y_train)

# Test the model with our test data
Y_pred = pipeline.predict(X_test)

# Calculate and print the mean squared error
mse = np.sqrt(metrics.mean_squared_error(Y_test, Y_pred))
print(f'Mean error: {mse:3.3} ({mse/np.mean(Y_pred)*100:3.3}%)')

# Score the model
score = pipeline.score(X_train, Y_train)
print('Model determination: ', score)


Mean error: 1.16 (10.0%)
Model determination:  0.979697888846009


In [7]:
# Polynomial Regression with multiple variables (continue)

# color=np.empty(Y_train.size).fill(1)
# fig.scatter_3d(df, x=X_train[:, 0], y=X_train[:, 1], z=Y_train, color=color, title="Training Data")

# Visualize data
fig = go.Figure()
fig.add_scatter3d(x=X_train[:,0], y=X_train[:,1], z=Y_train, mode="markers", name="Training Data")
fig.add_scatter3d(x=X_test[:,0], y=X_test[:,1], z=Y_test, mode="markers", name="Test Data")
fig.add_scatter3d(x=X_test[:,0], y=X_test[:,1], z=Y_pred, mode="markers", name="Predicted Data")
fig.show()