# Implementation of Polynomial Regression

In [12]:
# Import necessary Python libraries
import numpy as np # NumPy for numerical operations
import matplotlib.pyplot as plt # Matplotlib for plotting (although Plotly is used later, it's good practice to import common plotting libraries)
from sklearn.preprocessing import PolynomialFeatures # Imports the class to generate polynomial features
from sklearn.linear_model import LinearRegression # Imports the Linear Regression model

# Build a synthetic dataset: the dependent variable y is a quadratic function
# of the independent variable x, plus Gaussian noise.
np.random.seed(0)  # Seed NumPy's global RNG so the noise is reproducible
n_samples = 100
x = np.linspace(0, 10, n_samples)  # Evenly spaced values covering [0, 10]
noise = np.random.normal(0, 10, n_samples)  # Mean 0, standard deviation 10
y = 3 * x ** 2 + 2 * x + noise  # y = 3x^2 + 2x + noise

In [13]:
# Turn both 1D arrays into single-column 2D arrays.
# scikit-learn estimators expect the feature matrix to be 2D, with one row per
# sample and one column per feature; y is reshaped the same way so targets and
# predictions share the column-vector layout.
x, y = (values.reshape(-1, 1) for values in (x, y))

In [14]:
# Hold out part of the data so the fitted model can be checked on samples it
# has not seen during training.
from sklearn.model_selection import train_test_split

# 20% of the samples go to the test set and the remaining 80% to training.
# A fixed random_state makes the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Expand the single feature x into polynomial terms up to degree 2, so each
# sample becomes the row [x^0, x^1, x^2] = [1, x, x^2].
poly = PolynomialFeatures(degree=2)

# Fit the transformer on the training data only, then apply the same fitted
# transformation to both the training and the test inputs.
poly.fit(x_train)
x_train_poly = poly.transform(x_train)
x_test_poly = poly.transform(x_test)

In [16]:
# Polynomial regression is ordinary linear regression applied to the expanded
# features: the model stays linear in its coefficients, but it is trained on
# x_train_poly (the polynomial terms) instead of the raw x_train.
model = LinearRegression()  # Unfitted linear regression estimator
model.fit(X=x_train_poly, y=y_train)  # Learn the coefficients from the training set

In [17]:
# Run the fitted model on both feature matrices to get the predicted
# dependent-variable values for the training set and for the test set.
y_train_pred, y_test_pred = map(model.predict, (x_train_poly, x_test_poly))

Visualize the original data and the polynomial regression curve

In [20]:
import plotly.graph_objects as go  # Plotly for interactive plotting

# Scatter plot of the original training observations
trace_train = go.Scatter(
    x=x_train.flatten(),
    y=y_train.flatten(),
    mode='markers',
    name='Training Data',
    marker=dict(color='blue'),
)

# Scatter plot of the original test observations
trace_test = go.Scatter(
    x=x_test.flatten(),
    y=y_test.flatten(),
    mode='markers',
    name='Testing Data',
    marker=dict(color='green'),
)

# Line plot of the fitted polynomial regression curve.
# train_test_split shuffles the samples, so x_train is not in ascending order.
# A 'lines' trace connects the points in the order they are given, so x and the
# matching predictions are sorted by x first; without this the curve is drawn
# as a zig-zag jumping back and forth across the plot.
order = x_train.flatten().argsort()
trace_regression = go.Scatter(
    x=x_train.flatten()[order],
    y=y_train_pred.flatten()[order],
    mode='lines',
    name='Polynomial Regression',
    line=dict(color='red', width=2),
)

# Plot layout: title and axis labels
layout = go.Layout(
    title='Polynomial Regression',
    xaxis=dict(title='Independent Variable - X'),
    yaxis=dict(title='Dependent Variable - Y'),
)

# Combine the two scatter traces and the regression curve into one figure
figure = go.Figure(data=[trace_train, trace_test, trace_regression], layout=layout)

# Display the interactive plot
figure.show()