# Sources

- [Master Polynomial Regression With Easy-to-Follow Tutorials](https://www.analyticsvidhya.com/blog/2021/07/all-you-need-to-know-about-polynomial-regression)

In [16]:
# Import all necessary libraries
import numpy as np

# Visualize the data in 3-d space
import plotly.express as px

# Draw the surface in 3-d space
import plotly.graph_objects as go

# Linear regression
from sklearn.linear_model import LinearRegression

# Split dataset into train & test datasets
from sklearn.model_selection import train_test_split

# Frame data
import pandas as pd

# Evaluate the model
from sklearn import metrics


In [17]:
# Create a dataset
x = 7 * np.random.rand(100, 1) - 2.8 # in range [-2.8, 4.2]
y = 7 * np.random.rand(100, 1) - 2.8 # in range [-2.8, 4.2]

# Data points follow the equation with random noise values
noise = np.random.randn(100, 1)
z = x**2 + y**2 + 0.2*x + 0.1*x*y + 2 + noise

In [60]:
# Shape of the data point
df = px.data.iris()

# .ravel() flatten a multi-dimensional array to 1D array
fig = px.scatter_3d(df, x=x.ravel(), y=y.ravel(), z=z.ravel())

# Visualize the data in 3-d space using a 3-D scatter plot
fig.show()

In [69]:
# Linear Regression with multiple variables

# Set up independent variables

X = np.concatenate((x, y), axis=1)
Y = z.flatten()

# Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Linear Regression
model = LinearRegression()
model.fit(X_train, Y_train)

# Make predictions on the test set
Y_pred = model.predict(X_test)

# Evaluate the model
print('Mean Absolute Error:', metrics.mean_absolute_error(Y_test, Y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(Y_test, Y_pred))
print('Root Mean Squared Error:', metrics.mean_squared_error(Y_test, Y_pred, squared=False))

# Print the coefficients and intercept
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)


[[-2.59938267]
 [-0.20405469]
 [ 3.68721498]
 [ 1.0204816 ]
 [ 3.31308906]
 [-2.24608962]
 [-0.33633583]
 [ 1.63600682]
 [ 1.32432279]
 [ 4.18213725]
 [ 1.27061321]
 [-0.72603653]
 [ 1.65651268]
 [ 2.53117803]
 [ 2.36042578]
 [ 1.67552407]
 [-1.6739141 ]
 [-1.78791129]
 [ 0.80043571]
 [ 2.82232288]]
Mean Absolute Error: 4.844537010187521
Mean Squared Error: 30.47303073896249
Root Mean Squared Error: 5.520238286429535
Coefficients: [2.37622135 1.74850089]
Intercept: 8.697121942437528



'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.



In [94]:
# Linear Regression with multiple variables (continue)

# color=np.empty(Y_train.size).fill(1)
# fig.scatter_3d(df, x=X_train[:, 0], y=X_train[:, 1], z=Y_train, color=color, title="Training Data")

# Visualize data
fig = go.Figure()
fig.add_scatter3d(x=X_train[:,0], y=X_train[:,1], z=Y_train, mode="markers", name="Training Data")
fig.add_scatter3d(x=X_test[:,0], y=X_test[:,1], z=Y_test, mode="markers", name="Test Data")
fig.add_scatter3d(x=X_test[:,0], y=X_test[:,1], z=Y_pred, mode="markers", name="Predicted Data")
fig.show()

In [20]:
# Polynomial Regression with multiple variables

[[[ 1]
  [ 2]
  [ 3]
  [ 4]]

 [[10]
  [20]
  [30]
  [40]]]
[[ 1  2]
 [ 3  4]
 [10 20]
 [30 40]]
