# Polynomial Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the raw table from the CSV file that sits next to the notebook.
dataset = pd.read_csv('Position_Salaries.csv')

# X: every column except the first and the last, kept 2-D because of the slice.
# y: the last column only, as a 1-D array.
X, y = (
    dataset.iloc[:, 1:-1].values,
    dataset.iloc[:, -1].values,
)

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=755,
)

## Training the Linear Regression model on the Training set

In [4]:
from sklearn.linear_model import LinearRegression

# Baseline model: a plain linear fit on the training split only.
lin_reg = LinearRegression()
# Left as the final expression so the fitted estimator is echoed as the cell output.
lin_reg.fit(X=X_train, y=y_train)

LinearRegression()

## Training the Polynomial Regression model on the Training set and evaluating both models

In [15]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error

# Polynomial Regression
# Expand the training features into polynomial terms, then fit an ordinary
# linear model on the expanded features.
# NOTE(review): degree = 13 produces many polynomial terms per input feature;
# on a small training split this can overfit badly — confirm the degree is intended.
poly_reg = PolynomialFeatures(degree = 13)
X_poly = poly_reg.fit_transform(X_train)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y_train)
# Reuse the transformer that was fitted on the training split: held-out data
# must only be transformed, never used to (re-)fit a preprocessing step.
y_pred_poly = lin_reg_2.predict(poly_reg.transform(X_test))

# Polynomial metrics are computed on the held-out test split.
score_poly = r2_score(y_test, y_pred_poly)
print(f"r2 score Poly: {round(score_poly*100, 3)} %")
mse_poly = mean_squared_error(y_true=y_test, y_pred=y_pred_poly)
print(f"mean squared error Poly: {round(mse_poly, 3)}")

# Linear Regression
# NOTE(review): these metrics are computed on the full dataset (X, y), which
# includes the rows lin_reg was trained on, whereas the polynomial metrics above
# use only (X_test, y_test). The two sets of numbers are therefore not directly
# comparable; evaluate on (X_test, y_test) here for a like-for-like comparison.
y_pred_lin = lin_reg.predict(X)
score_lin = r2_score(y, y_pred_lin)
print(f"r2 score Lin: {round(score_lin*100, 3)} %")
mse_lin = mean_squared_error(y_true=y, y_pred=y_pred_lin)
print(f"mean squared error Lin: {round(mse_lin, 3)}")

r2 score Poly: 99.861 %
mean squared error Poly: 3844516.202
r2 score Lin: 64.996 %
mean squared error Lin: 28235069444.444


In [6]:
# Disabled experiment: try several polynomial degrees and keep the one with the
# highest r2 score on the test split. mx holds [best score so far, its degree].
# NOTE(review): `range(1, )` is the same as `range(1)`, so if this cell is
# re-enabled as written the loop runs once with i = 0; the upper bound of the
# degree search appears to be missing.
# from sklearn.preprocessing import PolynomialFeatures
# from sklearn.metrics import r2_score, mean_squared_error

# mx = [-1, 0]
# for i in range(1, ):
#     poly_reg = PolynomialFeatures(degree = i)
#     X_poly = poly_reg.fit_transform(X_train)
#     lin_reg_2 = LinearRegression()
#     lin_reg_2.fit(X_poly, y_train)
#     y_pred = lin_reg_2.predict(poly_reg.fit_transform(X_test))
#     score = r2_score(y_test, y_pred)
#     if score > mx[0]:
#         mx[0] = score
#         mx[1] = i
# mx

## Visualising the Linear Regression results

In [7]:
# plt.scatter(X, y, color = 'red')
# plt.plot(X, lin_reg.predict(X), color = 'blue')
# plt.title('Truth or Bluff (Linear Regression)')
# plt.xlabel('Position Level')
# plt.ylabel('Salary')
# plt.show()

## Visualising the Polynomial Regression results

In [8]:
# plt.scatter(X, y, color = 'red')
# plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)), color = 'blue')
# plt.title('Truth or Bluff (Polynomial Regression)')
# plt.xlabel('Position level')
# plt.ylabel('Salary')
# plt.show()

## Visualising the Polynomial Regression results (for higher resolution and smoother curve)

In [9]:
# X_grid = np.arange(min(X), max(X), 0.1)
# X_grid = X_grid.reshape((len(X_grid), 1))
# plt.scatter(X, y, color = 'red')
# plt.plot(X_grid, lin_reg_2.predict(poly_reg.fit_transform(X_grid)), color = 'blue')
# plt.title('Truth or Bluff (Polynomial Regression)')
# plt.xlabel('Position level')
# plt.ylabel('Salary')
# plt.show()

## Predicting a new result with Linear Regression

In [10]:
# lin_reg.predict([[6.5]])

## Predicting a new result with Polynomial Regression

In [11]:
# lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))