# Exercise 7-4: Polynomial Regression Demo

## Step 1: Problem Analysis and Framing

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import sklearn.metrics as sm

## Step 2: Data Preparation

In [None]:
# Load the salaries dataset from the shared data folder into a DataFrame
salaries_csv = '../data/salaries.csv'
df = pd.read_csv(salaries_csv)

In [None]:
# (number of rows, number of columns) of the loaded frame
df.shape

In [None]:
# Display the loaded frame for a visual sanity check
df

In [None]:
# Scatter of every row: Salary on the x-axis, Level on the y-axis
fig, ax = plt.subplots()
ax.scatter(df['Salary'], df['Level'], color='green')
ax.set_xlabel('salary')
ax.set_ylabel('level')
plt.show()

In [None]:
# Distribution of the Level column: density-normalised histogram with a KDE overlay.
# sns.histplot(stat='density', kde=True) is the replacement for the deprecated
# sns.distplot(..., norm_hist=True); seaborn is already imported in the first cell.
sns.histplot(df['Level'], label='level', stat='density', kde=True)
plt.show()

In [None]:
# Distribution of the Salary column: density-normalised histogram with a KDE overlay.
# sns.histplot(stat='density', kde=True) is the replacement for the deprecated
# sns.distplot(..., norm_hist=True).
sns.histplot(df['Salary'], label='salary', stat='density', kde=True)
plt.show()

In [None]:
# Check the pairwise correlation between the numeric columns.
# Non-numeric columns are dropped explicitly first: since pandas 2.0,
# DataFrame.corr() no longer skips them silently and raises on values it
# cannot convert to float.
# NOTE(review): column 0 is skipped when X/y are built, so it is presumably a
# text column -- confirm against the CSV.
corr_matrix = df.select_dtypes(include='number').corr()
corr_matrix

## Step 3: Train a Model

In [None]:
# Separate predictor and target by column position:
#   X -> column 1, selected with a list so the result stays 2-D (n_rows x 1)
#   y -> column 2, as a 1-D array
X = df.iloc[:, [1]].values
y = df.iloc[:, 2].values

In [None]:
# Hold out 20% of the rows as a test set; random_state=0 makes the shuffle repeatable.
# NOTE(review): none of the visible cells use X_train/X_test/y_train/y_test --
# the models in this notebook are fitted on the full X, y.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### Linear Regression First

In [None]:
# Build a plain linear regression and fit it on the full X, y in one step
# (fit() returns the estimator itself, so it can be chained)
linreg = LinearRegression().fit(X, y)

# show the fitted estimator as the cell output
linreg

In [None]:
# Predictions of the linear model on the same X it was fitted on
y_predicted = linreg.predict(X)
y_predicted

In [None]:
# Actual target values, to compare by eye with the predictions
y

In [None]:
# Plot the observed points (red) and the fitted straight line (blue)
fig, ax = plt.subplots()
ax.scatter(X, y, color='red')
ax.plot(X, y_predicted, color='blue')
ax.set(title='Linear Regression', xlabel='Position level', ylabel='Salary')
plt.show()

### Polynomial Regression

In [None]:
# Degree-4 polynomial feature expander
polyreg = PolynomialFeatures(degree=4)

# Fit the expander on X, then expand X into its polynomial terms so that an
# ordinary linear regression can be fitted on them
polyreg.fit(X)
X_pol = polyreg.transform(X)

In [None]:
# Linear regression fitted on the polynomial-expanded features
# (fit() returns the estimator itself, so it can be chained)
pollinreg = LinearRegression().fit(X_pol, y)

# show the fitted estimator as the cell output
pollinreg

In [None]:
# Apply the polynomial model to the data it was fitted on.
# Note: this overwrites the y_predicted produced earlier by the linear model.
y_predicted = pollinreg.predict(X_pol)

In [None]:
# Current contents of y_predicted
y_predicted

In [None]:
# Actual target values, to compare by eye with the predictions
y

In [None]:
# Plot the observed points (red) and the polynomial regression curve (green)
fig, ax = plt.subplots()
ax.scatter(X, y, color='red')
ax.plot(X, y_predicted, color='green')
ax.set(title='Polynomial Regression', xlabel='Position level', ylabel='Salary')
plt.show()

In [None]:
# Report the fitted parameters of the polynomial model.
# NOTE(review): the header text says "Multiple Regression Performance", but what
# is printed below are the coefficients and intercept of pollinreg.
print("Multiple Regression Performance")

fitted_params = (
    ('Coefficient: ', pollinreg.coef_),
    ('Intercept: ', pollinreg.intercept_),
)
for label, value in fitted_params:
    print(label, value)

## Step 4: Test and Compare the Models

In [None]:
# Predicting a new result with Linear Regression.
# The input is a 2-D list: one sample with one feature (level 5.5).
linreg.predict([[5.5]])
#output should be 249500

In [None]:
# One more linear prediction, this time for level 8.0
linreg.predict([[8.0]])

In [None]:
# Predicting the same with Polynomial Regression.
# Use transform(), not fit_transform(): polyreg was already fitted on X, so a
# new input should only be expanded with the fitted transformer, not used to
# refit it.
pollinreg.predict(polyreg.transform([[5.5]]))
#output should be 132148.43750003


In [None]:
# The second test: polynomial prediction for level 8.
# Use transform(), not fit_transform(): polyreg was already fitted on X, so a
# new input should only be expanded with the fitted transformer, not used to
# refit it.
pollinreg.predict(polyreg.transform([[8]]))

## Step 5: Assess the Model

In [None]:
# Assess the polynomial model on the data it was fitted on.
# The predictions are recomputed here from pollinreg instead of reading the
# shared y_predicted variable, which is assigned by both the linear and the
# polynomial cells and therefore depends on which of them ran last.
y_poly_fitted = pollinreg.predict(X_pol)

# RMSE (root mean squared error): typical size of the gap between actual and
# predicted values, in the same units as y.
rmse = np.sqrt(sm.mean_squared_error(y, y_poly_fitted))
# R^2: coefficient of determination of the predictions against y.
r2 = sm.r2_score(y, y_poly_fitted)

In [None]:
# Root mean squared error computed in the previous cell
rmse

In [None]:
# R^2 score computed in the previous cell
r2

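Pretty good result! Keep in mind, though, that these metrics are computed on the same data the model was fitted on, so they describe how closely the curve follows the training points rather than how well it would predict unseen data.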

## Exercise
Your turn: Apply polynomial regression to the China data.

### Reference
https://towardsdatascience.com/machine-learning-polynomial-regression-with-python-5328e4e8a386 <br>
https://online.stat.psu.edu/stat501/lesson/9/9.8 <br>
https://github.com/justmarkham/scikit-learn-videos