# Polynomial Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the insurance CSV and split it positionally: every column except the
# last becomes the feature matrix X, and the last column becomes the target y.
dataset = pd.read_csv('../../Datasets/insurance.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

In [3]:
# One-hot encoding: replace each categorical column with one binary indicator
# column per category, so every row carries a 0/1 vector for that feature.

# Encoding the categorical independent variables
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# The feature columns at positions 1, 4 and 5 are one-hot encoded; every other
# column is passed through unchanged and placed after the encoded columns.
# sparse_threshold=0 forces a dense result, so np.array() below always yields a
# regular 2-D array instead of wrapping a SciPy sparse matrix in a 0-d object array.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1, 4, 5])],
    remainder='passthrough',
    sparse_threshold=0,
)

# Fit the encoder and replace X with the encoded feature matrix.
# Note: X is overwritten here, so re-run the dataset-loading cell before
# re-running this one (otherwise already-encoded columns get encoded again).
X = np.array(ct.fit_transform(X))

print(X)

[[1.0 0.0 0.0 ... 19 27.9 0]
 [0.0 1.0 1.0 ... 18 33.77 1]
 [0.0 1.0 1.0 ... 28 33.0 3]
 ...
 [1.0 0.0 1.0 ... 18 36.85 0]
 [1.0 0.0 1.0 ... 21 25.8 0]
 [1.0 0.0 0.0 ... 61 29.07 0]]


## Splitting the dataset into the Training set and Test set

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Polynomial Regression model on the Training set

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features into polynomial terms up to degree 4. The
# expansion is fitted on the training split only and reused later for prediction.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)

# Ordinary least squares on the expanded feature matrix.
# NOTE(review): degree 4 over every encoded column can generate a very large
# number of terms relative to the training rows -- confirm this is intended.
regressor = LinearRegression()
regressor.fit(X_poly, y_train)

LinearRegression()

## Predicting the Test set results

In [6]:
# Predict on the held-out rows, reusing the polynomial expansion that was
# fitted on the training data.
y_pred = regressor.predict(poly_reg.transform(X_test))

# Print predictions (left column) next to the true targets (right column),
# rounded to two decimals for readability.
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[12138.18  9724.53]
 [ 9155.85  8547.69]
 [59199.22 45702.02]
 [10653.05 12950.07]
 [15989.76  9644.25]
 [ 2776.92  4500.34]
 [ 3150.07  2198.19]
 [12741.5  11436.74]
 [ 8043.88  7537.16]
 [ 8745.3   5425.02]
 [ 7810.69  6753.04]
 [10817.24 10493.95]
 [ 8488.86  7337.75]
 [ 8580.87  4185.1 ]
 [21717.04 18310.74]
 [14715.23 10702.64]
 [13171.39 12523.6 ]
 [ 8416.47  3490.55]
 [ 7925.2   6457.84]
 [28746.9  33475.82]
 [23080.65 23967.38]
 [15923.7  12643.38]
 [11207.12 23045.57]
 [29297.63 23065.42]
 [ 3349.69  1674.63]
 [ 9542.16  4667.61]
 [ 6792.95  3732.63]
 [ 9027.07  7682.67]
 [ 7356.22  3756.62]
 [10671.92  8413.46]
 [ 7632.01  8059.68]
 [47645.14 48970.25]
 [13322.76 12979.36]
 [11233.29 20630.28]
 [14430.14 14571.89]
 [ 5325.81  4137.52]
 [ 9215.1   8347.16]
 [39888.64 51194.56]
 [39965.52 40003.33]
 [ 3266.6   1880.49]
 [ 6965.84  5458.05]
 [ 3760.76  2867.12]
 [27211.87 20149.32]
 [56217.38 47496.49]
 [34723.86 36149.48]
 [ 8407.68 26018.95]
 [14652.67 19749.38]
 [ 8112.75  6

## Evaluating the Model Performance

In [7]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions on the test split;
# left as the cell's final expression so the notebook displays the value.
r2_score(y_true=y_test, y_pred=y_pred)

0.8511064652975759