<p>Algorithm that predicts the cost of a person's health plan based on their age.</p>

<h1>Importing libraries and necessary data</h1>

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('health_plan.csv')

In [3]:
df

Unnamed: 0,idade,custo
0,18,470
1,23,520
2,28,630
3,33,830
4,38,1150
5,43,1530
6,48,2040
7,53,3080
8,58,5100
9,63,10100


<h1>Separating predictive data from classes</h1>

In [4]:
x_health_plan = df.iloc[:,0:1].values

In [5]:
x_health_plan

array([[18],
       [23],
       [28],
       [33],
       [38],
       [43],
       [48],
       [53],
       [58],
       [63]], dtype=int64)

In [6]:
y_health_plan = df.iloc[:,1].values

In [7]:
y_health_plan

array([  470,   520,   630,   830,  1150,  1530,  2040,  3080,  5100,
       10100], dtype=int64)

<h1>Creating the polynomial transformer</h1>

In [8]:
from sklearn.preprocessing import PolynomialFeatures

In [10]:
poly = PolynomialFeatures(degree = 4)
x_health_plan_poly = poly.fit_transform(x_health_plan)

In [11]:
x_health_plan_poly.shape, x_health_plan_poly.shape

((10, 5), (10, 5))

In [12]:
x_health_plan_poly

array([[1.0000000e+00, 1.8000000e+01, 3.2400000e+02, 5.8320000e+03,
        1.0497600e+05],
       [1.0000000e+00, 2.3000000e+01, 5.2900000e+02, 1.2167000e+04,
        2.7984100e+05],
       [1.0000000e+00, 2.8000000e+01, 7.8400000e+02, 2.1952000e+04,
        6.1465600e+05],
       [1.0000000e+00, 3.3000000e+01, 1.0890000e+03, 3.5937000e+04,
        1.1859210e+06],
       [1.0000000e+00, 3.8000000e+01, 1.4440000e+03, 5.4872000e+04,
        2.0851360e+06],
       [1.0000000e+00, 4.3000000e+01, 1.8490000e+03, 7.9507000e+04,
        3.4188010e+06],
       [1.0000000e+00, 4.8000000e+01, 2.3040000e+03, 1.1059200e+05,
        5.3084160e+06],
       [1.0000000e+00, 5.3000000e+01, 2.8090000e+03, 1.4887700e+05,
        7.8904810e+06],
       [1.0000000e+00, 5.8000000e+01, 3.3640000e+03, 1.9511200e+05,
        1.1316496e+07],
       [1.0000000e+00, 6.3000000e+01, 3.9690000e+03, 2.5004700e+05,
        1.5752961e+07]])

<h1>Training with the data created by the polynomial regressor</h1>

In [13]:
from sklearn.linear_model import LinearRegression

In [14]:
regressor_model = LinearRegression()
regressor_model.fit(x_health_plan_poly, y_health_plan)

In [15]:
#b0
regressor_model.intercept_

16561.97460823346

In [16]:
#b1
regressor_model.coef_

array([ 0.00000000e+00, -2.12242252e+03,  9.90404198e+01, -1.95058275e+00,
        1.40792541e-02])

In [17]:
new_data = [[40]]
new_data = poly.transform(new_data)

In [18]:
regressor_model.predict(new_data)

array([1335.33958099])

In [19]:
predicts = regressor_model.predict(x_health_plan_poly)
predicts

array([ 549.65035642,  345.85081891,  616.53846305,  975.83916186,
       1249.06759982, 1472.7272729 , 1894.51048857, 2973.29836576,
       5379.16083488, 9993.35663783])

In [None]:
import plotly.express as px

In [None]:
grafic = px.scatter(x=x_health_plan[:,0], y=y_health_plan)
grafic.add_scatter(x=x_health_plan[:,0], y=predicts, name='regression')
grafic.show()