## Import

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Split data
from sklearn.model_selection import train_test_split

# Normalization
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

## Download the Insurance Dataset

In [2]:
# Location of the raw insurance CSV, read directly over HTTP.
url = 'https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv'

# Parse the CSV into a DataFrame. The bare name on the last line makes the
# notebook render the frame as this cell's output.
dataset = pd.read_csv(filepath_or_buffer=url)
dataset

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [3]:
# Report, per column, how many entries are missing.
print(dataset.isnull().sum())

# Keep only the rows that have a value in every column.
dataset = dataset.dropna(axis='index', how='any')

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64


## One-hot encoding with pandas (alternative approach, currently disabled)

In [4]:
#dataset_onehot = pd.get_dummies(dataset)
#dataset_onehot

In [5]:
#features = dataset_onehot.drop('charges', axis='columns')
#features

In [6]:
#labels = dataset_onehot['charges']
#labels

## Train/test split and normalization

In [7]:
# Separate the regression target ('charges') from the input columns.
labels = dataset['charges']
features = dataset.drop(columns=['charges'])

# Hold out part of the rows for testing (80% train); the fixed random_state
# makes the split the same on every run.
train_data, test_data, train_labels, test_labels = train_test_split(
    features,
    labels,
    train_size=0.8,
    random_state=42,
)

# Preprocessing pipeline: min-max scale the numeric columns and one-hot
# encode the categorical ones.
ct = make_column_transformer(
    (MinMaxScaler(), ['age', 'bmi', 'children']),
    (OneHotEncoder(handle_unknown='ignore'), ['sex', 'smoker', 'region']),
)

# Fit the transformer on the training rows only, so no statistics from the
# test rows leak into preprocessing, then apply the same fitted transform
# to the test rows.
train_data_normal = ct.fit_transform(train_data)
test_data_normal = ct.transform(test_data)

## Build the Model

In [8]:
# Clear any Keras state left over from earlier runs of this cell and seed
# TensorFlow's random number generator.
tf.keras.backend.clear_session()
tf.random.set_seed(42)

# Small feed-forward regressor: one hidden layer of 100 ReLU units followed
# by a single output unit with no activation.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(units=100, activation='relu'))
model.add(tf.keras.layers.Dense(units=1))

# Mean absolute error is used both as the training loss and as the
# reported metric.
model.compile(
    loss='mae',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=['mae'],
)

# Train on the preprocessed training rows; `history` keeps the object
# returned by fit().
history = model.fit(x=train_data_normal, y=train_labels, epochs=200)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

## Prediction

In [9]:
# Run the model on the preprocessed test rows and flatten its output into a
# 1-D array of predictions.
preds = model.predict(x=test_data_normal).flatten()

# Display the predictions together with the true test labels.
(preds, test_labels)



(array([ 9082.579 ,  5254.495 , 46017.535 ,  9265.7   , 30130.025 ,
         4586.5874,  1965.7875, 13901.447 ,  3884.5354, 10210.362 ,
        35068.703 ,  7255.9194,  4055.4656, 41044.73  , 44219.59  ,
        40776.023 , 10201.5205, 40694.14  ,  8308.283 , 38788.043 ,
         5013.355 ,  7501.2495,  1153.9684,  2785.9944, 11021.082 ,
        11097.818 , 12673.7705,  5141.868 ,  9699.447 ,   942.2314,
         8294.624 , 11864.327 ,  1996.8252,  5693.3667,  3031.7043,
         7649.812 ,  2547.2349,  7362.083 , 42363.38  , 36368.906 ,
         4354.6104,  2625.636 , 11849.92  , 11905.637 ,  4903.2725,
        12192.809 ,  3459.9827,  4396.5415, 39871.223 ,  4524.8823,
        13804.944 ,  1486.6987,  6934.3784,  1523.6863, 10847.052 ,
        10276.89  ,  3806.7708, 35928.895 , 11941.133 , 10766.131 ,
        13633.766 ,  4818.018 , 14146.526 ,  7972.365 , 10369.484 ,
         4231.657 , 32536.78  , 10906.142 ,  3653.934 ,  1742.7023,
         6121.137 ,  9457.463 ,  8575.518 ,  637

## Evaluate the Model

In [10]:
# Score the trained model on the held-out test rows.
#
# The result is stored in `eval_results` rather than `eval`, so Python's
# built-in eval() is not shadowed for the rest of the kernel session.
# return_dict=True makes Keras return a {metric name: value} mapping, so the
# names no longer have to be paired with values by zipping
# `model.metrics_names`.
eval_results = model.evaluate(
    test_data_normal,
    test_labels,
    verbose=1,
    return_dict=True,
)

# Print each reported value (the loss, then the compiled metrics) by name.
for name, value in eval_results.items():
    print("%s: %.10f" % (name, value))

9/9 - 0s - loss: 2875.7063 - mae: 2875.7063 - 203ms/epoch - 23ms/step
loss: 2875.706
mae: 2875.706
