In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras

In [2]:
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [3]:
# Location of the input CSV, resolved relative to the notebook's working directory.
DATA_PATH = 'new.csv'
df = pd.read_csv(DATA_PATH)

In [4]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,age,is_female,bmi,children,is_smoker,charges,region_southeast,bmi_category_Obese
0,0,-1.440418,1,-0.45316,-0.909234,1,16884.924,0,0
1,1,-1.511647,0,0.509422,-0.079442,0,1725.5523,1,1
2,2,-0.79935,0,0.383155,1.580143,0,4449.462,1,1
3,3,-0.443201,0,-1.305052,-0.909234,0,21984.47061,0,0
4,4,-0.514431,0,-0.292456,-0.909234,0,3866.8552,0,0


In [5]:
# Split the frame into model inputs (X) and the regression target (y).
#
# pandas names header-less CSV columns 'Unnamed: <n>' (and 'Unnamed: <n>.<k>' on a
# clash). These look like row indices written out by an earlier to_csv round-trip
# (NOTE(review): confirm against how new.csv was produced), so none of them should
# reach the model as a feature. Dropping only the literal 'Unnamed: 0' with
# errors='ignore' would silently let any sibling such as 'Unnamed: 0.1' through.
index_like_cols = [col for col in df.columns if str(col).startswith('Unnamed:')]

# Keep inputs numeric float32 for Keras
X = df.drop(columns=['charges', *index_like_cols]).astype('float32')

# The network is trained on log1p(charges); predictions are mapped back with expm1
# when the model is evaluated.
y = np.log1p(df['charges']).astype('float32')

In [6]:
# Two-stage hold-out with identical settings for both cuts:
#   1) reserve 20% of all rows as the test set,
#   2) reserve 20% of what is left as the validation set.
# That leaves roughly 64% / 16% / 20% for train / val / test.
_split_opts = dict(test_size=0.2, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, y, **_split_opts)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, **_split_opts)

print('train:', X_train.shape, 'val:', X_val.shape, 'test:', X_test.shape)

train: (855, 7) val: (214, 7) test: (268, 7)


In [7]:
# Seed the Python, NumPy and TensorFlow random generators before any weights are
# created. Without this, weight initialisation, dropout masks and batch shuffling
# differ on every Restart & Run All, even though the data split itself is seeded.
# (Some GPU kernels may still introduce small run-to-run differences.)
keras.utils.set_random_seed(42)

# Per-feature standardisation layer. Its mean/variance are learned from the
# training split only, so validation and test statistics never leak into the model.
# adapt() is given a plain NumPy array, which is its documented input type.
normalizer = layers.Normalization()
normalizer.adapt(X_train.to_numpy())

# Fully connected regressor: 128 -> 64 -> 32 -> 1.
# The two widest layers carry L2 weight decay and are followed by dropout; the
# final Dense(1) has no activation, so the output is an unbounded real value.
model = keras.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    normalizer,
    layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(1e-4)),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(1e-4)),
    layers.Dropout(0.1),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])

In [8]:
# Training configuration. The target is log1p(charges), so the loss and both
# tracked metrics are expressed in log space, not in currency units.
adam = keras.optimizers.Adam(learning_rate=1e-3)
huber_loss = keras.losses.Huber()
tracked_metrics = [
    keras.metrics.MeanAbsoluteError(name='mae'),
    keras.metrics.RootMeanSquaredError(name='rmse'),
]

model.compile(optimizer=adam, loss=huber_loss, metrics=tracked_metrics)

In [9]:
# Stop once val_loss has gone 25 epochs without improving, and roll the model back
# to the weights from its best epoch.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=25,
    restore_best_weights=True,
)

# Halve the learning rate after 10 stagnant epochs, never going below 1e-6.
lr_on_plateau = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    min_lr=1e-6,
)

callbacks = [early_stop, lr_on_plateau]

# epochs=1000 is only an upper bound; early stopping decides the actual length.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=1000,
    callbacks=callbacks,
    verbose=1,
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000


In [10]:
# Evaluate on held-out test set
# return_dict=True makes Keras label every value with its own metric name.
# Zipping the result list against model.metrics_names is fragile: on Keras 3 that
# attribute is ['loss', 'compile_metrics'], which would mislabel 'mae' and drop 'rmse'.
test_metrics = model.evaluate(X_test, y_test, verbose=1, return_dict=True)
test = list(test_metrics.values())  # same list-of-values shape this name held before
print(test_metrics)

# Convert back to original charges scale for interpretability
# ravel() (rather than squeeze()) keeps the predictions 1-D even for a single-row
# test set, where squeeze() would collapse them to a 0-d scalar.
pred_log = model.predict(X_test, verbose=0).ravel()
pred = np.expm1(pred_log)  # inverse of the log1p applied to the target
true = np.expm1(y_test)

# Errors in charge units; these are not comparable to the log-space metrics above.
residuals = pred - true
mae_charges = np.mean(np.abs(residuals))
rmse_charges = np.sqrt(np.mean(residuals ** 2))
print({'mae_charges': float(mae_charges), 'rmse_charges': float(rmse_charges)})

{'loss': 0.07896684110164642, 'mae': 0.23084834218025208, 'rmse': 0.3841770589351654}
{'mae_charges': 3208.410888671875, 'rmse_charges': 5896.1474609375}
