# In [None]:
# 📌 Medical Cost Prediction (FreeCodeCamp Project)

# Step 1: Import Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Step 2: Load Data
# The CSV is fetched over HTTP on every run, so this step needs network access.
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"
data = pd.read_csv(url)
print(data.head())

# Step 3: One-Hot Encode Categorical Columns
# dtype=float is explicit because pandas >= 2.0 emits bool dummy columns by
# default; a frame mixing bool and numeric columns converts to an object
# array, which the normalization layer cannot consume.
data = pd.get_dummies(
    data,
    columns=["sex", "smoker", "region"],
    drop_first=True,
    dtype=float,
)

# Step 4: Split into Features & Labels
# Copies of this dataset name the target column either "expenses" or
# "charges"; accept whichever one is present instead of failing on a
# hard-coded name.
# NOTE(review): confirm which name the URL above actually serves.
label_column = next(
    (name for name in ("expenses", "charges") if name in data.columns), None
)
if label_column is None:
    raise KeyError(
        "No target column found; expected 'expenses' or 'charges', "
        f"got columns {list(data.columns)}"
    )
labels = data.pop(label_column)  # y
features = data                  # X (same object as `data`, minus the label)

# Train-Test Split
# 80/20 split; the fixed random_state makes the partition reproducible.
train_dataset, test_dataset, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Step 5: Normalize Data
# The layer is adapted on the training rows only, so its statistics are
# computed without looking at the test rows.
normalizer = layers.Normalization(axis=-1)
normalizer.adapt(np.asarray(train_dataset, dtype="float32"))

# Step 6: Build Model
def build_model():
    """Create and compile the regression network.

    The network is the module-level ``normalizer`` layer, followed by two
    64-unit ReLU dense layers and a single linear output unit. It is
    compiled with the Adam optimizer and mean absolute error as both the
    loss and the reported metric.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Hidden layers are created first and in order, then the output layer.
    hidden_stack = [layers.Dense(64, activation="relu") for _ in range(2)]
    regression_head = layers.Dense(1)  # regression output

    network = keras.Sequential([normalizer, *hidden_stack, regression_head])
    network.compile(
        optimizer="adam",
        loss="mae",
        metrics=["mae"],
    )
    return network

model = build_model()

# Step 7: Train Model
# validation_split=0.2 holds out 20% of the training rows for validation;
# verbose=0 suppresses the per-epoch progress output.
history = model.fit(
    train_dataset, train_labels,
    epochs=100,
    validation_split=0.2,
    verbose=0
)

# Step 8: Evaluate
# evaluate() returns the loss followed by each compiled metric; both are MAE
# here, so two values are unpacked.
loss, mae = model.evaluate(test_dataset, test_labels, verbose=0)
print(f"\n✅ Mean Absolute Error on test set: {mae:.2f}")

# Step 9: Predictions
# A single constant drives both the slice and the loop, and zip() stops at
# the shorter sequence, so a test set with fewer rows than SAMPLE_COUNT no
# longer raises IndexError.
SAMPLE_COUNT = 10
preds = model.predict(test_dataset.iloc[:SAMPLE_COUNT]).flatten()
sample_actuals = test_labels.iloc[:SAMPLE_COUNT].to_numpy()
print("\nSample Predictions vs Actual:")
for predicted, actual in zip(preds, sample_actuals):
    print(f"Predicted: {predicted:.2f}, Actual: {actual}")