In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_absolute_error
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.linear_model import LinearRegression, Ridge

In [None]:
# Import data
!wget https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv
dataset = pd.read_csv('insurance.csv')
dataset.tail()

In [None]:


# Features and target: every column except 'expenses' is a predictor.
X = dataset.drop('expenses', axis=1)
y = dataset['expenses']

# One-hot encode non-numeric columns so StandardScaler only ever sees numbers;
# it raises "could not convert string to float" on string columns.
# NOTE(review): the insurance CSV is expected to contain string columns
# (e.g. sex / smoker / region) -- confirm with dataset.dtypes. On an
# all-numeric frame get_dummies leaves the columns unchanged.
# Encoding before the split guarantees train and test share the same columns.
X_encoded = pd.get_dummies(X, drop_first=True, dtype=float)

# Split data (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

# Standardize features: fit on the training split only, then apply the same
# transformation to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Expand the scaled features into polynomial terms up to degree 2
# (include_bias=False leaves out the constant column).
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.fit(X_train_scaled)  # the expansion is learned from the training split only
X_train_poly = poly.transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

# Fit a Ridge (L2-regularised linear) model on the expanded training features.
# .fit() returns the estimator itself, so it can be chained onto construction.
model = Ridge(alpha=1.0).fit(X_train_poly, y_train)
model

In [None]:
# Score the fitted model on the held-out split and report its mean absolute error.
y_pred = model.predict(X_test_poly)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"✅ Mean Absolute Error with Polynomial Features: ${mae:.2f}")



In [None]:
# Scatter of actual vs. predicted expenses on the test split; the red diagonal
# marks where prediction equals the actual value.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, alpha=0.6)
ax.set_xlabel("Actual Expenses")
ax.set_ylabel("Predicted Expenses")
ax.set_title("Actual vs Predicted Healthcare Expenses")
diagonal = [0, max(y_test)]
ax.plot(diagonal, diagonal, color='red')
ax.grid(True)
plt.show()


In [None]:
# Evaluate the model on the test split and plot predictions against true values.
test_predictions = model.predict(X_test_poly).flatten()

# Recompute the MAE from the predictions made in this cell, so the verdict
# below always matches what is plotted rather than relying on a `mae` value
# left in the kernel by an earlier cell.
mae = mean_absolute_error(y_test, test_predictions)
print("Testing set Mean Abs Error: {:5.2f} expenses".format(mae))

# The model passes when its test MAE is below 3500 (same units as `expenses`).
if mae < 3500:
  print("Passed")
else:
  print("Failed")

# Equal-aspect scatter; the y = x line shows where a perfect prediction would fall.
plt.axes(aspect='equal')
plt.scatter(y_test, test_predictions)
plt.xlabel('True values (expenses)')
plt.ylabel('Predictions (expenses)')
lims = [0, 50000]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims, lims)