# Regression Assignment - Insurance Charges Prediction

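Auto-generated notebook that trains a random-forest regressor to predict `charges` from `insurance_pre.csv`: the categorical columns are one-hot encoded, 30% of the rows are held out for testing, the features are standardized, the forest is tuned with a 5-fold grid search, and the R² score on the held-out test set is reported.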

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load the insurance table and one-hot encode it in a single step.
# drop_first=True drops the first level of every encoded column.
dataset = pd.get_dummies(pd.read_csv("insurance_pre.csv"), drop_first=True)

# The target is the "charges" column; every other column is a feature.
y = dataset["charges"]
X = dataset.drop(columns="charges")

# Split: hold out 30% of the rows for the final evaluation. The same seed is
# used for the split and for the forest so a fresh-kernel run is reproducible.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# Scale + model as one pipeline. Fitting the scaler on the whole training
# split before cross-validation lets each validation fold influence the
# scaling statistics; inside a Pipeline, GridSearchCV re-fits the scaler on
# the training part of every fold only. X_train / X_test stay unscaled
# DataFrames -- the fitted pipeline applies the scaling itself in predict().
model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("model", RandomForestRegressor(random_state=RANDOM_STATE)),
    ]
)

# Model with GridSearch. Parameters of a pipeline step are addressed as
# "<step name>__<parameter>", so best_params_ reports keys with the "model__"
# prefix.
param_grid = {
    "model__n_estimators": [100, 200],
    "model__max_features": ["sqrt", "log2"],
    "model__criterion": ["squared_error"],
}

grid = GridSearchCV(model, param_grid, cv=5, n_jobs=-1)
grid.fit(X_train, y_train)

# Keep the fitted scaler reachable under its previous name: with the default
# refit=True, best_estimator_ is the pipeline re-fit on all of X_train.
scaler = grid.best_estimator_.named_steps["scaler"]

# Evaluate the tuned search result on the held-out test split.
pred = grid.predict(X_test)
test_r2 = r2_score(y_test, pred)

# Report the winning hyperparameters, then the test-set R2.
print("Best Params:", grid.best_params_)
print("R2 Score:", test_r2)


Best Params: {'criterion': 'squared_error', 'max_features': 'sqrt', 'n_estimators': 200}
R2 Score: 0.8549393349573777
