In [None]:

# Import all required libraries

In [None]:
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor



In [None]:
# Bring the dataset into the Colab working directory.
# The interactive upload prompt is skipped when 'insurance.csv' is already
# present, so "Restart & Run All" does not block on a manual step every time.
from google.colab import files

if not os.path.exists('insurance.csv'):
    files.upload("")

In [None]:
# Read the insurance data from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='insurance.csv')

In [None]:
# Target is the 'charges' column; the features are every remaining column
y = df['charges']
X = df.drop(columns='charges')

In [None]:

# Columns that get one-hot encoded
categorical_features = [
    'sex',
    'smoker',
    'region',
]

# Columns that get standardized
numerical_features = [
    'age',
    'bmi',
    'children',
]


In [None]:
# One-hot encode the categorical columns; drop='first' omits the first
# category of each feature from the output.
encoder = OneHotEncoder(drop='first')
encoder.fit(X[categorical_features])
X_cat = encoder.transform(X[categorical_features])

In [None]:
# Convert the sparse one-hot matrix from the encoder cell to a dense array
X_cat_dense = X_cat.toarray()

# Split row positions first so the scaler can be fitted on training rows only.
# train_test_split chooses rows from the sample count and random_state alone,
# so this selects the same train/test rows as splitting the feature matrix.
row_positions = np.arange(len(X))
train_idx, test_idx, y_train, y_test = train_test_split(
    row_positions, y, test_size=0.2, random_state=42)

# Scale numerical features.
# The scaling statistics are learned from the training rows only: fitting on
# the full dataset would leak the test rows' mean/variance into preprocessing.
scaler = StandardScaler()
scaler.fit(X[numerical_features].iloc[train_idx])
X_num = scaler.transform(X[numerical_features])

# Combine processed features (scaled numeric columns, then one-hot columns)
X_processed = np.hstack([X_num, X_cat_dense])

# Materialize the train/test feature matrices from the pre-computed split
X_train, X_test = X_processed[train_idx], X_processed[test_idx]

In [None]:
# Build the Linear Regression model and fit it on the training split
lr_model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out test split
lr_predictions = lr_model.predict(X_test)

# Score the predictions: MAE, RMSE (square root of the MSE) and R2
lr_mae = mean_absolute_error(y_test, lr_predictions)
lr_mse = mean_squared_error(y_test, lr_predictions)
lr_rmse = np.sqrt(lr_mse)
lr_r2 = r2_score(y_test, lr_predictions)

# Report all metrics rounded to two decimals, one per line
print(
    "Linear Regression Performance:",
    f"MAE: {lr_mae:.2f}",
    f"RMSE: {lr_rmse:.2f}",
    f"R2 Score: {lr_r2:.2f}",
    sep="\n",
)


In [None]:
# Build the Decision Tree Regressor (seeded for reproducibility) and fit it
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predict on the held-out test split
dt_predictions = dt_model.predict(X_test)

# Score the predictions: MAE, RMSE (square root of the MSE) and R2
dt_mae = mean_absolute_error(y_test, dt_predictions)
dt_rmse = np.sqrt(mean_squared_error(y_test, dt_predictions))
dt_r2 = r2_score(y_test, dt_predictions)

# Report all metrics rounded to two decimals, one per line
print("\n".join([
    "\nDecision Tree Regressor Performance:",
    f"MAE: {dt_mae:.2f}",
    f"RMSE: {dt_rmse:.2f}",
    f"R2 Score: {dt_r2:.2f}",
]))


In [None]:
# Build the XGBoost Regressor (seeded, verbosity=0) and fit it on the training split
xgb_model = XGBRegressor(random_state=42, verbosity=0)
xgb_model.fit(X_train, y_train)

# Predict on the held-out test split
xgb_predictions = xgb_model.predict(X_test)

# Score the predictions: R2, MAE and RMSE (square root of the MSE)
xgb_r2 = r2_score(y_test, xgb_predictions)
xgb_mae = mean_absolute_error(y_test, xgb_predictions)
xgb_mse = mean_squared_error(y_test, xgb_predictions)
xgb_rmse = np.sqrt(xgb_mse)

# Report all metrics rounded to two decimals, one per line
print(
    "\nXGBoost Regressor Performance:",
    f"MAE: {xgb_mae:.2f}",
    f"RMSE: {xgb_rmse:.2f}",
    f"R2 Score: {xgb_r2:.2f}",
    sep="\n",
)


In [None]:
# Create the Random Forest Regressor model (seeded for reproducibility).
# RandomForestRegressor, the metric functions and numpy come from the imports
# cell at the top of the notebook rather than being re-imported here.
rf_model = RandomForestRegressor(random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Predict on test data
rf_predictions = rf_model.predict(X_test)

# Evaluate the model: MAE, RMSE (square root of the MSE) and R2
rf_mae = mean_absolute_error(y_test, rf_predictions)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_predictions))
rf_r2 = r2_score(y_test, rf_predictions)

print("Random Forest Regressor Performance:")
print(f"MAE: {rf_mae:.2f}")
print(f"RMSE: {rf_rmse:.2f}")
print(f"R2 Score: {rf_r2:.2f}")


In [None]:
# Print the R2 score of every fitted model, one per line
print(
    "\nSummary of Model Performances:",
    f"Linear Regression R2: {lr_r2:.2f}",
    f"Decision Tree R2: {dt_r2:.2f}",
    f"XGBoost R2: {xgb_r2:.2f}",
    f"randomforest regression R2:{rf_r2:.2f}",
    sep="\n",
)

# Candidates in priority order: max() keeps the first of equal scores, so on a
# tie the earlier entry wins, with Linear Regression as the initial choice.
r2_candidates = (
    ("Linear Regression", lr_r2),
    ("Decision Tree Regressor", dt_r2),
    ("XGBoost Regressor", xgb_r2),
    ("Randomforest Regressor", rf_r2),
)
best_model, best_score = max(r2_candidates, key=lambda entry: entry[1])


print(f"\nBest model based on R2 score: {best_model} with R2 = {best_score:.2f}")
