In [1]:
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load and preprocess the data
# NOTE(review): these are absolute, machine-specific paths. They are kept
# verbatim so behaviour is unchanged, but hoisted here so they only need to be
# edited in one place.
RAW_CSV_PATH = r"C:\Users\ahama\OneDrive\Desktop\insurance\insurance.csv"
ENCODED_CSV_PATH = r"C:\Users\ahama\OneDrive\Desktop\insurance\encoded_insurance.csv"

df = pd.read_csv(RAW_CSV_PATH)
df.drop_duplicates(inplace=True)

# Apply label encoding to categorical columns
categorical_cols = df.select_dtypes(include=['object', 'category']).columns

# Keep one fitted encoder per column. Re-fitting a single shared encoder
# overwrites its classes_ on every iteration, so the category <-> code mapping
# of every column except the last one would be lost and raw inputs could not
# be encoded consistently at prediction time.
encoders = {}
le = LabelEncoder()  # still bound even when there are no categorical columns
for col in categorical_cols:
    le = LabelEncoder()
    # astype(str) turns missing values into a string (e.g. "nan"), so they are
    # encoded as their own category instead of raising.
    df[col] = le.fit_transform(df[col].astype(str))
    encoders[col] = le

# Convert all relevant columns to numeric (coerce errors to NaN if any)
# NOTE(review): coerced NaNs are not handled afterwards; a NaN in 'charges'
# would make model fitting fail - confirm the raw data is clean.
numeric_cols = ['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Save the encoded dataset
df.to_csv(ENCODED_CSV_PATH, index=False)

# Load the encoded dataset (the model is trained on exactly what is on disk)
df = pd.read_csv(ENCODED_CSV_PATH)

# Split features and target
X = df.drop('charges', axis=1)
y = df['charges']

# Train-test split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display the results
# NOTE(review): MSE is in squared units of 'charges', so the currency prefix on
# that line is misleading; the strings are left as-is to match recorded output.
print(f"MAE: ₹{mae:.2f}")
print(f"MSE: ₹{mse:.2f}")
print(f"R² Score: {r2:.2f}")

from sklearn.ensemble import RandomForestRegressor
from joblib import dump, load

# Train your model
# Use the same hyperparameters and seed as the model evaluated earlier in this
# script, so the persisted model is reproducible and corresponds to the
# reported metrics. An unseeded RandomForestRegressor() differs on every run.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the model (relative path: written to the current working directory)
MODEL_PATH = 'model_insurance.joblib'
dump(model, MODEL_PATH)

# Later or in another script: Load the model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files from a trusted source.
loaded_model = load(MODEL_PATH)

# Use the loaded model
y_pred = loaded_model.predict(X_test)

MAE: ₹2555.93
MSE: ₹21420846.46
R² Score: 0.88
