In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Read the insurance dataset from disk and preview its first rows.
csv_path = "insurance1.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [3]:
# Encode the categorical columns as integers so they can be used as model inputs.
# Each entry maps a column name to its label -> integer code table.
CATEGORY_CODES = {
    'sex': {'male': 1, 'female': 0},
    'smoker': {'yes': 1, 'no': 0},
    'region': {
        'northeast': 0,
        'northwest': 1,
        'southeast': 2,
        'southwest': 3,
    },
}

for column, codes in CATEGORY_CODES.items():
    # Series.map turns every value that is missing from the table into NaN, so
    # re-running this cell on the already-encoded frame would wipe the column.
    # Skip columns that already contain only the integer codes; this keeps the
    # cell safe to execute more than once.
    if df[column].isin(list(codes.values())).all():
        continue
    df[column] = df[column].map(codes)

# NOTE(review): 'region' has no natural order, yet the 0-3 codes make a linear
# model treat it as a numeric scale. One-hot encoding may be a better fit —
# confirm the intent before changing it.

In [4]:
# Feature matrix: each predictor appears exactly once. Selecting a column twice
# would put two identical (perfectly collinear) columns into X, which makes the
# fitted coefficient for that feature arbitrary.
feature_columns = ['age', 'sex', 'smoker', 'region', 'bmi', 'children']
X = df[feature_columns]

# Target: the 'charges' column.
y = df['charges']

In [5]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Build the linear regression estimator and fit it on the training split.
# fit() returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)

# Predict the target (charges) for the held-out rows.
y_pred = model.predict(X_test)

In [7]:
# Score the held-out predictions against the true targets.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5  # RMSE is the square root of MSE
r2 = r2_score(y_test, y_pred)

# Metric report, one line per metric, each rounded to two decimals.
report_lines = [
    "=== Multiple Linear Regression Results ===",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R² Score: {r2:.2f}",
]
for line in report_lines:
    print(line)

# Side-by-side table of the true and predicted target values, indexed like y_test.
results = pd.DataFrame({'Actual Price': y_test, 'Predicted Price': y_pred})
print("\nPredicted vs Actual:")
print(results)


=== Multiple Linear Regression Results ===
Mean Absolute Error (MAE): 4155.24
Mean Squared Error (MSE): 33805466.90
Root Mean Squared Error (RMSE): 5814.25
R² Score: 0.77

Predicted vs Actual:
      Actual Price  Predicted Price
764     9095.06825      8931.421164
887     5272.17580      7070.906703
890    29330.98315     36937.080496
1293    9301.89355      9596.992144
259    33750.29180     27008.354871
...            ...              ...
701     9541.69555     16133.551618
672     4399.73100      6743.836928
1163    2200.83085      2066.203492
1103   11363.28320     14697.798124
1295    1964.78000         2.738235

[402 rows x 2 columns]


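RMSE is always at least as large as MAE, so the ratio RMSE/MAE is ≥ 1. A ratio close to 1 (roughly 1.0–1.2) means the errors are of similar size across samples, which is a sign of a good model.
Here RMSE/MAE = 5814.25 / 4155.24 ≈ 1.40, which is above that range and suggests that a few predictions have much larger errors than the rest.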

Feature scaling is critical whenever a model relies on distances between points or on gradient-based optimization.

StandardScaler → centers data (0 mean, unit variance)

MinMaxScaler → squeezes data into [0, 1]

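Linear Regression → scaling does not change the predictions of an ordinary least-squares fit, but it makes coefficients comparable across features and keeps gradient-based or regularized variants numerically stable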

LOF (Local Outlier Factor) → scaling keeps any single feature from dominating the distance computation, so outliers are detected fairly across all features