In [None]:
import pandas as pd
import numpy as np

# Load the dataset from the working directory. When it is missing (e.g. on a
# fresh Colab runtime), fall back to an interactive browser upload.
try:
    df = pd.read_csv('insurance.csv')
except FileNotFoundError:
    # google.colab is only importable inside Colab; elsewhere this import
    # raises and the notebook stops here, as before.
    from google.colab import files
    uploaded = files.upload()
    if not uploaded:
        # The upload dialog was dismissed without selecting a file.
        raise FileNotFoundError(
            "insurance.csv was not found and no file was uploaded."
        )
    # `uploaded` is keyed by the name each file was saved under. Prefer the
    # expected name, otherwise read whatever was uploaded instead of failing
    # with a second FileNotFoundError on a differently named file.
    uploaded_name = 'insurance.csv' if 'insurance.csv' in uploaded else next(iter(uploaded))
    df = pd.read_csv(uploaded_name)

# Predictors: the selected columns with non-numeric ones expanded into
# indicator columns (first level of each dropped). Target: `charges`.
feature_columns = ['age', 'sex', 'bmi', 'children', 'smoker', 'region']
input_features = pd.get_dummies(df[feature_columns], drop_first=True)
output_target = df['charges']

# Ordered (unshuffled) 80/20 split: leading rows train, trailing rows test.
train_size = int(0.8 * len(df))
y_train_data, y_test_data = output_target[:train_size], output_target[train_size:]

# Prepend an intercept column of ones to each feature slice, then force
# float64 because the stacked array is not guaranteed to be numeric already.
train_rows = input_features[:train_size]
test_rows = input_features[train_size:]
X_train_data = np.column_stack((np.ones(len(train_rows)), train_rows)).astype(np.float64)
X_test_data = np.column_stack((np.ones(len(test_rows)), test_rows)).astype(np.float64)

# Ordinary least squares fit of the training targets on the design matrix.
# np.linalg.lstsq solves the least-squares problem directly rather than
# forming and inverting X.T @ X, which squares the condition number and
# breaks down when columns are (nearly) collinear. For a well-conditioned
# matrix the coefficients agree with the normal-equation solution up to
# floating-point rounding.
coefficients, *_ = np.linalg.lstsq(
    X_train_data,
    np.asarray(y_train_data, dtype=np.float64),
    rcond=None,
)

# Predictions for the held-out rows.
y_predictions = X_test_data @ coefficients

# Residuals are computed once and reused for both metrics.
test_residuals = y_test_data - y_predictions

mean_squared_error_value = np.mean(test_residuals ** 2)

# R² = 1 - RSS / TSS, with TSS measured around the mean of the test targets.
mean_output = np.mean(y_test_data)
total_sum_of_squares = np.sum((y_test_data - mean_output) ** 2)
residual_sum_of_squares = np.sum(test_residuals ** 2)
r_squared_value = 1 - (residual_sum_of_squares / total_sum_of_squares)

print("Mean Squared Error (MSE):", f"{mean_squared_error_value:,.2f}")
print("R-squared (R²):", r_squared_value)


Mean Squared Error (MSE): 37,175,951.41
R-squared (R²): 0.7584847182677392
