## Importing necessary libraries

In [1]:

import numpy as np
import pandas as pd
from joblib import dump, load
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
    

## Load the dataset from CSV into a pandas DataFrame

In [2]:
# Read the raw table from a CSV file located in the working directory.
DATA_PATH = 'insurance.csv'
data = pd.read_csv(DATA_PATH)

## Extract features and target

In [3]:

# Positional split: every column except the last is a feature,
# and the last column is the prediction target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]
    

## Split the data into training and test sets

In [4]:

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=9,
)
    

## Create the column transformer

In [5]:

# Column positions refer to the feature frame (target column already removed).
# NOTE(review): with the column order age, sex, bmi, children, smoker, region
# (the order predict_insurance_cost builds its input in), positions 1/4/5 are
# the text columns and 0/2/3 the numeric ones -- confirm against the CSV header.
one_hot_columns = [1, 4, 5]
scaled_columns = [0, 2, 3]

ct = ColumnTransformer(
    transformers=[
        ('one_hot_encoder', OneHotEncoder(), one_hot_columns),
        ('standard_scaler', StandardScaler(), scaled_columns),
    ],
    # Any column not listed above would be forwarded unchanged.
    remainder='passthrough',
)
    

## Transform training and test data

In [6]:

# Fit the column transformer on the training split only and return the
# transformed training features in the same step.
X_train_transformed = ct.fit_transform(X_train)
# Apply the already-fitted transformer to the test split; nothing is refit here,
# so this line must run after the fit above.
X_test_transformed = ct.transform(X_test)
    

## Create the random forest regression model

In [7]:

# Build a 100-tree forest with a fixed seed and fit it on the transformed
# training features; fit() hands back the estimator itself.
rf = RandomForestRegressor(n_estimators=100, random_state=9).fit(
    X_train_transformed,
    y_train,
)

# Predictions for the held-out rows, reused by the evaluation cells.
y_pred = rf.predict(X_test_transformed)
    

## Evaluation metrics

In [8]:

# Evaluate the held-out predictions.
# MAE: mean absolute difference between y_test and y_pred.
mae = mean_absolute_error(y_test, y_pred)
# RMSE: square root of the mean squared error, so it is on the same scale as y_test.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# R squared as reported by the forest's own score() on the test split.
score = rf.score(X_test_transformed, y_test)

print(f"R squared score: {score:.2f}")
# Report the error metrics as well so they are not computed and then discarded.
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
    

R squared score: 0.84


## Save and load model and transformer

In [9]:

# File names of the persisted artifacts, defined once so the save and load
# steps cannot drift apart.
MODEL_PATH = 'trained_model.joblib'
TRANSFORMER_PATH = 'transformer.joblib'

# Persist the fitted model and the fitted column transformer.
dump(rf, MODEL_PATH)
dump(ct, TRANSFORMER_PATH)

# Round trip: rebind rf and ct to the copies read back from disk.
# NOTE: joblib.load unpickles arbitrary Python objects -- only load files you trust.
rf = load(MODEL_PATH)
ct = load(TRANSFORMER_PATH)
    

## Metrics: R² score

In [10]:
# Cross-check: R squared computed directly from the held-out targets and the
# predictions made earlier (y_pred). Left as the last expression so the
# notebook displays the value.
r2_score(y_test, y_pred)

0.8416321163641215

## predict_insurance_cost function

In [11]:

def predict_insurance_cost(age, sex, bmi, children, smoker, region, model=None, transformer=None):
    """Return the model's prediction for a single record.

    The six feature values are packed into a one-row DataFrame whose columns
    are, in this order: age, sex, bmi, children, smoker, region. The frame is
    passed through ``transformer.transform`` and the result through
    ``model.predict``.

    Args:
        age, sex, bmi, children, smoker, region: Feature values for one record.
            They are forwarded unchanged; no validation is done here.
        model: Object exposing ``predict``. When omitted, the notebook-level
            ``rf`` is used, looked up at call time.
        transformer: Object exposing ``transform``. When omitted, the
            notebook-level ``ct`` is used, looked up at call time.

    Returns:
        The first (and only) element of ``model.predict(...)``.
    """
    # Resolve defaults when the function is called rather than when it is
    # defined, so re-fitting or reloading rf/ct in another cell is picked up
    # without re-running this definition.
    if model is None:
        model = rf
    if transformer is None:
        transformer = ct

    # One-row frame; the column order matters because the notebook's column
    # transformer selects columns by position.
    input_data = pd.DataFrame({
        'age': [age],
        'sex': [sex],
        'bmi': [bmi],
        'children': [children],
        'smoker': [smoker],
        'region': [region]
    })

    # Apply the same preprocessing that was fitted on the training data.
    transformed_input = transformer.transform(input_data)

    # predict() returns one value per row; there is exactly one row.
    prediction = model.predict(transformed_input)

    return prediction[0]
    

## Test the function with a sample input

In [12]:

# Sample call; spelled with keywords so each value is tied to its feature.
predict_insurance_cost(
    age=19,
    sex='female',
    bmi=27.9,
    children=0,
    smoker='yes',
    region='southwest',
)
    

16898.464367000026