In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error
import plotly.express as px
from sklearn.preprocessing import StandardScaler, LabelEncoder
import joblib

In [17]:
# Read the insurance CSV (path is relative to the notebook's working
# directory) into a DataFrame, then preview its first five rows.
data = pd.read_csv(filepath_or_buffer='insurance.csv')
data.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [18]:
# Separate the prediction target ('charges') from the remaining feature columns.
# `drop` returns a new frame, so `data` itself is left untouched.
y = data['charges']
X = data.drop(columns=['charges'])

# Feature columns holding string categories that must become integer codes.
cat_cols = ['sex', 'smoker', 'region']

# Fit one LabelEncoder per categorical column, keyed by column name, so the
# exact same string -> integer mapping can be reused after training.
label_encoders = {col: LabelEncoder().fit(X[col]) for col in cat_cols}

# Overwrite each categorical column in X with its integer codes.
for col, encoder in label_encoders.items():
    X[col] = encoder.transform(X[col])

X.head()

Unnamed: 0,age,sex,bmi,children,smoker,region
0,19,0,27.9,0,1,3
1,18,1,33.77,1,0,2
2,28,1,33.0,3,0,2
3,33,1,22.705,0,0,1
4,32,1,28.88,0,0,1


In [19]:

# Hold out part of the rows for evaluation. No test_size is passed, so
# train_test_split's default fraction applies; the seed fixes the partition.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=23)

# Standardise the features with statistics learned from the training rows
# only, then apply that same fitted transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Fit a seeded random forest: 100 trees, each capped at depth 10.
model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=23,
).fit(X_train, y_train)

# Predict on both partitions so training and held-out error can be compared.
train_preds, test_preds = model.predict(X_train), model.predict(X_test)

# Report the root-mean-squared error on each partition.
print(f'rmse_train: {root_mean_squared_error(y_train, train_preds)}')
print(f'rmse_test: {root_mean_squared_error(y_test, test_preds)}')

rmse_train: 2131.5453508069118
rmse_test: 4871.898569864167


In [20]:
# Persist the fitted model, the per-column label encoders and the scaler as
# joblib pickles (paths are relative to the working directory).
# NOTE(review): the model file is called 'tips_model.pkl' although this
# notebook trains on insurance.csv — confirm what loads this file before
# renaming it.
joblib.dump(value=model, filename='tips_model.pkl')
joblib.dump(value=label_encoders, filename='label_encoders.pkl')
# This call is the cell's last expression, so its return value is what the
# notebook shows as the cell output.
joblib.dump(value=scaler, filename='scaler.pkl')

['scaler.pkl']