In [367]:
# Importing the necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import pickle

In [None]:
# Read the insurance data set from its CSV file into a DataFrame
csv_path = "data/insurance.csv"
ins = pd.read_csv(csv_path)
ins

In [None]:
# Converting the smoker column into numerical format (yes -> 1, no -> 0).
# Rows with a missing 'smoker' value are dropped first.
#
# The identity entries (1 -> 1, 0 -> 0) keep this cell safe to re-run:
# Series.map turns every value that is absent from the mapping into NaN, so
# without them a second execution would blank out the already-encoded column.
# Any other unexpected value still becomes NaN.
smoker_codes = {'yes': 1, 'no': 0, 1: 1, 0: 0}
ins = (
    ins.dropna(subset=['smoker'])
       .assign(smoker=lambda frame: frame['smoker'].map(smoker_codes))
)
ins

In [None]:
# Converting the sex column into numerical format (male -> 0, female -> 1).
#
# The identity entries (0 -> 0, 1 -> 1) keep this cell safe to re-run:
# Series.map turns every value that is absent from the mapping into NaN, so
# without them a second execution would blank out the already-encoded column.
sex_codes = {'male': 0, 'female': 1, 0: 0, 1: 1}
ins['sex'] = ins['sex'].map(sex_codes)
ins

In [371]:
# Dropping the 'region' column.
# errors='ignore' makes a re-run of this cell a no-op instead of raising
# KeyError once the column has already been removed.
ins = ins.drop(columns='region', errors='ignore')

In [372]:
# Getting the X and Y variables.
# X is selected with a list of columns so it stays a 2-D DataFrame of shape
# (n_samples, 1); scikit-learn estimators reject a 1-D Series as features.
# NOTE(review): only 'age' is used as a feature although 'sex' and 'smoker'
# were encoded above - confirm whether they were meant to be included.
X = ins[['age']]
y = ins['charges']

In [None]:
# Scatterplot of charges against age, drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=ins, x='age', y='charges', ax=ax)
ax.set_title('Scatterplot for Charges')
ax.set_xlabel('Age')
ax.set_ylabel('Charges')
plt.show()
plt.close(fig)

In [374]:
# Splitting the data into training (80%) and testing (20%) sets.
# The feature variable is named `X`; the lowercase `x` used previously was
# never defined and raised NameError on a fresh kernel.
# scikit-learn estimators expect a 2-D feature matrix, so a 1-D Series is
# promoted to a single-column DataFrame before splitting.
features = X.to_frame() if isinstance(X, pd.Series) else X
x_train, x_test, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)

In [375]:
# Build a random forest regressor with a fixed seed and fit it on the
# training split; fit() returns the fitted estimator itself.
model = RandomForestRegressor(random_state=42).fit(x_train, y_train)

# Persist the fitted model to disk as a pickle file
with open('ins_model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Predict charges for the test-split features
predictions = model.predict(X=x_test)
predictions

In [None]:
# Evaluating the regression model with the mean absolute error (MAE).
# Precision, recall and f1-score are classification metrics and are not
# computed here.
mae = mean_absolute_error(y_test, predictions)
print(f'Mean Absolute Error: {mae}')