## Using GridCV and applying all ML algorithms 

In [1]:
#import all the required libraries and use get 
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from joblib import dump
from joblib import parallel_backend


# Read data from CSV file
data = pd.read_csv('insurance_pre.csv')
data = pd.get_dummies(data,drop_first=True)

# Split the data into input and output variables
X = data.drop('charges', axis=1)
y = data['charges']


# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

In [2]:
results = []

# Multiple Linear Regression
mlr_model = LinearRegression()
mlr_params = {}
mlr_grid_search = GridSearchCV(mlr_model, mlr_params, scoring='neg_mean_squared_error')
mlr_grid_search.fit(X_train, y_train)

mlr_best_model = mlr_grid_search.best_estimator_
mlr_best_params = mlr_grid_search.best_params_
mlr_best_score = -mlr_grid_search.best_score_

results.append({'Model': 'Multiple Linear Regression', 'Best Parameters': mlr_best_params, 'Best Score': mlr_best_score})

In [3]:
# SVR
svr_model = SVR()
svr_params = {
    'kernel': ['rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'],
    'C': [10, 100, 1000]
}
svr_grid_search = GridSearchCV(svr_model, svr_params, scoring='neg_mean_squared_error')
svr_grid_search.fit(X_train, y_train)

svr_best_model = svr_grid_search.best_estimator_
svr_best_params = svr_grid_search.best_params_
svr_best_score = -svr_grid_search.best_score_

results.append({'Model': 'SVR', 'Best Parameters': svr_best_params, 'Best Score': svr_best_score})


In [4]:
# Decision Tree
dt_model = DecisionTreeRegressor()
dt_params = {
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'criterion': ['mse', 'mae'],
    
}
dt_grid_search = GridSearchCV(dt_model, dt_params, scoring='neg_mean_squared_error')
dt_grid_search.fit(X_train, y_train)

dt_best_model = dt_grid_search.best_estimator_
dt_best_params = dt_grid_search.best_params_
dt_best_score = -dt_grid_search.best_score_

results.append({'Model': 'Decision Tree', 'Best Parameters': dt_best_params, 'Best Score': dt_best_score})

In [5]:
# Random Forest
rf_model = RandomForestRegressor()
rf_params = {
    'n_estimators': [100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    #'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson']
}
rf_grid_search = GridSearchCV(rf_model, rf_params, scoring='neg_mean_squared_error')
rf_grid_search.fit(X_train, y_train)

rf_best_model = rf_grid_search.best_estimator_
rf_best_params = rf_grid_search.best_params_
rf_best_score = -rf_grid_search.best_score_

results.append({'Model': 'Random Forest', 'Best Parameters': rf_best_params, 'Best Score': rf_best_score})

In [6]:
# Create a dataframe of the evaluation results
results_df = pd.DataFrame(results)

In [7]:
results_df

Unnamed: 0,Model,Best Parameters,Best Score
0,Multiple Linear Regression,{},38879630.0
1,SVR,"{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}",153839200.0
2,Decision Tree,"{'criterion': 'mae', 'max_depth': 5, 'min_samp...",24356350.0
3,Random Forest,"{'max_depth': 5, 'min_samples_split': 10, 'n_e...",23483050.0


In [8]:
# Find the best model based on the best score
best_model_idx = results_df['Best Score'].idxmax()
best_model_name = results_df.loc[best_model_idx, 'Model']
best_model_params = results_df.loc[best_model_idx, 'Best Parameters']
best_model_score = results_df.loc[best_model_idx, 'Best Score']

In [9]:
print('Best Model:', best_model_name)
print('Best Parameters:', best_model_params)
print('Best Score:', best_model_score)
print()

Best Model: SVR
Best Parameters: {'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}
Best Score: 153839190.67146057

