# Random Forest Regression GridSearch Regression
Step-by-step implementation based on your PDF

In [1]:
# Step 1: Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Step 2: Load Dataset
dataset = pd.read_csv('insurance_pre.csv')
dataset.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [3]:
# Step 3: Convert categorical data
dataset = pd.get_dummies(dataset, drop_first=True)
dataset.head()

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.9,0,16884.924,False,True
1,18,33.77,1,1725.5523,True,False
2,28,33.0,3,4449.462,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.88,0,3866.8552,True,False


In [4]:
# Step 4: Split independent and dependent variables
X = dataset[['age','bmi','children','sex_male','smoker_yes']]
y = dataset['charges']

In [5]:
# Step 5: Train-Test Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [6]:
# Step 6: Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [7]:
# Step 7: Import Model
from sklearn.ensemble import RandomForestRegressor

In [8]:
# Step 8: GridSearchCV
from sklearn.model_selection import GridSearchCV

param_grid = {'n_estimators':[10,100], 'criterion':['squared_error','absolute_error'], 'max_features':['sqrt','log2']}

grid = GridSearchCV(RandomForestRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1)
grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best Parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'n_estimators': 100}


In [9]:
# Step 9: Model Evaluation
from sklearn.metrics import r2_score

pred = grid.predict(X_test)
score = r2_score(y_test, pred)

print("R2 Score:", score)

R2 Score: 0.8692287303795655


In [10]:
# Step 10: Future Prediction
age = float(input("Enter Age: "))
bmi = float(input("Enter BMI: "))
children = int(input("Enter Children: "))
sex = int(input("Sex Male (0/1): "))
smoker = int(input("Smoker (0/1): "))

sample = [[age, bmi, children, sex, smoker]]
prediction = grid.predict(sample)

print("Predicted Charges:", prediction)

Predicted Charges: [16617.2183501]
