# RandomForest with HyperTuning concept:

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [3]:
data = pd.read_csv("insurance_pre.csv")
data.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [5]:
df = pd.get_dummies(data,drop_first = True,dtype=int)

In [8]:
independent = df[['age', 'bmi', 'children','sex_male', 'smoker_yes']]

In [10]:
dependent = df[["charges"]]

In [12]:
independent.head(5)


Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.9,0,0,1
1,18,33.77,1,1,0
2,28,33.0,3,1,0
3,33,22.705,0,1,0
4,32,28.88,0,1,0


In [14]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(independent,dependent,test_size=0.30,random_state=0)

In [15]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [18]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {'criterion':['squared_error', 'absolute_error', 'friedman_mse', 'poisson'],
'max_features': ['sqrt','log2'],
'n_estimators':[10,50,100,250,500]}

In [20]:
grid = GridSearchCV(RandomForestRegressor(),param_grid,refit=True,verbose=3,n_jobs=-1)
grid.fit(X_train,y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits


In [22]:
print(grid.best_params_)

{'criterion': 'squared_error', 'max_features': 'sqrt', 'n_estimators': 100}


In [24]:
re=grid.cv_results_
table = pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.029226,0.004674,0.005828,0.003075,squared_error,sqrt,10,"{'criterion': 'squared_error', 'max_features':...",0.843222,0.770261,0.795139,0.802967,0.752976,0.792913,0.030806,35
1,0.130732,0.01404,0.006727,0.004181,squared_error,sqrt,50,"{'criterion': 'squared_error', 'max_features':...",0.863989,0.79148,0.807144,0.838079,0.763083,0.812755,0.035264,16
2,0.254136,0.00519,0.009576,0.003836,squared_error,sqrt,100,"{'criterion': 'squared_error', 'max_features':...",0.869865,0.790473,0.811523,0.831499,0.771862,0.815045,0.033932,1
3,0.64638,0.018001,0.026699,0.00478,squared_error,sqrt,250,"{'criterion': 'squared_error', 'max_features':...",0.864806,0.790494,0.812297,0.830479,0.77249,0.814113,0.032035,7
4,1.252932,0.017004,0.045669,0.004206,squared_error,sqrt,500,"{'criterion': 'squared_error', 'max_features':...",0.86843,0.793898,0.809587,0.83014,0.769789,0.814369,0.033468,5
5,0.024579,0.003068,0.002862,0.003548,squared_error,log2,10,"{'criterion': 'squared_error', 'max_features':...",0.844345,0.750236,0.786855,0.809317,0.758146,0.78978,0.034444,38
6,0.125008,0.004112,0.004744,0.00447,squared_error,log2,50,"{'criterion': 'squared_error', 'max_features':...",0.861356,0.780335,0.810294,0.823061,0.765737,0.808157,0.033557,30
7,0.246832,0.003808,0.011267,0.001282,squared_error,log2,100,"{'criterion': 'squared_error', 'max_features':...",0.867195,0.789617,0.800936,0.82758,0.768183,0.810702,0.03413,26
8,0.631345,0.014186,0.024307,0.0057,squared_error,log2,250,"{'criterion': 'squared_error', 'max_features':...",0.866004,0.792501,0.806083,0.825907,0.770614,0.812222,0.032365,20
9,1.264858,0.039117,0.046949,0.003795,squared_error,log2,500,"{'criterion': 'squared_error', 'max_features':...",0.865029,0.794027,0.81144,0.829627,0.771456,0.814316,0.031822,6


In [26]:
y_pred = grid.predict(X_test)

In [28]:
from sklearn.metrics import r2_score
r2score = r2_score(y_test,y_pred)
r2score

0.8690950769852043

In [30]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))


Age: 19
BMI: 27.900
Children: 0
Sex Male 0 or 1: 0
Smoker Yes 0 or 1: 1


In [32]:
new_data = [[age_input, bmi_input, children_input, sex_male_input, smoker_yes_input]]
# IMPORTANT: Use transform(), NOT fit_transform()
new_data_scaled = sc.transform(new_data)  
print(new_data_scaled)

[[-1.46169465 -0.45767803 -0.89833872 -0.97676557  1.98149332]]


In [34]:
# Make prediction using trained model
Future_Prediction = grid.predict(new_data_scaled)

print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[17241.981491]
