# RandomForest with HyperTuning GridSearchCV concept:

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.read_csv("insurance_pre.csv")
data.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [3]:
df = pd.get_dummies(data,drop_first = True,dtype=int)

In [7]:
independent = df[['age', 'bmi', 'children','sex_male', 'smoker_yes']]

In [9]:
dependent = df[["charges"]]

In [11]:
independent.head(5)


Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.9,0,0,1
1,18,33.77,1,1,0
2,28,33.0,3,1,0
3,33,22.705,0,1,0
4,32,28.88,0,1,0


In [13]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(independent,dependent,test_size=0.30,random_state=42)

In [15]:
from sklearn.preprocessing import StandardScaler
#Foe Input data
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [17]:
#For output data
scy = StandardScaler()
y_train = scy.fit_transform(y_train)
y_test = scy.transform(y_test)

In [21]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {'criterion':['squared_error', 'absolute_error', 'friedman_mse', 'poisson'],
'max_features': ['sqrt','log2'],
'n_estimators':[10,50,100,250,500]}

In [23]:
grid = GridSearchCV(RandomForestRegressor(),param_grid,refit=True,verbose=3,n_jobs=-1)
grid.fit(X_train,y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits


In [25]:
print(grid.best_params_)
#For deployment phase - create pkl file with best_model name
best_model = grid.best_estimator_ 

{'criterion': 'squared_error', 'max_features': 'sqrt', 'n_estimators': 50}


In [36]:
re=grid.cv_results_
table = pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.033434,0.003267,0.002052,0.00299,squared_error,sqrt,10,"{'criterion': 'squared_error', 'max_features':...",0.850694,0.836347,0.747739,0.786767,0.775352,0.79938,0.038478,28
1,0.122247,0.002603,0.008998,0.001912,squared_error,sqrt,50,"{'criterion': 'squared_error', 'max_features':...",0.865944,0.861701,0.762137,0.809068,0.809336,0.821637,0.038517,1
2,0.26878,0.01948,0.012586,0.003402,squared_error,sqrt,100,"{'criterion': 'squared_error', 'max_features':...",0.861882,0.868131,0.74972,0.796034,0.813463,0.817846,0.043827,15
3,1.523258,0.166367,0.057352,0.004644,squared_error,sqrt,250,"{'criterion': 'squared_error', 'max_features':...",0.863913,0.866506,0.757944,0.802803,0.811238,0.820481,0.040776,5
4,3.390945,0.047954,0.11144,0.013381,squared_error,sqrt,500,"{'criterion': 'squared_error', 'max_features':...",0.862846,0.867024,0.758416,0.799033,0.810655,0.819595,0.040904,8
5,0.072094,0.003574,0.005604,0.00459,squared_error,log2,10,"{'criterion': 'squared_error', 'max_features':...",0.841969,0.851088,0.734257,0.762297,0.779078,0.793738,0.045512,30
6,0.332207,0.012976,0.013706,0.003037,squared_error,log2,50,"{'criterion': 'squared_error', 'max_features':...",0.858078,0.863824,0.756319,0.796431,0.804369,0.815804,0.040342,20
7,0.676125,0.017132,0.022298,0.002272,squared_error,log2,100,"{'criterion': 'squared_error', 'max_features':...",0.86379,0.870403,0.759025,0.797823,0.804329,0.819074,0.042212,10
8,1.669551,0.06999,0.058954,0.003826,squared_error,log2,250,"{'criterion': 'squared_error', 'max_features':...",0.863157,0.867145,0.760812,0.799607,0.810417,0.820228,0.040239,7
9,3.46253,0.049562,0.11093,0.005707,squared_error,log2,500,"{'criterion': 'squared_error', 'max_features':...",0.863266,0.865679,0.761442,0.801099,0.811094,0.820516,0.039555,4


## Save the model using PKL file:

In [38]:
import pickle
filename = "rf_grid.sav"

In [40]:
#model creation
pickle.dump(best_model,open(filename,'wb'))

In [42]:
#for pre-process for X_data
pickle.dump(sc,open("X_preprocess_scaler.sav",'wb'))

In [44]:
#for pre-process for y_data
pickle.dump(scy,open("Y_preprocess_scaler.sav",'wb'))