# **Cross Validation - Practical Implementation**

In [20]:
# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [4]:
# loading dataset
df = pd.read_csv('/content/boston.csv')
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


**Train Test Split**

In [5]:
# importing train_test_split
from sklearn.model_selection import train_test_split

In [6]:
# splitting
X = df.drop('MEDV', axis=1)
y = df[['MEDV']]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

**X_train, y_train** --> Training Data --> To Train the model.

**X_test, y_test** --> Testing Data --> To test the model performance.

## **Implementing Decision Tree Regressor**

In [11]:
# importing DecisionTreeRegressor
from sklearn.tree import DecisionTreeRegressor

**Training**

In [13]:
reg_tree = DecisionTreeRegressor()
reg_tree.fit(X_train, y_train)

**Testing**

In [15]:
# importing metrics
from sklearn.metrics import r2_score

In [14]:
y_test_pred = reg_tree.predict(X_test)

In [16]:
# score
r2_score(y_test_pred, y_test)

0.7119200318758903

**Hyperparameter Tuning**

In [17]:
# importing GridSearchCV
from sklearn.model_selection import GridSearchCV

In [18]:
tree = DecisionTreeRegressor()

In [34]:
parameters = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'splitter': ['best', 'random'],
    'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'max_features': ['auto', 'sqrt', 'log2']
}
tree_cv = GridSearchCV(tree, param_grid=parameters, scoring='neg_mean_squared_error', cv=5)
tree_cv.fit(X_train, y_train)

In [35]:
# best params
tree_cv.best_params_

{'criterion': 'absolute_error',
 'max_depth': 7,
 'max_features': 'auto',
 'splitter': 'random'}

In [36]:
# best score
tree_cv.best_score_

-19.615801470588234

**Testing**

In [37]:
y_test_pred = tree_cv.predict(X_test)

In [38]:
# score
r2_score(y_test_pred, y_test)

0.5600027041197799