In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
boston = load_boston()

In [2]:
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df['target'] = pd.Series(boston.target)
np.random.seed(42)

X = boston_df.drop('target', axis=1)
y = boston_df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = RandomForestRegressor()
model.fit(X_train, y_train)
print(f'Boston model score is: {model.score(X_test, y_test) * 100:.2f}%')

Boston model score is: 86.54%


# using the Scoring parameter

In [5]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
np.random.seed(42)
heart_disease = pd.read_csv("./041 heart-disease.csv")

X = heart_disease.drop('target', axis=1)
y = heart_disease['target']

In [8]:
clf = RandomForestClassifier()

cv_acc = cross_val_score(clf, X, y)
cv_acc

array([0.78688525, 0.86885246, 0.80327869, 0.78333333, 0.76666667])

In [9]:
print(f'The cross-validated accuracy is: {np.mean(cv_acc) * 100:.2f}%')

The cross-validated accuracy is: 80.18%


In [10]:
cv_acc = cross_val_score(clf, X, y, scoring='accuracy')

In [11]:
print(f'The cross-validated accuracy is: {np.mean(cv_acc) * 100:.2f}%')

The cross-validated accuracy is: 81.17%


In [13]:
cv_precision = cross_val_score(clf, X, y, scoring='precision')
np.mean(cv_precision)

0.8222673160173161

In [15]:
# recall
cv_recall = cross_val_score(clf, X, y, scoring='recall')
np.mean(cv_recall)

0.8606060606060606

In [16]:
cv_f1 = cross_val_score(clf, X, y, scoring='f1')
np.mean(cv_f1)

0.843304189136832

In [17]:
# how about regression model?
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor

np.random.seed(42)

X = boston_df.drop('target', axis=1)
y = boston_df['target']

model = RandomForestRegressor()
cv_r2 = cross_val_score(model, X, y, scoring=None)
cv_r2

array([0.77231143, 0.86035935, 0.74664002, 0.47632078, 0.26630379])

In [18]:
np.random.seed(42)
cv_r2 = cross_val_score(model, X, y, scoring='r2')
cv_r2

array([0.77231143, 0.86035935, 0.74664002, 0.47632078, 0.26630379])

In [20]:
# mean absolute error
cv_mae = cross_val_score(model, X, y, scoring='neg_mean_absolute_error')
cv_mae

array([-2.13045098, -2.49771287, -3.45471287, -3.81509901, -3.11813861])

In [21]:
# mean squared error
cv_mse = cross_val_score(model, X, y, scoring='neg_mean_squared_error')
cv_mse

array([ -7.8141513 , -12.94343325, -19.11614042, -46.28783248,
       -19.48161818])

In [22]:
np.mean(cv_mse)

-21.12863512415064

# Metric functions
using different evaluation metrics as Scikit-Learn functions. Classification evaluation functions

In [23]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

np.random.seed(42)
X = heart_disease.drop('target', axis=1)
y = heart_disease['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

clf = RandomForestClassifier()
clf.fit(X_train, y_train)

# make some predictions
y_preds = clf.predict(X_test)

# evaluate the classifier
print('Classifier metrics on the test set')
print(f'Accuracy: {accuracy_score(y_test, y_preds) * 100:.2f}%')
print(f'Precision: {precision_score(y_test, y_preds)}')
print(f'Recall: {recall_score(y_test, y_preds)}')
print(f'F1: {f1_score(y_test, y_preds)}')

Classifier metrics on the test set
Accuracy: 85.25%
Precision: 0.8484848484848485
Recall: 0.875
F1: 0.8615384615384615


# regression evaluation functions

In [24]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

np.random.seed(42)
X = boston_df.drop('target', axis=1)
y = boston_df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = RandomForestRegressor()
model.fit(X_train, y_train)

# make predictions using out regression model
y_preds = model.predict(X_test)

print("Regression model metrics on the test set")
print(f'R^2: {r2_score(y_test, y_preds)}')
print(f'MAE: {mean_absolute_error(y_test, y_preds)}')
print(f'MSE: {mean_squared_error(y_test, y_preds)}')

Regression model metrics on the test set
R^2: 0.8654448653350507
MAE: 2.136382352941176
MSE: 9.867437068627442
