# L14 - Model evaluation (losses)

---


- Instructor: Dalcimar Casanova (dalcimar@gmail.com)
- Course website: https://www.dalcimar.com/disciplinas/aprendizado-de-maquina
- Bibliography: based on lectures of Dr. Sebastian Raschka

In [152]:
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn import datasets

from sklearn.model_selection import train_test_split

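## Classification loss (0-1 loss)

The 0-1 loss assigns a penalty of 1 to every misclassified sample and 0 to every correctly classified one: $L(\hat{y}, y) = \mathbb{1}[\hat{y} \neq y]$. Summed over the test set it gives the number of errors; averaged over the test set it equals $1 - \text{accuracy}$.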


In [153]:
# Load the Iris dataset and unpack it into the feature matrix X and label vector y.
iris = datasets.load_iris()
X, y = iris.data[:, :], iris.target

In [154]:
# Show the shape of the feature matrix, then its first five rows.
print(X.shape, X[:5, :], sep="\n")

(150, 4)
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]


In [155]:
# Show the shape of the label vector, then its first five entries.
print(y.shape, y[:5], sep="\n")

(150,)
[0 0 0 0 0]


In [156]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# Unfitted decision tree classifier, seeded so repeated runs build the same tree.
tree = DecisionTreeClassifier(random_state=1)

In [157]:
# Fit the decision tree classifier on the training split only.
tree.fit(X_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=1, splitter='best')

In [158]:
# Predict labels for the held-out test split.
y_test_pred = tree.predict(X_test)

In [159]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

0.9555555555555556

In [161]:
# Element-wise comparison of true and predicted labels; True marks a misclassification.
misclassified = y_test != y_test_pred

print(y_test)
print(y_test_pred)
print(misclassified)
print(np.sum(misclassified))  # number of misclassified test samples
print(X_test.shape)

[0 1 1 0 2 1 2 0 0 2 1 0 2 1 1 0 1 1 0 0 1 1 1 0 2 1 0 0 1 2 1 2 1 2 2 0 1
 0 1 2 2 0 2 2 1]
[0 1 1 0 2 1 2 0 0 2 1 0 2 1 1 0 1 1 0 0 1 1 2 0 2 1 0 0 1 2 1 2 1 2 2 0 1
 0 1 2 2 0 1 2 1]
[False False False False False False False False False False False False
 False False False False False False False False False False  True False
 False False False False False False False False False False False False
 False False False False False False  True False False]
2
(45, 4)


In [164]:
from sklearn.metrics import zero_one_loss

# normalize=False requests the count of misclassified samples rather than the fraction.
zero_one_loss(y_true=y_test, y_pred=y_test_pred, normalize=False)

2

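## Regression loss (MSE loss)

The mean squared error averages the squared differences between the true targets and the predictions: $\text{MSE} = \frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2$.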



In [165]:
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# accessing it raises ImportError. Use it when the installed version still
# provides it; otherwise rebuild the same arrays from the original source file,
# following the recipe given in scikit-learn's removal notice (this fallback
# needs network access).
try:
    boston = datasets.load_boston()
except (ImportError, AttributeError):
    import pandas as pd
    from sklearn.utils import Bunch

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    # Each record spans two physical lines in the raw file: the first line
    # holds 11 feature values, the second holds 2 more features and the target.
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )

# Feature matrix and regression target used by the cells below.
X = boston.data[:, :]
y = boston.target

In [166]:
# Show the shape of the feature matrix, then its first five rows.
print(X.shape, X[:5, :], sep="\n")

(506, 13)
[[6.3200e-03 1.8000e+01 2.3100e+00 0.0000e+00 5.3800e-01 6.5750e+00
  6.5200e+01 4.0900e+00 1.0000e+00 2.9600e+02 1.5300e+01 3.9690e+02
  4.9800e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 6.4210e+00
  7.8900e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9690e+02
  9.1400e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 7.1850e+00
  6.1100e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9283e+02
  4.0300e+00]
 [3.2370e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 6.9980e+00
  4.5800e+01 6.0622e+00 3.0000e+00 2.2200e+02 1.8700e+01 3.9463e+02
  2.9400e+00]
 [6.9050e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 7.1470e+00
  5.4200e+01 6.0622e+00 3.0000e+00 2.2200e+02 1.8700e+01 3.9690e+02
  5.3300e+00]]


In [167]:
# Show the shape of the target vector, then its first five values.
print(y.shape, y[:5], sep="\n")

(506,)
[24.  21.6 34.7 33.4 36.2]


In [168]:
# Hold out 10% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=1,
)

# Unfitted decision tree regressor, seeded so repeated runs build the same tree.
tree = DecisionTreeRegressor(random_state=1)

In [169]:
# Fit the decision tree regressor on the training split only.
tree.fit(X_train, y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=1, splitter='best')

In [170]:
# Predict target values for the held-out test split.
y_test_pred = tree.predict(X_test)

In [171]:
from sklearn.metrics import mean_squared_error

# Mean of the squared differences between true and predicted test targets.
mean_squared_error(y_true=y_test, y_pred=y_test_pred)

11.351960784313727

In [172]:
# Recompute the MSE by hand: sum of squared residuals divided by the test-set size.
squared_errors = (y_test - y_test_pred) ** 2

print(y_test)
print(y_test_pred)
print(np.sum(squared_errors) / len(X_test))
print(X_test.shape)

[28.2 23.9 16.6 22.  20.8 23.  27.9 14.5 21.5 22.6 23.7 31.2 19.3 19.4
 19.4 27.9 13.9 50.  24.1 14.6 16.2 15.6 23.8 25.  23.5  8.3 13.5 17.5
 43.1 11.5 24.1 18.5 50.  12.6 19.8 24.5 14.9 36.2 11.9 19.1 22.6 20.7
 30.1 13.3 14.6  8.4 50.  12.7 25.  18.6 29.8]
[32.  31.1 22.5 18.6 20.  20.8 23.1 19.1 27.1 22.  25.  30.5 19.8 21.4
 19.3 19.1 13.1 44.8 22.  12.8 20.3 17.8 23.3 23.8 22.5  9.6 13.6 18.5
 43.5  8.5 28.4 19.4 50.  11.8 23.2 21.2 15.2 33.4 13.8 20.  23.7 22.2
 25.  17.3 10.9  7.5 50.   8.5 27.5 16.1 20.6]
11.351960784313727
(51, 13)
