# Avaliação de Modelos (Parte 1)

In [1]:
!pip install mlxtend --upgrade --no-deps



In [2]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn import datasets
from sklearn.model_selection import train_test_split
from mlxtend.evaluate import bias_variance_decomp
from sklearn.metrics import accuracy_score
from sklearn.metrics import zero_one_loss
from sklearn.metrics import mean_squared_error

## Perda de regressão (MSE loss)



In [3]:
# Load the iris dataset and unpack its feature matrix (X) and class labels (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [4]:
# Hold out 30% of the samples for testing. random_state is fixed so that the
# split, and every number computed from it in later cells, is reproducible
# after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1)

In [5]:
# Unfitted decision tree classifier. random_state is pinned because scikit-learn
# randomly permutes the features at each split, so equally good splits could
# otherwise be chosen differently from run to run.
tree = DecisionTreeClassifier(random_state=1)

In [6]:
# Decompose the expected squared-error loss of `tree` into bias^2 and variance,
# using 30 rounds (mlxtend's num_rounds) with a fixed seed for reproducibility.
# Note: the targets here are the iris class labels, so the MSE is computed
# directly on the label values.
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
    tree, X_train, y_train, X_test, y_test,
    loss='mse', num_rounds=30, random_seed=1)

# Report each component with three decimal places.
print(f'Average expected loss: {avg_expected_loss:.3f}')
print(f'Average bias^2: {avg_bias:.3f}')
print(f'Average variance: {avg_var:.3f}')

Avarage expected loss: 0.041
Avarage bias^2: 0.021
Avarage variance: 0.020


## Perda de classificação (0-1 loss)

In [7]:
# Train the decision tree on the training split.
tree.fit(X_train, y_train)

In [8]:
# Predict a label for every sample in the held-out test split.
y_test_pred = tree.predict(X_test)

In [9]:
# Fraction of test samples whose predicted label equals the true label.
accuracy_score(y_test, y_test_pred)

0.9777777777777777

In [10]:
# Element-wise mask marking the test samples that were misclassified.
mismatch_mask = y_test != y_test_pred

print(y_test)               # true labels
print(y_test_pred)          # predicted labels
print(mismatch_mask)        # True where the prediction is wrong
print(mismatch_mask.sum())  # number of misclassified samples (the unnormalized 0-1 loss)
print(X_test.shape)         # (n_test_samples, n_features)

[0 1 1 2 0 0 2 0 1 1 2 0 1 2 0 1 0 1 0 0 2 2 2 0 1 2 1 0 2 0 0 0 1 1 0 1 2
 2 0 0 1 2 1 0 2]
[0 1 1 2 0 0 2 0 1 1 2 0 1 2 0 1 0 1 0 0 2 2 2 0 1 1 1 0 2 0 0 0 1 1 0 1 2
 2 0 0 1 2 1 0 2]
[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False  True False False False False False False False False False False
 False False False False False False False False False]
1
(45, 4)


In [11]:
# 0-1 loss on the test split; normalize=False returns the number of
# misclassified samples instead of the misclassification fraction.
zero_one_loss(y_test, y_test_pred, normalize = False)

1.0

## Perda de regressão (outro exemplo)

In [12]:
# Load the diabetes dataset and unpack its feature matrix (X) and
# target values (y); this rebinds X and y from the iris example.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

In [13]:
# Hold out 30% of the samples for testing. random_state is fixed so that the
# split, and the error figures computed from it below, is reproducible after
# a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1)

In [14]:
# Unfitted decision tree regressor (rebinds `tree`). random_state is pinned
# because scikit-learn randomly permutes the features at each split, so equally
# good splits could otherwise be chosen differently from run to run.
tree = DecisionTreeRegressor(random_state=1)

In [15]:
# Train the regression tree on the training split.
tree.fit(X_train, y_train)

In [16]:
# Predict a target value for every sample in the held-out test split.
y_test_pred = tree.predict(X_test)

In [17]:
# Mean squared error between the true and predicted test targets.
mean_squared_error(y_test, y_test_pred)

6056.93984962406

In [18]:
# Per-sample squared residuals, used to recompute the MSE by hand.
squared_errors = (y_test - y_test_pred) ** 2

print(y_test)                                   # true targets
print(y_test_pred)                              # predicted targets
print(squared_errors.sum() / X_test.shape[0])   # sum of squared errors / number of test samples
print(X_test.shape)                             # (n_test_samples, n_features)

[137.  47. 180. 129. 115. 141. 101.  78. 143. 259. 276.  65.  72. 118.
 163. 175.  37.  95.  75.  98. 186. 232. 141. 128.  97. 152. 222.  92.
  72. 131. 121.  68.  65. 261. 244. 125. 167. 182.  55. 216.  59.  65.
 214. 237.  90.  47. 150.  83. 109.  44. 178. 181. 200. 138. 235. 118.
 104.  59. 292. 214.  52.  79. 232.  97. 124.  53.  64.  94. 302. 259.
 201. 235.  52. 225.  70. 268.  70.  88. 142. 195. 126. 200. 252.  57.
  89. 163.  39. 170.  81. 142.  75. 104. 243. 150.  40.  60.  95. 168.
 131. 263.  43.  39.  97.  72.  61. 220.  48. 246. 127.  55.  81.  84.
 241.  77. 219.  94. 110. 122. 281. 288. 191. 185. 259. 310.  71. 178.
 220. 192. 197. 229.  59. 122. 160.]
[ 68.  50.  67.  77.  67.  91. 116.  50.  90. 321. 182.  96.  71. 109.
 198. 170.  60.  85.  80.  93. 173. 202. 140.  69.  71.  51. 166. 102.
  65. 265. 128. 148.  63. 277. 265.  53.  67.  85. 199.  63. 148.  50.
  85. 237.  52.  72. 173.  71. 257.  92.  78.  52. 160.  71. 128.  49.
 183.  63. 206. 140. 310.  42. 310.  99.

In [19]:
# Decompose the expected squared-error loss of the regression tree into
# bias^2 and variance. random_seed is fixed, as in the iris example, so the
# reported numbers are reproducible. num_rounds=3 is kept as in the original
# cell; with so few rounds the estimate is coarse.
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
    tree, X_train, y_train, X_test, y_test,
    loss='mse', num_rounds=3, random_seed=1)

# Report each component with three decimal places.
print(f'Perda esperada média: {avg_expected_loss:.3f}')
print(f'Viés^2 médio: {avg_bias:.3f}')
print(f'Variância média: {avg_var:.3f}')

Perda esperada média: 6397.461
Viés^2 médio: 4140.048
Variância média: 2257.414
