# Evaluación

### Regresión

In [None]:
import warnings
# Suppress all warnings so they do not clutter the notebook cell outputs.
warnings.simplefilter(action='ignore')

from sklearn.datasets import load_diabetes

from sklearn.ensemble import RandomForestRegressor as RFR

from sklearn.model_selection import train_test_split as tts

In [None]:
# Load the diabetes regression dataset; features and target are read from
# this object's .data / .target fields below.
data = load_diabetes()

In [None]:
# Split features and target into train/test partitions using the splitter's
# default sizes; no random_state is passed, so the split differs between runs.
X_train, X_test, y_train, y_test = tts(data["data"], data["target"])

In [None]:
# Fit a random forest regressor with default hyperparameters on the training
# partition, then predict the targets of the test partition.
rf = RFR().fit(X_train, y_train)
y_pred = rf.predict(X_test)

###### MSE


$$MSE = \frac{1}{n}\sum_{i=1}^{n}(y_i-\hat{y}_i)^{2}$$


pertenece al intervalo [0, +$\infty$)

###### RMSE


$$RMSE = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_i-\hat{y}_i)^{2}}$$


pertenece al intervalo [0, +$\infty$)

###### RMSLE


$$RMSLE = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(\log(y_i+1)-\log(\hat{y}_i+1))^{2}}$$


pertenece al intervalo [0, +$\infty$)

###### MAE


$$MAE = \frac{1}{n}\sum_{i=1}^{n}|y_i-\hat{y}_i|$$


pertenece al intervalo [0, +$\infty$)

###### R2


$$R2 = 1 - \frac{\sum_{i=1}^{n}(y_i-\hat{y}_i)^{2}}{\sum_{i=1}^{n}(y_i-\bar{y})^{2}}$$

###### Adjusted R2

$$AdjustedR2 = 1-(1-R^{2})\frac{n-1}{n-p-1}$$


donde:
+ n = tamaño de la muestra
+ p = nº de variables del modelo


pertenecen al intervalo (-$\infty$, 1]

### Clasificación

In [None]:
from sklearn.datasets import load_wine
from sklearn.svm import SVC

# Switch to the wine classification dataset. This rebinds `data` and the four
# train/test variables that previously held the regression split.
data = load_wine()
X_train, X_test, y_train, y_test = tts(data["data"], data["target"])

In [None]:
# Train a support vector classifier with default settings and predict the
# class labels of the test partition.
svc = SVC()
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)

+ TP := True Positive (aciertos clase 1)
+ TN := True Negative (aciertos clase 0)
+ FP := False Positive (Error tipo I, decir 1 cuando es 0)
+ FN := False Negative (Error tipo II, decir 0 cuando es 1)

+ Accuracy  := (TP+TN)/(TP+TN+FP+FN) (acierto)  ($\frac{1}{n}\sum_{i=1}^{n} 1(\hat{y}_i=y_i)$)
+ Precision := TP/(TP+FP)
+ Recall    := TP/(TP+FN)  (Sensibilidad, TPR)
+ F1_Score  := 2·Recall·Precision/(Recall+Precision)

(F1 funciona mejor que el accuracy cuando los datos no están balanceados y cuando FP y FN son muy diferentes)

![](data/f1.png)

##### Matriz de Confusión

![](data/conf_matrix.jpeg)

##### ROC-AUC  (Característica operativa del receptor y área debajo de la curva)

+ TPR := TP/(TP+FN)
+ FPR := FP/(TN+FP)


![](data/roc.png)

# HyperOpt (GridSearching bayesiano)

In [None]:
#!pip3 install hyperopt

# PyCaret

https://pycaret.org/tutorial/

https://colab.research.google.com/drive/1GqQ3XAIzg4krBbnOpKyeRqT0qBQhdwYL#scrollTo=lUvE187JEQm3

In [None]:
#!pip3 install pycaret

from pycaret.datasets import get_data
# Fetch PyCaret's bundled 'credit' example dataset.
dataset = get_data(dataset='credit')

In [None]:
# Show the dataset dimensions; left as a bare expression so the notebook
# renders the value as the cell output.
# NOTE(review): presumably a pandas DataFrame, i.e. (rows, columns) -- confirm.
dataset.shape

In [None]:
# Keep 95% of the rows for modeling and hold out the remaining 5% as "unseen"
# data for a final check of the finished model.
#
# The held-out rows must be selected using the ORIGINAL index labels of the
# sample. Resetting the sample's index before calling drop() would turn those
# labels into 0..len(data)-1, so drop() would remove the first 95% of the
# rows by position instead of the sampled ones, and rows used for modeling
# would leak into the "unseen" set.
data = dataset.sample(frac=0.95, random_state=786)
data_unseen = dataset.drop(data.index).reset_index(drop=True)
data = data.reset_index(drop=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

In [None]:
from pycaret.classification import *

In [None]:
# Initialise the PyCaret classification experiment on the modeling split.
# 'default' is the label column; session_id pins the experiment's random seed.
exp_clf101 = setup(
    data=data,
    target='default',
    session_id=123,
)

In [None]:
# Compare the models available in PyCaret on the experiment configured by
# setup(). Left as a bare call so the notebook shows its result as the cell
# output.
compare_models()

In [None]:
# Train a decision tree ('dt') inside the current experiment and print the
# resulting estimator with its hyperparameters.
dt = create_model(estimator='dt')
print(dt)

In [None]:
# Train a k-nearest-neighbours classifier ('knn') inside the current experiment.
knn = create_model(estimator='knn')

In [None]:
# Train a random forest classifier ('rf') inside the current experiment.
# This rebinds `rf`, which earlier held the scikit-learn regressor.
rf = create_model(estimator='rf')

In [None]:
# Hyperparameter-tune the decision tree. tune_model() expects the trained
# estimator object returned by create_model() (PyCaret >= 2.0); the string
# ID 'dt' was only accepted by the 1.x API.
tuned_dt = tune_model(dt)

In [None]:
# Hyperparameter-tune the random forest. tune_model() expects the trained
# estimator object returned by create_model() (PyCaret >= 2.0); the string
# ID 'rf' was only accepted by the 1.x API.
tuned_rf = tune_model(rf)

In [None]:
# ROC curve / AUC plot for the tuned random forest.
plot_model(estimator=tuned_rf, plot='auc')

In [None]:
# Precision-recall curve for the tuned random forest.
plot_model(estimator=tuned_rf, plot='pr')

In [None]:
# Feature-importance plot for the tuned random forest.
plot_model(estimator=tuned_rf, plot='feature')

In [None]:
# Confusion matrix of the tuned random forest.
plot_model(estimator=tuned_rf, plot='confusion_matrix')

In [None]:
# Score the tuned random forest; no data argument is passed, so PyCaret uses
# the hold-out split it kept aside in setup(). The trailing semicolon stops
# the notebook from echoing the returned value.
predict_model(tuned_rf);

In [None]:
# Finalize the tuned random forest for deployment and print the resulting
# estimator.
final_rf = finalize_model(estimator=tuned_rf)
print(final_rf)

In [None]:
# Score the finalized model. The trailing semicolon stops the notebook from
# echoing the returned value.
# NOTE(review): finalize_model() presumably refits on all data including the
# hold-out, so these scores would be optimistic; data_unseen looks like the
# intended honest check -- confirm.
predict_model(final_rf);