# Laboratorio de evaluación de modelos de aprendizaje supervisado

Realice los siguientes ejercicios para consolidar sus conocimientos y comprensión de la evaluación de modelos de aprendizaje supervisado.

In [59]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

## Evaluación de modelos de regresión

In [60]:
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])

features = raw_df.values[::2, :]
target = raw_df.values[1::2, 2]

X = pd.DataFrame(features)
y = pd.DataFrame(target, columns=['MEDV'])

# Combine features and target into a single DataFrame
data = pd.concat([X, y], axis=1)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,22.0


## 1. Divida este conjunto de datos en conjuntos de train (80%) y de test (20%).

El campo `MEDV` representa el valor medio de las viviendas ocupadas por sus propietarios (en miles de dólares) y es la variable objetivo que queremos predecir.

In [61]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_score, f1_score, recall_score, confusion_matrix

In [62]:
X = data.drop(columns=['MEDV'])
y = data['MEDV']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 2. Entrene un modelo `LinearRegression` en este conjunto de datos y genere predicciones tanto en el conjunto de entrenamiento como en el de prueba.

In [63]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()

model.fit(X_train, y_train)

train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

## 3. Calcule e imprima R-cuadrado tanto para el conjunto de entrenamiento como para el de prueba.

In [64]:
train_r2 = r2_score(y_train, train_predictions)
test_r2 = r2_score(y_test, test_predictions)

print(f"train_r2: {train_r2}")
print(f"test_r2: {test_r2}")

train_r2: 0.6792672030954283
test_r2: 0.6041513582037021


## 4. Calcule e imprima el error cuadrático medio para el conjunto de entrenamiento y de prueba.

In [65]:
from sklearn.metrics import mean_squared_error

train_mse = mean_squared_error(y_train, train_predictions)
test_mse = mean_squared_error(y_test, test_predictions)

print(f"train_mse: {train_mse}")
print(f"test_mse: {test_mse}")

train_mse: 27.86314978796481
test_mse: 29.029078461798058


## 5. Calcule e imprima el error medio absoluto para el conjunto de entrenamiento y de prueba.

In [66]:
from sklearn.metrics import mean_absolute_error

train_mae = mean_absolute_error(y_train, train_predictions)
test_mae = mean_absolute_error(y_test, test_predictions)

print(f"train_mae: {train_mae}")
print(f"test_mae: {test_mae}")

train_mae: 3.5804986136259442
test_mae: 3.343341261259593


## Evaluación del modelo de clasificación

In [67]:
from sklearn.datasets import load_iris

data = load_iris()

X_c = pd.DataFrame(data["data"], columns=data["feature_names"])
y_c = pd.DataFrame(data["target"], columns=["class"])

data = pd.concat([X_c, y_c], axis=1)
data

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


## 6. Divida este conjunto de datos en conjuntos de train (80%) y de test (20%).

El campo `class` representa el tipo de flor y es la variable objetivo que querremos predecir.

In [68]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_c, y_c, train_size=0.2, random_state=42)

In [69]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
plt.figure(figsize=(8,8))
pd.options.plotting.backend = "plotly"
sns.set(rc={'figure.figsize':(6,6)});

<Figure size 800x800 with 0 Axes>

## 7. Entrene un modelo `LogisticRegression` en este conjunto de datos y genere predicciones tanto en el conjunto de entrenamiento como en el de prueba.

In [70]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=200)

model.fit(X_train, y_train.values.ravel())

train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

## 8. Calcule e imprima la puntuación de precisión tanto para el conjunto de entrenamiento como para el de pruebas.

In [80]:
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

    Esta función resuelve los ejercicios 8,9,10,11,12 y 13.

In [81]:
train_accuracy = accuracy_score(y_train, train_pred)
test_accuracy = accuracy_score(y_test, test_pred)
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

Training Accuracy: 1.0
Test Accuracy: 0.9666666666666667


## 9. Calcule e imprima la puntuación de precisión equilibrada tanto para el conjunto de entrenamiento como para el de pruebas.

In [82]:
train_balanced_accuracy = balanced_accuracy_score(y_train, train_pred)
test_balanced_accuracy = balanced_accuracy_score(y_test, test_pred)
print("Training Balanced Accuracy:", train_balanced_accuracy)
print("Test Balanced Accuracy:", test_balanced_accuracy)

Training Balanced Accuracy: 1.0
Test Balanced Accuracy: 0.9655870445344129


## 10. Calcule e imprima la puntuación de precisión tanto para el conjunto de entrenamiento como para el de prueba.

In [83]:
train_precision = precision_score(y_train, train_pred, average='weighted')
test_precision = precision_score(y_test, test_pred, average='weighted')
print("Training Precision:", train_precision)
print("Test Precision:", test_precision)

Training Precision: 1.0
Test Precision: 0.9674662162162162


## 11. Calcule e imprima la puntuación de recuerdo tanto para el conjunto de entrenamiento como para el de prueba.

In [84]:
train_recall = recall_score(y_train, train_pred, average='weighted')
test_recall = recall_score(y_test, test_pred, average='weighted')
print("Training Recall:", train_recall)
print("Test Recall:", test_recall)

Training Recall: 1.0
Test Recall: 0.9666666666666667


## 12. Calcule e imprima la puntuación F1 tanto para el conjunto de entrenamiento como para el de prueba.

In [85]:
train_f1 = f1_score(y_train, train_pred, average='weighted')
test_f1 = f1_score(y_test, test_pred, average='weighted')
print("Training F1 Score:", train_f1)
print("Test F1 Score:", test_f1)

Training F1 Score: 1.0
Test F1 Score: 0.966655420602789


## 13. Generar matrices de confusión tanto para el conjunto de entrenamiento como para el de pruebas.

In [86]:
train_confusion_matrix = confusion_matrix(y_train, train_pred)
test_confusion_matrix = confusion_matrix(y_test, test_pred)
print("Training Confusion Matrix:\n", train_confusion_matrix)
print("Test Confusion Matrix:\n", test_confusion_matrix)

Training Confusion Matrix:
 [[ 7  0  0]
 [ 0 11  0]
 [ 0  0 12]]
Test Confusion Matrix:
 [[43  0  0]
 [ 0 36  3]
 [ 0  1 37]]


## Bonus: Para cada uno de los conjuntos de datos de este laboratorio, intente entrenar con algunos de los otros modelos que ha aprendido, vuelva a calcular las métricas de evaluación y compare para determinar qué modelos funcionan mejor en cada conjunto de datos.

In [77]:
# Your code here