# Scikit-learn - Aprendizaje Supervisado

Este notebook cubre los algoritmos de aprendizaje supervisado en scikit-learn: clasificación y regresión.

## Importar Módulos


In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier
from sklearn.svm import SVC, SVR
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.naive_bayes import GaussianNB
import numpy as np


## Clasificación


In [2]:
# Cargar datos
X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Regresión Logística
lr = LogisticRegression(random_state=42, max_iter=1000)
lr.fit(X_train, y_train)
print(f"LogisticRegression - Precisión: {lr.score(X_test, y_test):.4f}")

# Árbol de Decisión
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
print(f"DecisionTreeClassifier - Precisión: {dt.score(X_test, y_test):.4f}")

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
print(f"RandomForestClassifier - Precisión: {rf.score(X_test, y_test):.4f}")

# Support Vector Machine (SVM)
svm = SVC(kernel='rbf', random_state=42)
svm.fit(X_train, y_train)
print(f"SVC - Precisión: {svm.score(X_test, y_test):.4f}")

# K-Nearest Neighbors (KNN)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
print(f"KNeighborsClassifier - Precisión: {knn.score(X_test, y_test):.4f}")

# Naive Bayes
nb = GaussianNB()
nb.fit(X_train, y_train)
print(f"GaussianNB - Precisión: {nb.score(X_test, y_test):.4f}")

# Gradient Boosting
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)
print(f"GradientBoostingClassifier - Precisión: {gb.score(X_test, y_test):.4f}")


LogisticRegression - Precisión: 1.0000
DecisionTreeClassifier - Precisión: 1.0000
RandomForestClassifier - Precisión: 1.0000
SVC - Precisión: 1.0000
KNeighborsClassifier - Precisión: 1.0000
GaussianNB - Precisión: 1.0000


GradientBoostingClassifier - Precisión: 1.0000


## Regresión


In [3]:
# Cargar datos de regresión
X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Regresión Lineal
linear = LinearRegression()
linear.fit(X_train, y_train)
print(f"LinearRegression - R²: {linear.score(X_test, y_test):.4f}")

# Ridge Regression (regularización L2)
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
print(f"Ridge - R²: {ridge.score(X_test, y_test):.4f}")

# Lasso Regression (regularización L1)
lasso = Lasso(alpha=1.0)
lasso.fit(X_train, y_train)
print(f"Lasso - R²: {lasso.score(X_test, y_test):.4f}")

# Árbol de Decisión para Regresión
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train, y_train)
print(f"DecisionTreeRegressor - R²: {dt_reg.score(X_test, y_test):.4f}")

# Random Forest para Regresión
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)
rf_reg.fit(X_train, y_train)
print(f"RandomForestRegressor - R²: {rf_reg.score(X_test, y_test):.4f}")

# Support Vector Regression (SVR)
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)
print(f"SVR - R²: {svr.score(X_test, y_test):.4f}")

# K-Nearest Neighbors para Regresión
knn_reg = KNeighborsRegressor(n_neighbors=5)
knn_reg.fit(X_train, y_train)
print(f"KNeighborsRegressor - R²: {knn_reg.score(X_test, y_test):.4f}")


LinearRegression - R²: 0.4526
Ridge - R²: 0.4192
Lasso - R²: 0.3576
DecisionTreeRegressor - R²: 0.0607


RandomForestRegressor - R²: 0.4428
SVR - R²: 0.1821
KNeighborsRegressor - R²: 0.4302


## Parámetros Importantes


In [4]:
# Ejemplo: Random Forest con parámetros personalizados
rf_custom = RandomForestClassifier(
    n_estimators=200,        # Número de árboles
    max_depth=10,            # Profundidad máxima
    min_samples_split=5,     # Mínimo de muestras para dividir
    min_samples_leaf=2,      # Mínimo de muestras en hoja
    max_features='sqrt',     # Características a considerar ('sqrt', 'log2', None)
    random_state=42
)
rf_custom.fit(X_train, y_train)
print(f"RandomForest (customizado) - Precisión: {rf_custom.score(X_test, y_test):.4f}")

# Ejemplo: SVM con diferentes kernels
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    svm_kernel = SVC(kernel=kernel, random_state=42)
    svm_kernel.fit(X_train, y_train)
    print(f"SVC (kernel={kernel}) - Precisión: {svm_kernel.score(X_test, y_test):.4f}")


  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type =

  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type = type_of_target(y, input_name="y")
  y_type =

RandomForest (customizado) - Precisión: 0.0000
SVC (kernel=linear) - Precisión: 0.0112
SVC (kernel=poly) - Precisión: 0.0000
SVC (kernel=rbf) - Precisión: 0.0000
SVC (kernel=sigmoid) - Precisión: 0.0000


  type_true = type_of_target(y_true, input_name="y_true")
  y_type = type_of_target(y, input_name="y")
  type_true = type_of_target(y_true, input_name="y_true")
  y_type = type_of_target(y, input_name="y")
  type_true = type_of_target(y_true, input_name="y_true")
