In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.linear_model import Perceptron
from sklearn.kernel_approximation import RBFSampler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import os

# Cargar el dataset
path = os.path.join(os.path.dirname(os.path.abspath('')), 'ejercicio_11.3', 'Obesity prediction.csv')
data = pd.read_csv(path)

# Inspeccionar los datos
print(data.head())

# Definir características (X) y target (y)
X = data.drop(columns='Obesity')
y = data['Obesity']

# Convertir variables categóricas a numéricas si es necesario
X = pd.get_dummies(X)

# Dividir el dataset en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

   Gender   Age  Height  Weight family_history FAVC  FCVC  NCP       CAEC  \
0  Female  21.0    1.62    64.0            yes   no   2.0  3.0  Sometimes   
1  Female  21.0    1.52    56.0            yes   no   3.0  3.0  Sometimes   
2    Male  23.0    1.80    77.0            yes   no   2.0  3.0  Sometimes   
3    Male  27.0    1.80    87.0             no   no   3.0  3.0  Sometimes   
4    Male  22.0    1.78    89.8             no   no   2.0  1.0  Sometimes   

  SMOKE  CH2O  SCC  FAF  TUE        CALC                 MTRANS  \
0    no   2.0   no  0.0  1.0          no  Public_Transportation   
1   yes   3.0  yes  3.0  0.0   Sometimes  Public_Transportation   
2    no   2.0   no  2.0  1.0  Frequently  Public_Transportation   
3    no   2.0   no  2.0  0.0  Frequently                Walking   
4    no   2.0   no  0.0  0.0   Sometimes  Public_Transportation   

               Obesity  
0        Normal_Weight  
1        Normal_Weight  
2        Normal_Weight  
3   Overweight_Level_I  
4  Overwe

### Pipelines para SVM no-lineal y Voted Perceptron con kernels RBF

In [3]:
pipelines = {
    'svm_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('svm', SVC(kernel='rbf'))  # SVM con kernel RBF
    ]),
    'voted_perceptron_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('rbf_features', RBFSampler(gamma=0.1, random_state=42)),  # Aproximación de kernel RBF
        ('perceptron', Perceptron(max_iter=1000, tol=1e-3, random_state=42))
    ])
}

# Definir hiperparámetros para ambos modelos
param_grids = {
    'svm_pipeline': {
        'svm__C': [0.1, 1, 10],           # Parámetro de regularización
        'svm__gamma': ['auto', 'scale', 0.01, 0.1, 1, 10]  # Parámetro gamma para el kernel RBF
    },
    'voted_perceptron_pipeline': {
        'rbf_features__gamma': [0.01, 0.1, 1, 10],
        'perceptron__alpha': [0.0001, 0.001, 0.01]   # Regularización del perceptrón
    }
}

In [4]:
# Realizar GridSearch para ambos modelos
grid_search_svm = GridSearchCV(pipelines['svm_pipeline'], param_grids['svm_pipeline'], cv=5, n_jobs=-1)
grid_search_perceptron = GridSearchCV(pipelines['voted_perceptron_pipeline'], param_grids['voted_perceptron_pipeline'], cv=5, n_jobs=-1)

# Entrenar los modelos
grid_search_svm.fit(X_train, y_train)
grid_search_perceptron.fit(X_train, y_train)

# Obtener los mejores resultados
print("Mejores parámetros para SVM:", grid_search_svm.best_params_)
print("Mejores parámetros para Vpted Perceptron:", grid_search_perceptron.best_params_)

# Predecir en los datos de prueba
y_pred_svm = grid_search_svm.best_estimator_.predict(X_test)
y_pred_perceptron = grid_search_perceptron.best_estimator_.predict(X_test)

Mejores parámetros para SVM: {'svm__C': 10, 'svm__gamma': 0.01}
Mejores parámetros para Vpted Perceptron: {'perceptron__alpha': 0.0001, 'rbf_features__gamma': 0.01}


### Accuracy

In [5]:
accuracy_svm = accuracy_score(y_test, y_pred_svm)
accuracy_perceptron = accuracy_score(y_test, y_pred_perceptron)

print("Accuracy SVM:", accuracy_svm)
print("Accuracy Voted Perceptron:", accuracy_perceptron)

Accuracy SVM: 0.9022082018927445
Accuracy Voted Perceptron: 0.6624605678233438


### Confusion Matrix

In [6]:
conf_matrix_svm = confusion_matrix(y_test, y_pred_svm)
conf_matrix_perceptron = confusion_matrix(y_test, y_pred_perceptron)

print("\nConfusion Matrix SVM:\n", conf_matrix_svm)
print("\nConfusion Matrix Voted Perceptron:\n", conf_matrix_perceptron)


Confusion Matrix SVM:
 [[81  5  0  0  0  0  0]
 [ 7 75  0  0  0  7  4]
 [ 0  1 98  2  0  0  1]
 [ 0  0  1 87  0  0  0]
 [ 0  0  0  1 97  0  0]
 [ 0 11  0  0  0 70  7]
 [ 0  3  3  0  0  9 64]]

Confusion Matrix Voted Perceptron:
 [[81  5  0  0  0  0  0]
 [26 55  0  3  2  0  7]
 [ 2  8 24  1 12  0 55]
 [ 0  0  1 84  0  0  3]
 [ 0  0  0  1 97  0  0]
 [24 24  0  0  2 19 19]
 [ 4 12  1  0  1  1 60]]


### Classification Reports

In [7]:
report_svm = classification_report(y_test, y_pred_svm)
report_perceptron = classification_report(y_test, y_pred_perceptron)

print("\nClassification Report SVM:\n", report_svm)
print("\nClassification Report Voted Perceptron Perceptron:\n", report_perceptron)


Classification Report SVM:
                      precision    recall  f1-score   support

Insufficient_Weight       0.92      0.94      0.93        86
      Normal_Weight       0.79      0.81      0.80        93
     Obesity_Type_I       0.96      0.96      0.96       102
    Obesity_Type_II       0.97      0.99      0.98        88
   Obesity_Type_III       1.00      0.99      0.99        98
 Overweight_Level_I       0.81      0.80      0.80        88
Overweight_Level_II       0.84      0.81      0.83        79

           accuracy                           0.90       634
          macro avg       0.90      0.90      0.90       634
       weighted avg       0.90      0.90      0.90       634


Classification Report Voted Perceptron Perceptron:
                      precision    recall  f1-score   support

Insufficient_Weight       0.59      0.94      0.73        86
      Normal_Weight       0.53      0.59      0.56        93
     Obesity_Type_I       0.92      0.24      0.38       102