## Instalar dependencias

In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
import joblib
from sklearn import linear_model
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

## Leer Datos y definir columnas

In [10]:
size_filename = "penguins_size.csv"

df = pd.read_csv("penguins_size.csv").dropna()

continuous = ["culmen_length_mm", "culmen_depth_mm",
    "flipper_length_mm", "body_mass_g"]

categorical = ["species","island","sex"]

df.head()

Unnamed: 0,species,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,MALE


## Preprocesar datos

In [9]:
def transform_variables(df,variables_categoricas,variables_continuas):

    # Label Encoder

    le = LabelEncoder()
    for variable in variables_categoricas:
        df[variable] = le.fit_transform(df[variable])
    
    # Scale continuous variables

    scaler = Normalizer()
    for variable in variables_continuas:
        df[variable] = scaler.fit_transform(df[variables_continuas])
    
    return df
    
df_pros = transform_variables(df,categorical,continuous)

df_pros.head()


Unnamed: 0,species,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
0,0,2,0.010414,3e-06,3e-06,3e-06,2
1,0,2,0.010382,3e-06,3e-06,3e-06,1
2,0,2,0.012377,4e-06,4e-06,4e-06,1
4,0,2,0.01062,3e-06,3e-06,3e-06,1
5,0,2,0.010752,3e-06,3e-06,3e-06,2


In [13]:
def data_split_model(df,target):

    # Train Test Split
    
    y = df[target]
    X = df.drop([target], axis=1)

    X_train, X_test,y_train, y_test = train_test_split(X,y , 
                                   random_state=50,  
                                   test_size=0.30) 

    return X_train, X_test,y_train, y_test

X_train, X_test,y_train, y_test = data_split_model(df_pros,'species')
X_train.head()

Unnamed: 0,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
130,2,0.011559,3e-06,3e-06,3e-06,1
151,1,0.010361,3e-06,3e-06,3e-06,2
15,2,0.009879,3e-06,3e-06,3e-06,1
95,1,0.009477,2e-06,2e-06,2e-06,2
132,1,0.010498,3e-06,3e-06,3e-06,1


## Entrenamiento de Modelos

In [16]:
# Define parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.01, 0.1, 1],
}

# Initialize SVM
svm = SVC()

# Grid Search with cross-validation
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'C': 1, 'gamma': 1}
