In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from keras.models import Model, Sequential
from keras.layers import Input, Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict

In [15]:
# Cargar los datos desde el CSV
df = pd.read_csv(r'C:\Users\Fabian\Desktop\PROYECTO\canciones\caracteristicas_songs_etiquetadas.csv')
df.head()

Unnamed: 0,Nombre,Artista,Tempo,Tonalidad_1,Tonalidad_2,Tonalidad_3,RMS_Energy,Zero_Crossing_Rate,MFCC_1,MFCC_2,MFCC_3,Spectral_Centroid,Lable_sent
0,El Tema De La Película #Gru 3 Mi Villano Favo...,Morat,95.703125,0.428299,0.42898,0.332504,0.116094,0.080412,-128.26494,89.17829,10.359152,2297.262265,1
1,#3,Bad Bunny,107.666016,0.445583,0.318513,0.31202,0.190074,0.137812,-103.38222,48.632713,12.663226,3280.312481,0
2,+,Aitana,107.666016,0.423968,0.343299,0.262612,0.227739,0.119669,-44.26939,75.2498,5.470774,2795.030171,0
3,10 Minutos,Miki Núñez,161.499023,0.405336,0.504356,0.336348,0.27671,0.117588,-23.78698,80.55706,-7.478475,2613.423737,1
4,11,Corderas,89.102909,0.408914,0.347925,0.486865,0.21813,0.100745,-48.194275,87.60547,0.32563,2344.454151,0


In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1084 entries, 0 to 1083
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Nombre              1084 non-null   object 
 1   Artista             1084 non-null   object 
 2   Tempo               1084 non-null   float64
 3   Tonalidad_1         1084 non-null   float64
 4   Tonalidad_2         1084 non-null   float64
 5   Tonalidad_3         1084 non-null   float64
 6   RMS_Energy          1084 non-null   float64
 7   Zero_Crossing_Rate  1084 non-null   float64
 8   MFCC_1              1084 non-null   float64
 9   MFCC_2              1084 non-null   float64
 10  MFCC_3              1084 non-null   float64
 11  Spectral_Centroid   1084 non-null   float64
 12  Lable_sent          1084 non-null   int64  
dtypes: float64(10), int64(1), object(2)
memory usage: 110.2+ KB


In [17]:
# Elimina las columnas 'Nombre' y 'Artista'
df = df.drop(columns=['Nombre', 'Artista'])

# Establece X (características) y y (etiqueta)
X = df.drop(columns=['Lable_sent']).values
y = df['Lable_sent'].values

In [18]:
# Paso 1: Preprocesamiento de datos
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [19]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
# Definir la entrada
inputs = Input(shape=(X_train.shape[1],))

In [20]:
def model1(inputs):
    x = Dense(128, activation='relu')(inputs)
    outputs = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model, 'Red Neuronal 1'

def model2(inputs):
    x = Dense(64, activation='relu')(inputs)
    x = Dense(32, activation='relu')(x)
    outputs = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model, 'Red Neuronal 2'

In [21]:
# Función de optimización
def optimize_model(X_train, y_train, X_test, y_test, model_func):
    # Construir el modelo utilizando la función proporcionada
    model, model_name = model_func(inputs)
    # Compilar el modelo
    model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    # Entrenar el modelo
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test),
                        callbacks=[EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)])
    # Evaluar el modelo
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Modelo: {model_name}, Pérdida: {loss}, Precisión: {accuracy}")
    return model_name, model, history

In [22]:
# Seleccionar modelos neuronales para probar
neural_models = [
    model1,
    model2
]
# Seleccionar modelos no neuronales
non_neural_models = [
    (SVC(kernel='linear'), 'SVM'),
    (RandomForestClassifier(), 'Random Forest'),
    (XGBClassifier(), 'XGBoost'),
    (LogisticRegression(), 'Logistic Regression'),
    (KNeighborsClassifier(), 'K-Nearest Neighbors')
]

In [23]:
# Almacenar resultados de cada modelo
results = []

# Compilar y entrenar los modelos neuronales
for model_func in neural_models:
    model_name, model, history = optimize_model(X_train, y_train, X_test, y_test, model_func)
    results.append((model_name, model, history))

Epoch 1/100


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4532 - loss: 0.7125 - val_accuracy: 0.7834 - val_loss: 0.5447
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7896 - loss: 0.5176 - val_accuracy: 0.7880 - val_loss: 0.4712
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7680 - loss: 0.4817 - val_accuracy: 0.7972 - val_loss: 0.4482
Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7682 - loss: 0.4707 - val_accuracy: 0.8018 - val_loss: 0.4379
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7741 - loss: 0.4674 - val_accuracy: 0.7926 - val_loss: 0.4350
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7991 - loss: 0.4183 - val_accuracy: 0.7926 - val_loss: 0.4321
Epoch 7/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━

In [24]:
# Compilar y entrenar los modelos no neuronales
for model, model_name in non_neural_models:
    scores = cross_val_score(model, X_train, y_train, cv=5)
    mean_accuracy = scores.mean()
    # Calcular F1-score
    y_pred = cross_val_predict(model, X_train, y_train, cv=5)
    f1 = f1_score(y_train, y_pred)
    results.append((model_name, mean_accuracy, f1))

In [25]:
# Imprimir resultados de todos los modelos
for result in results:
    model_name = result[0]
    if isinstance(result[1], float):  # Si es un modelo no neuronal
        accuracy = result[1]
        print(f"Modelo: {model_name}, Accuracy: {accuracy}, F1-score: {f1}")
    else:  # Si es un modelo neuronal
        model = result[1]
        history = result[2]
        accuracy = history.history.get('val_accuracy')[-1] if history else None
        loss = history.history.get('val_loss')[-1] if history else None
        print(f"{model_name}: Accuracy: {accuracy}, Loss: {loss}")

Red Neuronal 1: Accuracy: 0.8202764987945557, Loss: 0.40915167331695557
Red Neuronal 2: Accuracy: 0.8156682252883911, Loss: 0.4139537811279297
Modelo: SVM, Accuracy: 0.778552920071756, F1-score: 0.3532934131736527
Modelo: Random Forest, Accuracy: 0.7820809248554913, F1-score: 0.3532934131736527
Modelo: XGBoost, Accuracy: 0.7693508736960999, F1-score: 0.3532934131736527
Modelo: Logistic Regression, Accuracy: 0.7889509002724072, F1-score: 0.3532934131736527
Modelo: K-Nearest Neighbors, Accuracy: 0.7509268487143712, F1-score: 0.3532934131736527


In [26]:
print(results)

[('Red Neuronal 1', <Functional name=functional_5, built=True>, <keras.src.callbacks.history.History object at 0x000002360405AD10>), ('Red Neuronal 2', <Functional name=functional_7, built=True>, <keras.src.callbacks.history.History object at 0x0000023605239110>), ('SVM', 0.778552920071756, 0.0), ('Random Forest', 0.7820809248554913, 0.32209737827715357), ('XGBoost', 0.7693508736960999, 0.3630573248407643), ('Logistic Regression', 0.7889509002724072, 0.3041825095057034), ('K-Nearest Neighbors', 0.7509268487143712, 0.3532934131736527)]


Modelo 1: Neural Network 1 - Loss: 0.4139, Accuracy: 0.8203

Modelo 2: Neural Network 2 - Loss: 0.4326, Accuracy: 0.8157

Modelo 3: SVM - Accuracy: 0.7788

Modelo 4: Random Forest - Accuracy: 0.8111

Modelo 5: XGBoost - Accuracy: 0.7926

Modelo 6: Logistic Regression - Accuracy: 0.8111

Modelo 7: K-Nearest Neighbors - Accuracy: 0.7788