In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import numpy as np

In [2]:
df = pd.read_csv('../../../data/processed/cleaned_music_population.csv')

In [3]:
df_rap = df[df['music_genre'] == 'Rap']
df_rap.shape, df_rap.head()

((4504, 13),
        popularity  acousticness  danceability  energy  instrumentalness key  \
 22465        51.0      0.000469         0.683   0.769          0.002260   B   
 22466        63.0      0.354000         0.898   0.677          0.000005  C#   
 22467        61.0      0.400000         0.578   0.528          0.000000  G#   
 22468        71.0      0.000192         0.543   0.952          0.000008  G#   
 22469        17.0      0.003550         0.589   0.721          0.032500   G   
 
        liveness  loudness   mode  speechiness    tempo  valence music_genre  
 22465     0.303    -7.108  Minor       0.2270  142.827    0.475         Rap  
 22466     0.449    -6.907  Major       0.3620  155.020    0.864         Rap  
 22467     0.149    -7.169  Major       0.2740   98.555    0.622         Rap  
 22468     0.380    -3.650  Major       0.0668  107.942    0.591         Rap  
 22469     0.193   -12.145  Major       0.1930   88.049    0.480         Rap  )

In [4]:
df_onehot = pd.get_dummies(df_rap, drop_first=True)
df_onehot.shape, df.shape

((4504, 22), (45020, 13))

In [5]:
#Eliminating outliers
df_onehot = df_onehot[df_onehot['popularity'] > 0]

In [6]:
X = df_onehot.drop(columns=['popularity'])
y = df_onehot['popularity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

In [7]:
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=20, activation='relu'))
ann.add(tf.keras.layers.Dense(units=20, activation='relu'))

In [8]:
ann.add(tf.keras.layers.Dense(units=1, activation='linear'))

In [9]:
ann.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error', 'mean_absolute_percentage_error','r'])

In [11]:
ann.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error', 'mean_absolute_percentage_error'])

In [12]:
ann.fit(X_train_scaled, y_train, epochs=100, batch_size=50, validation_split=0.2)

Epoch 1/100


[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 3674.1655 - mean_absolute_error: 60.0677 - mean_absolute_percentage_error: 99.0964 - val_loss: 3379.0859 - val_mean_absolute_error: 57.5795 - val_mean_absolute_percentage_error: 95.3218
Epoch 2/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3297.7795 - mean_absolute_error: 56.8110 - mean_absolute_percentage_error: 93.6461 - val_loss: 2790.6587 - val_mean_absolute_error: 52.1878 - val_mean_absolute_percentage_error: 86.2558
Epoch 3/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2613.6458 - mean_absolute_error: 50.3150 - mean_absolute_percentage_error: 82.9890 - val_loss: 1858.5292 - val_mean_absolute_error: 42.1485 - val_mean_absolute_percentage_error: 69.3700
Epoch 4/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1661.5940 - mean_absolute_error: 39.4052 - mean_absolute_percentage_error: 64.3809

<keras.src.callbacks.history.History at 0x27f67b106d0>

In [13]:
ann.summary()

In [15]:
y_pred = ann.predict(X_test_scaled)

[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [17]:
def calcular_r2(y_test, y_pred):
    """
    Calcula el coeficiente de determinación R^2 entre los valores reales y los predichos.

    Parámetros:
    y_test (array-like): Valores reales.
    y_pred (array-like): Valores predichos.

    Retorna:
    float: Valor de R^2.
    """
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    ss_res = np.sum((y_test - y_pred) ** 2)
    ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2

In [18]:
r2 = calcular_r2(y_test, y_pred)
print(f'R^2: {r2}')

R^2: -1033.4746572136726
