In [None]:
# # Spotify Popularity - Neural Network (TensorFlow + KerasTuner)
# Notebook setup: imports, log silencing, random seeds, and the project-local
# FeatureEngineer helper.
import warnings
warnings.filterwarnings('ignore')  # silence all Python warnings from here on
import sys  # sys is imported up front; it is needed for sys.path below
import os
# NOTE(review): hard-coded absolute Windows path; the local import at the end of
# this cell presumably relies on it — confirm, and consider a relative path.
sys.path.append('d:/spotifyTrendAnalysis')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # hide TensorFlow warnings (set before TensorFlow is imported)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
#!pip install keras_tuner
import keras_tuner as kt
# Single seed shared by the data splits and by the NumPy / TensorFlow RNGs.
RANDOM_STATE = 24
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)
import importlib
# If EngineerFeature is already loaded in this kernel, reload it so the import
# below picks up the current version of the module.
if 'EngineerFeature' in sys.modules:
    importlib.reload(sys.modules['EngineerFeature'])
from EngineerFeature import FeatureEngineer

In [None]:
# NOTE(review): MinMaxScaler is not referenced in the cells visible here — confirm it is still needed.
from sklearn.preprocessing import MinMaxScaler


# Load the full dataset from the relative path "../final_data.csv".
df = pd.read_csv("../final_data.csv")
# Disabled experiments kept for reference: row subsampling, a quantile
# transform of the target, and a histogram of the target distribution.
#df = df[:5000]
# y = df["popularity"].values.reshape(-1, 1)

# qt = QuantileTransformer(output_distribution="normal", random_state=42)
# df["popularity"] = qt.fit_transform(y).ravel()
# import seaborn as sns
# import matplotlib.pyplot as plt
# sns.histplot(df['popularity'], bins=30, kde=True)
# plt.title("Track Popularity Distribution")
# plt.xlabel("Popularity")
# plt.ylabel("Count")
# plt.show()

In [None]:
# Separate the regression target ("popularity") from the feature columns.
y = df["popularity"]
X = df.drop(columns="popularity")

print("Features shape:", X.shape)
print("Target shape:", y.shape)

In [None]:
# %%
# Data split: Train (60%) / Val (20%) / Test (20%)
# Step 1: carve off the 20% hold-out test portion.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Step 2: feature engineering — fitted on the train+val portion, then the
# fitted transformer is applied to the test portion.
# NOTE(review): the transformer is fitted before the train/val split below, so
# validation rows take part in fitting it — confirm this is intended.
fe = FeatureEngineer()
X_temp = fe.fit_transform(X_temp, y_temp)
X_test = fe.transform(X_test)

# Step 3: split the engineered train+val portion into train and validation.
# 0.25 of the remaining 80% equals 20% of the full dataset.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.25, random_state=RANDOM_STATE
)

# Report the shapes of every partition.
for _label, _features, _target, _share in (
    ("Train+Val:", X_temp, y_temp, "‚Üí %80 of data"),
    ("Train:    ", X_train, y_train, "‚Üí %60 of data"),
    ("Val:      ", X_val, y_val, "‚Üí %20 of data"),
    ("Test:     ", X_test, y_test, "‚Üí %20 of data"),
):
    print(_label, _features.shape, _target.shape, _share)


In [None]:
# %%
def build_model(hp: kt.HyperParameters, input_dim: int = 42):
    """Build and compile a tunable fully connected regression network.

    The search space registered on ``hp`` is:

    * ``n_layers``: number of hidden blocks, 2 to 4.
    * ``units_{i}``: width of hidden layer ``i``, 32 or 64.
    * ``dropout_{i}``: dropout rate after layer ``i``, 0.0 to 0.15 in steps of 0.05.
    * ``learning_rate``: Adam learning rate, log-sampled between 1e-4 and 1e-3.

    Each hidden block is Dense(relu) -> BatchNormalization -> Dropout, followed
    by a single linear output unit.

    Args:
        hp: KerasTuner hyperparameter container to sample values from.
        input_dim: Number of input features. Defaults to 42, the width that
            was previously hard-coded, so ``build_model(hp)`` behaves as
            before; pass e.g. ``X_train.shape[1]`` to follow the data.

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer with
        MAE as both the loss and the reported metric.
    """
    model = keras.Sequential()
    model.add(layers.Input(shape=(input_dim,)))

    # Number of hidden blocks.
    n_layers = hp.Int("n_layers", min_value=2, max_value=4)

    for i in range(n_layers):
        units = hp.Int(f"units_{i}", min_value=32, max_value=64, step=32)
        model.add(layers.Dense(units, activation="relu"))
        model.add(layers.BatchNormalization())
        dropout_rate = hp.Float(f"dropout_{i}", min_value=0.0, max_value=0.15, step=0.05)
        model.add(layers.Dropout(dropout_rate))

    # Output layer: regression, so a single linear unit.
    model.add(layers.Dense(1, activation="linear"))

    # Learning rate, sampled on a log scale.
    lr = hp.Float("learning_rate", min_value=1e-4, max_value=1e-3, sampling="log")

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss="mae",  # alternative that was tried: keras.losses.Huber()
        metrics=["mae"]
    )

    return model

In [None]:
# %%
# Remove old tuner logs so the search starts from scratch.
import shutil
import os
if os.path.exists("nn_tuner_logs"):
    shutil.rmtree("nn_tuner_logs")
    print("üóëÔ∏è Eski tuner loglarƒ± silindi, yeni arama ba≈ülƒ±yor...")

# Bayesian-optimisation search over the space defined in build_model,
# minimising validation loss.
tuner = kt.BayesianOptimization(
    build_model,
    objective="val_loss",
    max_trials=10,           # try 10 hyperparameter combinations
    seed=RANDOM_STATE,       # seed the tuner's sampling with the notebook-wide seed
    directory="nn_tuner_logs",
    project_name="spotify_popularity_nn",
    overwrite=True           # discard any previous results for this project
)

tuner.search_space_summary()

In [None]:
# %%
# Early stopping on validation loss: patience of 5 epochs, with
# restore_best_weights enabled.
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

# Keyword arguments handed to the tuner search.
_search_kwargs = dict(
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,  # could also be exposed as a hyperparameter
    callbacks=[early_stop],
    verbose=1,
)
tuner.search(X_train, y_train, **_search_kwargs)

# Take the single best hyperparameter set found by the search.
_top_hps = tuner.get_best_hyperparameters(num_trials=1)
best_hps = _top_hps[0]
print("Best hyperparameters:", best_hps.values)

In [None]:
# %%
# Rebuild a fresh, untrained model from the best hyperparameters and fit it.
best_model = tuner.hypermodel.build(best_hps)

# Early stopping monitors val_loss on whatever is passed as validation_data
# and (with restore_best_weights=True) keeps the weights of the best epoch.
# That data must therefore be the validation split: monitoring the test set
# would pick the stopping epoch on it and bias any later test score.
# X_test / y_test are deliberately not used here.
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

In [None]:
# %%
import matplotlib.pyplot as plt

# Learning curves of the final fit. build_model compiles the network with
# loss="mae", so the plotted loss is mean absolute error (not MSE).
fig, ax = plt.subplots()
ax.plot(history.history["loss"], label="train_loss")
ax.plot(history.history["val_loss"], label="val_loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("MAE")
ax.legend()
ax.set_title("Training vs Validation Loss")
plt.show()
