In [2]:
pip install pandas scikit-learn tensorflow


Collecting pandas
  Downloading pandas-2.3.1-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.1-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting tensorflow
  Downloading tensorflow-2.19.0-cp312-cp312-win_amd64.whl.metadata (4.1 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.3.1-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Downloading scipy-1.16.0-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloadin


[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [5]:
# Load the dataset
df = pd.read_csv("dataset/personalized_music_recommendation_dataset.csv")

In [6]:
features = [
    "genre", "artist", "language", "tempo", "energy", "danceability", 
    "acousticness", "instrumentalness", "valence", "liveness", 
    "speechiness", "loudness", "lyrics_sentiment", "emotion_tag"
]

In [7]:
target = "liked"

In [8]:
# Drop rows with missing values in selected columns
df = df[features + [target]].dropna()


In [9]:
# Encode categorical features
label_encoders = {}
categorical_cols = ["genre", "artist", "language", "lyrics_sentiment", "emotion_tag"]

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [10]:
# Separate features and labels
X = df[features]
y = df[target]

# Standardize numerical features
numerical_cols = list(set(X.columns) - set(categorical_cols))
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[numerical_cols] = scaler.fit_transform(X[numerical_cols])


In [11]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [12]:
# Define the deep learning model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # Binary output: liked or not
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [14]:
# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_split=0.2, verbose=1)


Epoch 1/10
[1m702/702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6862 - loss: 0.6329 - val_accuracy: 0.7010 - val_loss: 0.6137
Epoch 2/10
[1m702/702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7027 - loss: 0.6134 - val_accuracy: 0.7010 - val_loss: 0.6119
Epoch 3/10
[1m702/702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7006 - loss: 0.6130 - val_accuracy: 0.7010 - val_loss: 0.6119
Epoch 4/10
[1m702/702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7044 - loss: 0.6087 - val_accuracy: 0.7010 - val_loss: 0.6113
Epoch 5/10
[1m702/702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7015 - loss: 0.6105 - val_accuracy: 0.7010 - val_loss: 0.6109
Epoch 6/10
[1m702/702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7022 - loss: 0.6095 - val_accuracy: 0.7010 - val_loss: 0.6106
Epoch 7/10
[1m702/702[0m 

In [15]:
# Evaluate the model
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 916us/step
              precision    recall  f1-score   support

           0       0.70      1.00      0.82      9845
           1       0.00      0.00      0.00      4181

    accuracy                           0.70     14026
   macro avg       0.35      0.50      0.41     14026
weighted avg       0.49      0.70      0.58     14026



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [16]:
model.save("user_preference_model.h5")



In [18]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import load_model

# === Load saved model ===
model = load_model("user_preference_model.h5")  # Make sure you save your model first

# === Load encoders and scalers ===
# These should match what you used during training
label_columns = ["genre", "artist", "language", "lyrics_sentiment", "emotion_tag"]
numerical_columns = ["tempo", "energy", "danceability", "acousticness", "instrumentalness", 
                     "valence", "liveness", "speechiness", "loudness"]

# Load new data (user listening history)
df_new = pd.read_csv("dataset/user_history.csv")

# Apply label encoding (same as training)
label_encoders = {}
for col in label_columns:
    le = LabelEncoder()
    df_new[col] = le.fit_transform(df_new[col])
    label_encoders[col] = le

# Standardize numerical columns
scaler = StandardScaler()
df_new[numerical_columns] = scaler.fit_transform(df_new[numerical_columns])

# Predict using the trained model
predictions = model.predict(df_new)
df_new["predicted_like"] = (predictions > 0.5).astype(int)

# Save result
df_new.to_csv("dataset/predicted_user_history.csv", index=False)
print("Prediction saved to predicted_user_history.csv")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
Prediction saved to predicted_user_history.csv


In [17]:
# Re-import necessary library after code execution state reset
import pandas as pd

# Create a sample user_history DataFrame
sample_data = {
    "genre": ["Pop", "Rock", "Jazz"],
    "artist": ["Ed Sheeran", "Imagine Dragons", "Norah Jones"],
    "language": ["English", "English", "English"],
    "tempo": [120, 135, 90],
    "energy": [0.75, 0.85, 0.4],
    "danceability": [0.8, 0.7, 0.3],
    "acousticness": [0.2, 0.1, 0.6],
    "instrumentalness": [0.0, 0.0, 0.0],
    "valence": [0.9, 0.8, 0.5],
    "liveness": [0.1, 0.3, 0.2],
    "speechiness": [0.05, 0.06, 0.04],
    "loudness": [-5.0, -4.5, -7.0],
    "lyrics_sentiment": ["Positive", "Neutral", "Sad"],
    "emotion_tag": ["Happy", "Energetic", "Sad"]
}

user_history_df = pd.DataFrame(sample_data)

# Save to CSV
user_history_csv_path = "dataset/user_history.csv"
user_history_df.to_csv(user_history_csv_path, index=False)

user_history_csv_path

'dataset/user_history.csv'