In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import classification_report
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical


In [None]:
# Load the three Kendrick Lamar datasets used by the rest of the notebook.
all_tracks = pd.read_csv('all_kendrick_tracks.csv')
album_tracks = pd.read_csv('kendrick_album_tracks.csv')
singles_features = pd.read_csv('kendrick_singles_features.csv')

# A notebook cell only renders its final bare expression, so earlier
# `.head()` calls would be silently discarded. `display` (provided by the
# IPython kernel) renders a preview of every frame.
display(all_tracks.head(), album_tracks.head(), singles_features.head())

**Audio Features Analysis**

In [None]:
# Pairwise scatter plots / histograms for three core audio descriptors.
pairplot_columns = ['danceability', 'energy', 'valence']
audio_features = all_tracks[pairplot_columns]

sns.pairplot(data=audio_features)
# y slightly above 1 lifts the figure title clear of the top row of panels.
plt.suptitle('Audio Features Analysis', y=1.02)
plt.show()

**Correlation Analysis**
This section examines how the numeric features in the dataset correlate with one another. These features include:


*   Duration (ms)
*   Popularity
* Danceability
* Energy
* Valence
* Acousticness
* Instrumentalness
* Liveness
* Release year
* Duration (min)



In [None]:
# Restrict to numeric columns: correlation is undefined for text columns.
numeric_df = all_tracks.select_dtypes(include='number')

# Pairwise correlation between every pair of numeric columns.
correlation_matrix = numeric_df.corr()

# Annotated heatmap; the diverging palette separates positive from negative values.
sns.heatmap(data=correlation_matrix, cmap='coolwarm', annot=True)
plt.title('Correlation Analysis')
plt.show()

**Load and Preprocess the Data**


In [None]:
# Audio descriptors used as model inputs; the target is the `explicit` flag.
features = ['danceability', 'energy', 'valence', 'loudness', 'acousticness', 'speechiness', 'instrumentalness']
x = all_tracks[features].values
y = all_tracks['explicit'].values

# Map the target to integer class ids, then one-hot encode for the softmax head.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Split row indices *before* scaling so the scaler never sees test rows
# (fitting it on the full dataset leaks test-set min/max into training).
# Stratify to keep the class ratio equal in both partitions, but only when
# every class has at least two rows, since train_test_split rejects
# stratification on smaller classes.
stratify_labels = y_encoded if np.bincount(y_encoded).min() >= 2 else None
train_idx, test_idx = train_test_split(
    np.arange(len(x)),
    test_size=0.2,
    random_state=42,
    stratify=stratify_labels,
)

# Fit the scaler on training rows only, then apply it to every row.
# Test rows may therefore fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler()
scaler.fit(x[train_idx])
x_normalized = scaler.transform(x)

# Conv1D expects (samples, steps, channels): add a trailing channel axis.
x_reshaped = x_normalized.reshape(x_normalized.shape[0], x_normalized.shape[1], 1)

x_train, x_test = x_reshaped[train_idx], x_reshaped[test_idx]
y_train, y_test = y_categorical[train_idx], y_categorical[test_idx]



**CNN Model**


In [None]:
# Seed TensorFlow's global RNG so that a fresh "Restart & Run All" starts
# from the same random state instead of a different one on every run.
tf.random.set_seed(42)

# Small 1-D CNN over the feature axis. With the default 'valid' padding the
# conv(3) -> pool(2) -> conv(2) stack needs at least 6 input steps.
model = Sequential([
    # An explicit Input layer replaces the `input_shape=` layer argument,
    # which newer Keras releases flag as deprecated for Sequential models.
    keras.Input(shape=(x_train.shape[1], 1)),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Conv1D(filters=64, kernel_size=2, activation='relu'),
    Dropout(0.3),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output unit per one-hot class column in y_train.
    Dense(y_train.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

**Train and Evaluate the Model**


In [None]:
# Train the network; Keras holds out 20% of the training data for validation.
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=30,
)

# evaluate() returns the loss followed by each compiled metric (accuracy here).
evaluation = model.evaluate(x_test, y_test)
test_loss, test_accuracy = evaluation
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

**Make Predictions**


In [None]:
# Class probabilities for the held-out rows, collapsed to the most likely class id.
predictions = model.predict(x_test)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)

# Map class ids back to the original labels. `label_encoder` was fitted on the
# `explicit` column, so these hold explicit flags, not popularity scores; the
# variable names are kept so any later cell that uses them still works.
predicted_popularity = label_encoder.inverse_transform(predicted_classes)
true_popularity = label_encoder.inverse_transform(true_classes)

# Show up to five examples; slicing avoids an IndexError on a tiny test set.
for true_label, predicted_label in zip(true_popularity[:5], predicted_popularity[:5]):
    print(f"True Explicit: {true_label}, Predicted Explicit: {predicted_label}")

# Class balance of the actual target, to help interpret the predictions above.
print(all_tracks['explicit'].value_counts())

# `value_counts` must be called; printing the bare attribute only shows the
# bound-method repr instead of the counts.
print(all_tracks['popularity'].value_counts())

**Visualization of Results**

In [None]:
# One row per figure:
# (train key, validation key, train legend, validation legend, y-axis label, title)
curve_specs = [
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy', 'Model Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss', 'Model Loss'),
]

# Per-epoch metrics recorded by model.fit.
training_log = history.history

for train_key, val_key, train_label, val_label, y_label, title in curve_specs:
    plt.plot(training_log[train_key], label=train_label)
    plt.plot(training_log[val_key], label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.title(title)
    plt.show()

# Per-class precision / recall / F1 on the held-out set.
report_text = classification_report(true_classes, predicted_classes)
print(report_text)


**Final Notes**
From the results we can see that the model consistently predicts `True`. This may look ideal from a purely results-based perspective, but it also indicates limitations in testing or data diversity.


---

