AI MODEL TRAINING NOTEBOOK WITH SPOTIFY AUDIO FEATURES

In [None]:
import pandas as pd
import json
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Load audio features from the JSON file.
# Replace 'audio_features.json' with the actual file name.
# The encoding is given explicitly so the file is decoded as UTF-8 regardless
# of the platform's default locale encoding.
with open('audio_features.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Convert the parsed JSON data to a DataFrame.
df_audio_features = pd.DataFrame(data)

# Display the first few rows to confirm it loaded correctly.
print(df_audio_features.head())

In [None]:
# Report how many rows were loaded into the DataFrame.
print(df_audio_features.shape[0])

In [None]:
# Audio features used as model inputs for mood classification.
features = [
    'acousticness', 'danceability', 'energy', 'instrumentalness',
    'liveness', 'loudness', 'speechiness', 'tempo', 'valence'
]

# Generate sample mood labels to match the DataFrame length.
# NOTE: these labels are placeholders assigned in a repeating cycle by row
# position only; they do not depend on the audio features, so any metric
# computed downstream is a pipeline smoke test, not a real evaluation.
moods = ['happy', 'sad', 'energetic', 'calm']
df_audio_features['mood'] = [moods[i % len(moods)] for i in range(len(df_audio_features))]

# Restrict the design matrix to the curated feature list. Selecting by dtype
# instead would feed every numeric column in the frame into the model, not
# just the features chosen above.
X = df_audio_features[features]

# Target variable: the 'mood' column assigned above.
y = df_audio_features['mood']

# Check the resulting feature DataFrame.
print(X.head())

In [None]:
# Step 1: Train-Test Split
# Split before scaling so the held-out rows never influence the scaling
# statistics (fitting the scaler on the full data leaks test information).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 2: Standardize
# The scaler is fitted on the training rows only and then applied unchanged
# to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; kept under its
# original name in case other cells reference it.
X_scaled = scaler.transform(X)


In [None]:
# Candidate hyperparameters for the KNN search: neighborhood size,
# vote weighting scheme and distance metric.
param_grid = dict(
    n_neighbors=[50, 100, 200],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Grid search over all combinations with 5-fold cross-validation,
# scored on accuracy and fitted on the training split.
knn = KNeighborsClassifier()
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)


In [None]:
# Pull the winning neighbor count out of the fitted grid search and report it.
selected_params = grid_search.best_params_
best_k = selected_params['n_neighbors']
print(f"Best number of neighbors (k): {best_k}")

In [None]:
# Build the final model from every hyperparameter the grid search selected
# (n_neighbors, weights and metric), rather than hard-coding weights and
# metric, so the fitted model matches the configuration that was validated.
best_knn_model = KNeighborsClassifier(**grid_search.best_params_)
best_knn_model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split with the fitted KNN model.
y_pred = best_knn_model.predict(X=X_test)

In [None]:
# Compare predictions with the true test labels, then print the overall
# accuracy followed by the per-class report.
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Classification Report:\n", per_class_report)

In [None]:
# Fix the class order explicitly so the matrix rows/columns and the tick
# labels drawn on the heatmap are guaranteed to agree. Sorting the labels
# seen in either vector keeps the matrix values in sorted-label order.
class_labels = sorted(set(y_test) | set(y_pred))
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Label both axes with the class names instead of bare integer indices.
sns.heatmap(
    conf_matrix,
    annot=True,
    cmap='Blues',
    fmt='d',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix for KNN')
plt.show()