<a href="https://colab.research.google.com/github/dynamo13007/Delta-Hacks/blob/main/MUSIC_RECOMMENDATION_SYSTEM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Collect a dataset of music tracks that includes features such as genre, artist, tempo, duration, and more.
This data can come from an existing dataset or, as done here, from the Spotify API.

In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

# Set up Spotify API credentials.
# NOTE: these are placeholders; never commit real credentials to the notebook.
client_id = 'your_client_id_here'
client_secret = 'your_client_secret_here'
client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# The Spotify Web API caps a single search request at 50 items and a single
# audio-features request at 100 track IDs, so a one-shot `limit=10000` request
# is rejected. Both calls are therefore paginated / batched.
SEARCH_PAGE_SIZE = 50
# NOTE(review): the search endpoint is documented to stop paging at an offset
# of about 1000 -- confirm against the current API reference before raising this.
MAX_SEARCH_RESULTS = 1000
AUDIO_FEATURES_BATCH_SIZE = 100

# Search for tracks page by page and collect their IDs
track_ids = []
for offset in range(0, MAX_SEARCH_RESULTS, SEARCH_PAGE_SIZE):
    tracks = sp.search(q='genre:"pop"', type='track',
                       limit=SEARCH_PAGE_SIZE, offset=offset)
    items = tracks['tracks']['items']
    if not items:
        break  # no more results for this query
    # Skip empty entries so a missing item cannot break the ID list
    track_ids.extend(track['id'] for track in items if track and track.get('id'))

# Get the audio features in batches; entries returned as None (no features
# available for that track) are dropped so the DataFrame only holds real rows
features = []
for start in range(0, len(track_ids), AUDIO_FEATURES_BATCH_SIZE):
    batch = sp.audio_features(track_ids[start:start + AUDIO_FEATURES_BATCH_SIZE])
    features.extend(entry for entry in (batch or []) if entry is not None)

# Create a dataframe of the audio features, without the identifier/URL columns
df = pd.DataFrame(features)
df = df.drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url'])


ModuleNotFoundError: ignored

Pre-processing the data to clean it and prepare it for analysis

In [None]:
# Clean the audio-feature table in one chain:
#   1. remove exact duplicate rows,
#   2. drop rows that contain missing values,
#   3. renumber the index 0..n-1 so the surviving rows have no gaps.
# Imputing with fillna(0) is the alternative to dropna(); applying it after
# dropna() is redundant because no missing values remain to be filled.
df = (
    df
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Rescale the numeric audio features with a min-max scaler
from sklearn.preprocessing import MinMaxScaler

# Columns that get rescaled; every other column of df is left untouched
cols_to_normalize = [
    'danceability',
    'energy',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
]

# Learn the per-column scaling from the data first ...
scaler = MinMaxScaler()
scaler.fit(df[cols_to_normalize])

# ... then overwrite those columns with their scaled values
df[cols_to_normalize] = scaler.transform(df[cols_to_normalize])

In [None]:
# Performing dimensionality reduction
from sklearn.decomposition import PCA

# Select the relevant features for analysis
selected_features = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

# Create a new dataframe with only the selected features
df_selected = df[selected_features]

# Apply PCA to the selected features, keeping the first three components
pca = PCA(n_components=3)
pca.fit(df_selected)

# Reuse df's index for the projected rows: pca.transform returns a bare array,
# and without an explicit index the new frame would be numbered 0..n-1 and
# would no longer line up with df by label once rows have been filtered out.
df_pca = pd.DataFrame(
    pca.transform(df_selected),
    columns=['PCA1', 'PCA2', 'PCA3'],
    index=df_selected.index,
)

In [None]:
# Group the tracks into clusters with K-means
from sklearn.cluster import KMeans

# How many clusters to look for
n_clusters = 5

# Build the model with a fixed seed, then fit it on the PCA-transformed data
kmeans = KMeans(
    n_clusters=n_clusters,
    random_state=42,
)
kmeans.fit(df_pca)

# Store each track's cluster label as a new column of the original dataframe
df['cluster'] = kmeans.labels_

# Show how many tracks ended up in each cluster
print(df['cluster'].value_counts())

In [None]:
# Using silhouette score and elbow method to evaluate the clustering
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score

# Calculate the silhouette score of the fitted `kmeans` model
silhouette_avg = silhouette_score(df_pca, kmeans.labels_)
print(f'Silhouette score: {silhouette_avg}')

# Use the elbow method to determine the optimal number of clusters.
# The sweep uses its own variable (`elbow_model`) so that the global `kmeans`
# keeps pointing at the model whose labels were stored in df['cluster'];
# rebinding `kmeans` here would leave it as the k=9 model after the loop.
k_values = range(1, 10)
sse = []
for k in k_values:
    elbow_model = KMeans(n_clusters=k, random_state=42)
    elbow_model.fit(df_pca)
    sse.append(elbow_model.inertia_)  # within-cluster sum of squared distances

plt.plot(k_values, sse)
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('SSE')
plt.show()

In [None]:
# Compare the clustering with externally supplied labels (precision / recall / F1)
from sklearn.metrics import precision_score, recall_score, f1_score

# Generate ground truth labels
# NOTE: placeholder values -- there must be exactly one label per clustered track.
ground_truth = [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]
# The labels can instead be loaded with pandas:
#   data = pd.read_csv('labeled_data.csv')
#   ground_truth = data['label'].tolist()
#   print(ground_truth)

# Get predicted labels from clustering algorithm
predicted_labels = kmeans.labels_

# Fail early with an actionable message instead of an opaque metric error
if len(ground_truth) != len(predicted_labels):
    raise ValueError(
        f'ground_truth has {len(ground_truth)} labels but the clustering produced '
        f'{len(predicted_labels)}; provide one ground-truth label per track.'
    )

# K-means cluster IDs are arbitrary integers, so they cannot be compared with
# class labels directly. Map every cluster to the ground-truth label that occurs
# most often among its members, then score the mapped predictions.
label_pairs = pd.DataFrame({'truth': ground_truth, 'cluster': predicted_labels})
cluster_to_label = label_pairs.groupby('cluster')['truth'].agg(
    lambda labels: labels.mode().iloc[0]
)
aligned_labels = label_pairs['cluster'].map(cluster_to_label).to_numpy()

# Calculate precision, recall, and F1-score
precision = precision_score(ground_truth, aligned_labels, average='macro')
recall = recall_score(ground_truth, aligned_labels, average='macro')
f1 = f1_score(ground_truth, aligned_labels, average='macro')

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-score: {f1:.2f}')
