In [11]:
import os
import librosa
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import logging

# Setup basic configuration for logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr).mean()
        rmse = librosa.feature.rms(y=y).mean()
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr).mean()
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr).mean()
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr).mean()
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y).mean()
        mfcc = librosa.feature.mfcc(y=y, sr=sr).mean(axis=1)
        features = {'tempo': tempo, 'chroma_stft': chroma_stft, 'rmse': rmse, 'spectral_centroid': spectral_centroid,
                    'spectral_bandwidth': spectral_bandwidth, 'rolloff': rolloff, 'zero_crossing_rate': zero_crossing_rate}
        for i, coef in enumerate(mfcc):
            features[f'mfcc_{i+1}'] = coef
        return features
    except Exception as e:
        logging.error(f"Failed to process {file_path}: {e}")
        return None  # Return None or a default dictionary if an error occurs

def process_directory(directory):
    features_list = []
    for file_name in os.listdir(directory):
        if file_name.endswith('.mp3'):
            file_path = os.path.join(directory, file_name)
            features = extract_features(file_path)
            if features is not None:  # Only append if extraction was successful
                features['song'] = file_name
                features_list.append(features)
    print(f"Completed processing directory: {directory}")
    return features_list

def main():
    root_dir = r'D:\MMC-Project\song-recommender\backend\data'
    directories = [os.path.join(root_dir, f"{i:03d}") for i in range(156) if os.path.exists(os.path.join(root_dir, f"{i:03d}"))]
    with ThreadPoolExecutor(max_workers=52) as executor:
        results = executor.map(process_directory, directories)
    all_features = [feature for result in results for feature in result if feature is not None]
    df = pd.DataFrame(all_features)
    df.to_csv('song_features.csv', index=False)
    print("All directories processed and data saved to CSV.")

if __name__ == "__main__":
    main()



Completed processing directory: D:\MMC-Project\song-recommender\backend\data\002
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\008


  return pitch_tuning(


Completed processing directory: D:\MMC-Project\song-recommender\backend\data\034
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\005
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\031
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\020
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\023
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\015
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\050
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\018
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\009
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\022
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\041
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\028
Completed processing directo

  y, sr = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
2024-05-10 02:04:54,499 - ERROR - Failed to process D:\MMC-Project\song-recommender\backend\data\099\099134.mp3: 


Completed processing directory: D:\MMC-Project\song-recommender\backend\data\070
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\000
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\014
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\066
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\102
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\010
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\012
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\101
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\060
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\004
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\063
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\103
Completed processing directo

2024-05-10 02:11:46,217 - ERROR - Failed to process D:\MMC-Project\song-recommender\backend\data\098\098565.mp3: 
2024-05-10 02:11:46,296 - ERROR - Failed to process D:\MMC-Project\song-recommender\backend\data\098\098567.mp3: 
2024-05-10 02:11:46,401 - ERROR - Failed to process D:\MMC-Project\song-recommender\backend\data\098\098569.mp3: 


Completed processing directory: D:\MMC-Project\song-recommender\backend\data\106
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\071
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\086
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\096
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\099
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\069
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\092


2024-05-10 02:14:06,555 - ERROR - Failed to process D:\MMC-Project\song-recommender\backend\data\133\133297.mp3: 


Completed processing directory: D:\MMC-Project\song-recommender\backend\data\087
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\105
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\151
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\073
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\097
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\064
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\155
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\136
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\153
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\117
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\154
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\147
Completed processing directo

2024-05-10 02:23:04,801 - ERROR - Failed to process D:\MMC-Project\song-recommender\backend\data\108\108925.mp3: 


Completed processing directory: D:\MMC-Project\song-recommender\backend\data\114
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\108
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\127
Completed processing directory: D:\MMC-Project\song-recommender\backend\data\126
All directories processed and data saved to CSV.


In [18]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the data
df = pd.read_csv('song_features.csv')

# Separate numeric and non-numeric data
numeric_cols = df.select_dtypes(include=[np.number]).columns
non_numeric_cols = df.columns.difference(numeric_cols)

# Check for missing values in numeric columns and fill them with the column mean
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

# Normalize features in numeric columns only
scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Now df is properly preprocessed, with the 'song' column unchanged

In [19]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

# Split the data
X = df.drop('song', axis=1)
y = df['song']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Train the model
model = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
model.fit(X_train)


In [21]:
from joblib import dump

dump(model, 'nearest_neighbors_model.joblib')

['nearest_neighbors_model.joblib']

In [15]:
# Find nearest neighbors for a test set
distances, indices = model.kneighbors(X_test)

In [17]:
def recommend(song_index, n_recommendations=5):
    #given song
    print(f"Recommendations for {y.iloc[song_index]}")
    distances, indices = model.kneighbors(X.loc[song_index:song_index])
    return y.iloc[indices[0]]

# Recommend songs similar to the first song in the test set
recommendations = recommend(12)
print(recommendations)

Recommendations for 000203.mp3
1264    032325.mp3
2055    051113.mp3
3649    081792.mp3
54      000853.mp3
6205    125160.mp3
Name: song, dtype: object
