In [14]:
import streamlit as st
import pandas as pd
import librosa
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import skew, kurtosis
import joblib

# Load your dataset and model
#@st.cache_data 
def load_data_and_model():
    """Read the persisted clustering model and the song-feature table from disk.

    Both file names are fixed and resolved relative to the working directory.

    Returns:
        tuple: ``(songs_df, clustering_model)`` where ``songs_df`` is the
        DataFrame parsed from "recommendation_features1.csv" and
        ``clustering_model`` is whatever object joblib restores from
        "clustering_model.pkl".
    """
    # joblib.load unpickles the file, so only point it at artefacts you trust.
    model = joblib.load("clustering_model.pkl")
    # Table of songs the recommender searches through.
    catalog = pd.read_csv("recommendation_features1.csv")
    return catalog, model

# Module-level load: the recommendation UI further down reads these two globals.
# Because the cache decorator above load_data_and_model is commented out, the
# files are re-read every time this script is executed.
songs_df, clustering_model = load_data_and_model()

# Function to extract spectral features from an audio file
def extract_features(audio_file, sr=22050):
    """Turn one audio file into a flat numeric feature vector.

    The audio is loaded with librosa and summarised (mostly as mean/variance
    pairs) across spectral, time-domain, rhythm and harmonic descriptors. The
    individual values are stored in a dict and finally concatenated with
    ``np.hstack`` in insertion order, so the order of the assignments below
    defines the layout of the returned vector and must not be shuffled.

    Args:
        audio_file: Anything ``librosa.load`` accepts as its first argument
            (the notebook passes both a path string and a Streamlit upload).
        sr: Target sample rate handed to ``librosa.load``.

    Returns:
        A 1-D ``numpy.ndarray`` with all features, or ``None`` if any step
        raised; in that case the error is reported through ``st.error``.
    """
    try:
        # Load the audio file
        y, sr = librosa.load(audio_file, sr=sr)

        # Intermediates that feed several features are computed once here.
        # The original recomputed the onset envelope three times and the RMS /
        # absolute signal twice each with identical arguments.
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        rms = librosa.feature.rms(y=y)
        abs_signal = np.abs(y)

        # Spectral Features
        features = {}

        # MFCCs
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features['mfcc_mean'] = np.mean(mfccs, axis=1)
        features['mfcc_var'] = np.var(mfccs, axis=1)

        # Feature Trajectories
        mfcc_delta = librosa.feature.delta(mfccs)
        features['mfcc_delta_mean'] = np.mean(mfcc_delta, axis=1)
        features['mfcc_delta_var'] = np.var(mfcc_delta, axis=1)

        # Spectral Centroid
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        features['spectral_centroid_mean'] = np.mean(spectral_centroid)
        features['spectral_centroid_var'] = np.var(spectral_centroid)

        # Spectral Roll-off
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.85)
        features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)
        features['spectral_rolloff_var'] = np.var(spectral_rolloff)

        # Spectral Flux (approximated by the onset-strength envelope)
        features['spectral_flux_mean'] = np.mean(onset_env)
        features['spectral_flux_var'] = np.var(onset_env)

        # Spectral Contrast
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        features['spectral_contrast_mean'] = np.mean(spectral_contrast, axis=1)
        features['spectral_contrast_var'] = np.var(spectral_contrast, axis=1)

        # Amplitude envelope (absolute value of the waveform)
        features['envelope_mean'] = np.mean(abs_signal)
        features['envelope_var'] = np.var(abs_signal)

        # Spectral Bandwidth
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        features['spectral_bandwidth_mean'] = np.mean(spectral_bandwidth)
        features['spectral_bandwidth_var'] = np.var(spectral_bandwidth)

        # Time-Domain Features
        features['rms_mean'] = np.mean(rms)
        features['rms_var'] = np.var(rms)
        features['energy'] = np.sum(y ** 2) / len(y)  # Signal energy
        # Same numbers as the envelope entries; kept so the vector layout is unchanged.
        features['amplitude_mean'] = np.mean(abs_signal)
        features['amplitude_var'] = np.var(abs_signal)

        # Temporal Evolution
        features['dynamic_range'] = np.max(y) - np.min(y)

        # Rhythm Features
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        features['tempo'] = tempo
        features['beat_strength_mean'] = np.mean(onset_env)
        features['beat_strength_var'] = np.var(onset_env)

        # Rhythmic Regularity: Inter-Beat Interval (IBI) statistics
        if len(beat_frames) > 1:
            # NOTE(review): librosa documents beat_track's second return value
            # as frame indices, so dividing by sr does not give seconds
            # (librosa.frames_to_time would). Left as-is because the stored
            # dataset features were presumably produced with this same formula;
            # confirm and regenerate them before changing it.
            ibi = np.diff(beat_frames) / sr
            features['ibi_var'] = np.var(ibi)
            features['ibi_mean'] = np.mean(ibi)
        else:
            # With fewer than two beats there is no interval to measure. The
            # two slots are still filled so the vector keeps a constant length;
            # dropping them would shift every later feature and change the
            # width expected by the clustering model.
            features['ibi_var'] = 0.0
            features['ibi_mean'] = 0.0

        # Harmonic Features
        # Key and Scale Estimation
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
        features['chroma_mean'] = np.mean(chroma, axis=1)
        features['chroma_var'] = np.var(chroma, axis=1)

        # Tonnetz
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
        features['tonnetz_mean'] = np.mean(tonnetz, axis=1)
        features['tonnetz_var'] = np.var(tonnetz, axis=1)

        # Harmonic-to-Percussive Ratio
        harmonic, percussive = librosa.effects.hpss(y)
        # The 1e-6 offset only protects against an exactly-zero denominator.
        features['hpr'] = np.mean(harmonic) / (np.mean(percussive) + 1e-6)

        # Genre-Specific Features
        # Harmonic and Percussive Energy
        features['harmonic_energy'] = np.sum(harmonic ** 2)
        features['percussive_energy'] = np.sum(percussive ** 2)

        # Zero-Crossing Rate
        zcr = librosa.feature.zero_crossing_rate(y)
        features['zcr_mean'] = np.mean(zcr)
        features['zcr_var'] = np.var(zcr)

        # Onset Autocorrelation (Rhythmic Regularity)
        onset_acf = librosa.autocorrelate(onset_env, max_size=len(y)//2)
        features['onset_acf_mean'] = np.mean(onset_acf)
        features['onset_acf_var'] = np.var(onset_acf)

        # Statistics (Skewness and Kurtosis) of the raw waveform
        features['skewness'] = skew(y)
        features['kurtosis'] = kurtosis(y)

        # Combine features into a single vector (dict insertion order)
        return np.hstack(list(features.values()))
    except Exception as e:
        # Best-effort: surface the problem in the UI and let the caller test for None.
        st.error(f"Error processing audio: {e}")
        return None



# Add clustering logic to the recommendation function
def recommend_songs(features, songs_df, clustering_model, num_recommendations=5):
    """Rank the songs that share the query's cluster by cosine similarity.

    Args:
        features: 1-D feature vector of the query song.
        songs_df: DataFrame with a 'cluster' column, 'file_name' and
            'audio_path' columns, and the numeric columns whose names start
            with "feature_" (these are compared against ``features``).
        clustering_model: Object whose ``predict`` takes a list of feature
            vectors and returns one cluster label per vector.
        num_recommendations: Maximum number of rows to return.

    Returns:
        DataFrame with columns 'file_name', 'similarity' and 'audio_path',
        sorted by descending similarity and truncated to
        ``num_recommendations`` rows. It is empty when no song in
        ``songs_df`` carries the predicted cluster label. ``songs_df`` itself
        is not modified.
    """
    output_cols = ['file_name', 'similarity', 'audio_path']

    # Extract the feature columns from the dataset
    feature_cols = [col for col in songs_df.columns if col.startswith("feature_")]

    # Predict the cluster of the input song
    predicted_cluster = clustering_model.predict([features])[0]

    # Filter dataset to include only songs from the same cluster
    cluster_songs = songs_df[songs_df['cluster'] == predicted_cluster]

    if cluster_songs.empty:
        # cosine_similarity rejects a matrix with zero rows, so hand back an
        # empty, correctly-shaped result instead of raising.
        return cluster_songs.assign(similarity=np.empty(0, dtype=float))[output_cols]

    # Compute cosine similarity within the cluster
    similarities = cosine_similarity([features], cluster_songs[feature_cols].values).flatten()

    # .assign builds a new frame; writing the column onto the filtered slice
    # directly triggers pandas' SettingWithCopyWarning.
    ranked = cluster_songs.assign(similarity=similarities).sort_values(
        by='similarity', ascending=False
    )
    return ranked[output_cols].head(num_recommendations)


# Streamlit UI
st.title("Song Recommendation App 🎵")
st.write("Upload a song to get 5 similar song recommendations!")

# File uploader (restricted to the two extensions listed in `type`)
uploaded_file = st.file_uploader("Upload a song file (MP3, WAV, etc.)", type=["mp3", "wav"])

if uploaded_file:
    # Display an audio player for the uploaded file
    st.subheader("Uploaded Song Preview:")
    # Use the MIME type reported for the upload so WAV files are not labelled
    # as MP3; fall back to the previous hard-coded value if none is available.
    st.audio(uploaded_file, format=getattr(uploaded_file, "type", None) or "audio/mp3")

    # Extract features from the uploaded file
    st.write("Processing uploaded song...")
    song_features = extract_features(uploaded_file)

    if song_features is not None:
        st.write("Finding similar songs...")

        # Recommend similar songs (with clustering logic)
        recommendations = recommend_songs(song_features, songs_df, clustering_model)

        if recommendations.empty:
            # Nothing to list: say so instead of printing a heading with no entries.
            st.warning("No similar songs were found for this upload.")
        else:
            st.write("Here are 5 similar songs:")

            # Display recommendations with audio playback
            for _, row in recommendations.iterrows():
                st.subheader(row['file_name'])
                st.write(f"Similarity Score: {row['similarity']:.2f}")
                # NOTE(review): the format is still fixed to MP3 here; confirm
                # it matches the files referenced by the audio_path column.
                st.audio(row['audio_path'], format="audio/mp3")
    else:
        # extract_features returned None, i.e. it already reported the exception.
        st.error("Failed to extract features from the uploaded file.")



2025-01-22 13:21:31.604 
`st.cache` is deprecated and will be removed soon. Please use one of Streamlit's new
caching commands, `st.cache_data` or `st.cache_resource`. More information
[in our docs](https://docs.streamlit.io/develop/concepts/architecture/caching).

**Note**: The behavior of `st.cache` was updated in Streamlit 1.36 to the new caching
logic used by `st.cache_data` and `st.cache_resource`. This might lead to some problems
or unexpected behavior in certain edge cases.

2025-01-22 13:21:31.605 No runtime found, using MemoryCacheStorageManager


In [9]:
# Raw string: the backslash in this Windows-style path is kept literally rather
# than being parsed as the invalid escape sequence "\m". The path value itself
# is unchanged.
file1 = extract_features(r'unlabeled\m00372.wav')

  file1 = extract_features('unlabeled\m00372.wav')


In [10]:
# Show what extract_features returned for the sample file.
# NOTE(review): the saved output below is a dict keyed by feature name, while
# the extract_features defined in this notebook returns a stacked array, so this
# cell presumably ran against an earlier revision. Re-run to confirm.
file1

{'mfcc_mean': array([-15.625049,  99.14236 , -45.630257,  55.01213 , -18.020164,
         23.897812, -19.600197,  27.33792 , -15.103914,  18.797401,
        -16.146687,   9.214215, -16.21849 ], dtype=float32),
 'mfcc_var': array([360.38052 , 164.04086 , 135.57751 ,  70.57669 ,  29.895018,
         45.264465,  35.989407,  39.737305,  30.706875,  31.762339,
         23.08019 ,  46.400738,  24.95767 ], dtype=float32),
 'mfcc_delta_mean': array([ 4.4510301e-02, -2.0789739e-02,  1.6915424e-02, -1.9022228e-02,
         3.9327713e-03,  1.5005129e-02, -2.6202472e-03,  1.3987110e-02,
        -9.4788847e-04,  1.3330526e-02,  4.7453549e-03, -3.3032277e-04,
         2.3585369e-05], dtype=float32),
 'mfcc_delta_var': array([6.6074505 , 4.671716  , 2.840617  , 1.8381273 , 0.84295166,
        0.85338914, 1.206587  , 0.9038652 , 1.0534309 , 0.76987416,
        0.81312954, 0.9787554 , 0.88086015], dtype=float32),
 'spectral_centroid_mean': np.float64(2442.504289617461),
 'spectral_centroid_var': np.flo

In [11]:
# Start Streamlit's bundled "hello" demo as a shell command. The cell keeps
# running until it is interrupted (the recorded output is just ^C).
!python -m streamlit hello

^C


In [9]:
# Serve recommendation_app.py with the Streamlit CLI. The cell keeps running
# until it is interrupted (the recorded output is just ^C).
!streamlit run recommendation_app.py

^C


In [8]:
# Serve test_app.py with the Streamlit CLI. The cell keeps running until it is
# interrupted (the recorded output is just ^C).
!streamlit run test_app.py


^C


In [17]:
import pandas as pd
import os

# Folder that holds the audio files referenced by the feature table
audio_folder = "unlabeled"


def _path_for(name):
    """Join the audio folder with one file_name entry (adjust the extension here if needed)."""
    return os.path.join(audio_folder, f"{name}")


# Read the feature table and attach an audio_path column derived from file_name
songs_df = pd.read_csv("recommendation_features.csv")
songs_df = songs_df.assign(audio_path=songs_df['file_name'].map(_path_for))

# Write the enriched table under a new name; the source CSV is left as it was
songs_df.to_csv("recommendation_features1.csv", index=False)
print("Audio paths added successfully!")


Audio paths added successfully!
