In [18]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import matplotlib.ticker as ticker

In [19]:
def hann_window(N):
    """Return a symmetric Hann window of length ``N``.

    The window is ``w[n] = 0.5 * (1 - cos(2*pi*n / (N - 1)))`` for
    ``n = 0 .. N-1``, so both end points are zero.

    Parameters
    ----------
    N : int
        Number of samples in the window.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``N`` (empty when ``N < 1``).
    """
    # A one-sample window would divide by N - 1 == 0 and yield NaN; use the
    # same convention as numpy.hanning(1), which returns [1.0].
    if N == 1:
        return np.ones(1)
    n = np.arange(N)
    return 0.5 * (1 - np.cos(2 * np.pi * n / (N - 1)))

def hamming_window(N):
    """Return a symmetric Hamming window of length ``N``.

    The window is ``w[n] = 0.54 - 0.46 * cos(2*pi*n / (N - 1))`` for
    ``n = 0 .. N-1``, so both end points equal 0.08.

    Parameters
    ----------
    N : int
        Number of samples in the window.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``N`` (empty when ``N < 1``).
    """
    # A one-sample window would divide by N - 1 == 0 and yield NaN; use the
    # same convention as numpy.hamming(1), which returns [1.0].
    if N == 1:
        return np.ones(1)
    n = np.arange(N)
    return 0.54 - 0.46 * np.cos(2 * np.pi * n / (N - 1))

def rectangular_window(N):
    """Return a rectangular (boxcar) window: ``N`` samples, all equal to 1.0."""
    flat = np.full(N, 1.0)
    return flat

In [20]:
def compute_stft(y, sr, window_func, N=512, hop_length=256):
    """Compute the STFT of ``y`` using a caller-supplied window function.

    Parameters
    ----------
    y : numpy.ndarray
        Audio samples passed straight to ``librosa.stft``.
    sr : int or float
        Sampling rate, used to convert bins to Hz and frames to seconds.
    window_func : callable
        Called as ``window_func(N)``; must return the window samples to use.
    N : int, default 512
        FFT size (``n_fft``) and the length requested from ``window_func``.
    hop_length : int, default 256
        Number of samples between successive frames.

    Returns
    -------
    D : numpy.ndarray
        The STFT matrix returned by ``librosa.stft``.
    freqs : numpy.ndarray
        Centre frequency of each FFT bin, from ``librosa.fft_frequencies``.
    times : numpy.ndarray
        Time in seconds of each frame (one entry per column of ``D``).
    """
    window = window_func(N)
    D = librosa.stft(y, n_fft=N, hop_length=hop_length, window=window)
    freqs = librosa.fft_frequencies(sr=sr, n_fft=N)
    # librosa.stft centres its frames by default, so frame k is located at
    # k * hop_length samples. Passing n_fft to frames_to_time would add an
    # extra n_fft // 2 offset that is only correct for a non-centred STFT.
    times = librosa.frames_to_time(np.arange(D.shape[1]), sr=sr, hop_length=hop_length)
    return D, freqs, times


def compute_spectral_metrics(D, y, sr):
    """Compute the spectral centroid and spectral bandwidth of an STFT.

    Parameters
    ----------
    D : numpy.ndarray
        STFT matrix; only its magnitude ``|D|`` is used.
    y : numpy.ndarray
        Unused; accepted so the call signature stays the same.
    sr : int or float
        Sampling rate forwarded to the librosa feature extractors.

    Returns
    -------
    tuple
        ``(centroid, bandwidth)`` exactly as returned by
        ``librosa.feature.spectral_centroid`` and
        ``librosa.feature.spectral_bandwidth``.
    """
    magnitude = np.abs(D)
    feature_args = {"S": magnitude, "sr": sr}
    centroid = librosa.feature.spectral_centroid(**feature_args)
    bandwidth = librosa.feature.spectral_bandwidth(**feature_args)
    return centroid, bandwidth


def compute_correctness_metrics(y, D, sr):
    """Compute three heuristic quality figures for an STFT of ``y``.

    Parameters
    ----------
    y : numpy.ndarray
        Time-domain samples the STFT was computed from.
    D : numpy.ndarray
        STFT matrix; only its magnitude ``|D|`` is used.
    sr : int or float
        Unused; accepted so the call signature stays the same.

    Returns
    -------
    snr : float
        ``10 * log10(mean(y**2) / noise_floor**2)`` where the noise floor is
        taken as 10 % of the mean STFT magnitude.
    spectral_leakage : float
        Sum of all STFT magnitudes divided by the largest magnitude.
    rmse : float
        Root-mean-square difference between ``|D|`` and the absolute value of
        the first ``D.shape[1]`` samples of ``y`` (broadcast across rows).

    All three values are NaN when ``y`` or ``D`` is empty. For an all-zero
    (silent) input, ``snr`` and ``spectral_leakage`` are NaN.
    """
    magnitude = np.abs(D)

    # Empty input: np.max would raise on a zero-size array, so report the
    # metrics as undefined instead of crashing.
    if magnitude.size == 0 or np.size(y) == 0:
        nan = float("nan")
        return nan, nan, nan

    noise_floor = np.mean(magnitude) * 0.1
    peak = np.max(magnitude)

    # Silent audio makes both denominators zero; the resulting NaN/inf values
    # are the intended "undefined" markers, so suppress the RuntimeWarnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        snr = 10 * np.log10(np.mean(y ** 2) / (noise_floor ** 2))
        spectral_leakage = np.sum(magnitude) / peak

    # NOTE(review): this compares frequency-domain magnitudes against raw
    # time-domain samples, which looks unintended — confirm what this RMSE is
    # meant to measure. The computation is kept as-is to preserve results.
    rmse = np.sqrt(np.mean((magnitude - np.abs(y[:D.shape[1]])) ** 2))
    return snr, spectral_leakage, rmse

In [21]:
def plot_spectrogram(D, freqs, times, title, sr, hop_length, save_path):
    """Draw the dB-scaled magnitude of an STFT and save the figure to disk.

    Parameters
    ----------
    D : numpy.ndarray
        STFT matrix; its magnitude is converted to dB relative to its maximum.
    freqs, times : numpy.ndarray
        Unused; the axes are derived from ``sr`` and ``hop_length`` by
        ``librosa.display.specshow``. Kept so the call signature is unchanged.
    title : str
        Figure title.
    sr : int or float
        Sampling rate used to scale both axes.
    hop_length : int
        Hop length the STFT was computed with; used to scale the time axis.
    save_path : str
        Destination file for the rendered figure.
    """
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    fig, ax = plt.subplots(figsize=(16, 4))
    try:
        mesh = librosa.display.specshow(S_db, sr=sr, hop_length=hop_length,
                                        x_axis='time', y_axis='hz', cmap='viridis', ax=ax)
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        ax.set_title(title)
        ax.set_xlabel('Time (minutes)')
        ax.set_ylabel('Frequency (Hz)')

        # specshow positions ticks in seconds; relabel them in minutes to
        # match the axis label.
        ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: f'{x/60:.1f}'))

        # Lay out only after the final tick labels are in place.
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if drawing or saving failed, so
        # repeated calls in a loop do not accumulate open figures.
        plt.close(fig)


def plot_waveform(y, sr, title, save_path):
    """Plot the time-domain waveform of ``y`` and save the figure to disk.

    Parameters
    ----------
    y : numpy.ndarray
        Audio samples to plot.
    sr : int or float
        Sampling rate; sample index divided by ``sr`` gives time in seconds.
    title : str
        Figure title.
    save_path : str
        Destination file for the rendered figure.
    """
    times = np.arange(len(y)) / sr

    fig, ax = plt.subplots(figsize=(16, 4))
    try:
        ax.plot(times, y, color='steelblue')
        ax.set_title(title)
        ax.set_xlabel('Time (minutes)')
        ax.set_ylabel('Amplitude')

        # The data is plotted in seconds; relabel the ticks in minutes to
        # match the axis label.
        ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: f'{x/60:.1f}'))

        # Lay out only after the final tick labels are in place.
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

In [22]:
# Songs to analyse: genre label, display title (also used in output file
# names) and path to the audio file.
# NOTE(review): the third entry's title is spelled 'Dino' while its file name
# uses 'Dinon' — confirm which spelling is intended.
songs = [
    {'genre': 'Classical', 'title': 'Pehla Nasha', 'file_path': '../data/songs/Pehla Nasha.mp3'},
    {'genre': 'Jazz', 'title': 'Dil Ko Hazar Bar', 'file_path': '../data/songs/Dil Ko Hazar Bar.mp3'},
    {'genre': 'Rock', 'title': 'Pichle Saat Dino Mein', 'file_path': '../data/songs/Pichle Saat Dinon Mein.mp3'},
    {'genre': 'Electronic', 'title': 'Lean On', 'file_path': '../data/songs/Lean On.mp3'}
]

output_dir = "../output/song_spectrogram"
os.makedirs(output_dir, exist_ok=True)

# STFT parameters. The same values must be given to both compute_stft() and
# plot_spectrogram(); otherwise the spectrogram's time axis is mis-scaled.
N_FFT = 512
HOP_LENGTH = 256

# Window functions to compare, keyed by the label used in titles/file names.
window_types = {
    'Hann Window': hann_window,
    'Hamming Window': hamming_window,
    'Rectangular Window': rectangular_window
}

# One tuple per (song, window) pair; reset here so re-running the cell does
# not duplicate rows.
results = []

for song in songs:
    genre = song['genre']
    title = song['title']
    file_path = song['file_path']

    print(f"\nProcessing '{title}' from the {genre} genre.")

    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)
    duration = librosa.get_duration(y=y, sr=sr)
    print(f"Loaded '{title}' successfully. Duration: {duration/60:.2f} minutes.")

    waveform_save_path = os.path.join(output_dir, f"{title}_waveform.png")
    plot_waveform(y, sr, f"{title} - Waveform", waveform_save_path)
    print(f"Waveform for '{title}' saved at: {waveform_save_path}")

    for window_name, window_func in window_types.items():
        print(f"Generating spectrogram using {window_name}...")
        D, freqs, times = compute_stft(y, sr, window_func, N=N_FFT, hop_length=HOP_LENGTH)
        save_path = os.path.join(output_dir, f"{title}_{window_name}.png")
        plot_spectrogram(D, freqs, times, f"{title} - {genre} Genre - {window_name}", sr,
                         hop_length=HOP_LENGTH, save_path=save_path)

        spectral_centroid, spectral_bandwidth = compute_spectral_metrics(D, y, sr)
        snr, spectral_leakage, rmse = compute_correctness_metrics(y, D, sr)

        # The centroid and bandwidth arrays are reduced to their mean for the
        # summary table.
        results.append((title, genre, window_name, snr, spectral_leakage, rmse,
                        np.mean(spectral_centroid), np.mean(spectral_bandwidth)))



Processing 'Pehla Nasha' from the Classical genre.


  y, sr = librosa.load(file_path, sr=None)


Loaded 'Pehla Nasha' successfully. Duration: 4.89 minutes.
Waveform for 'Pehla Nasha' saved at: ../output/song_spectrogram/Pehla Nasha_waveform.png
Generating spectrogram using Hann Window...
Generating spectrogram using Hamming Window...
Generating spectrogram using Rectangular Window...

Processing 'Dil Ko Hazar Bar' from the Jazz genre.
Loaded 'Dil Ko Hazar Bar' successfully. Duration: 5.57 minutes.
Waveform for 'Dil Ko Hazar Bar' saved at: ../output/song_spectrogram/Dil Ko Hazar Bar_waveform.png
Generating spectrogram using Hann Window...
Generating spectrogram using Hamming Window...
Generating spectrogram using Rectangular Window...

Processing 'Pichle Saat Dino Mein' from the Rock genre.
Loaded 'Pichle Saat Dino Mein' successfully. Duration: 3.21 minutes.
Waveform for 'Pichle Saat Dino Mein' saved at: ../output/song_spectrogram/Pichle Saat Dino Mein_waveform.png
Generating spectrogram using Hann Window...
Generating spectrogram using Hamming Window...
Generating spectrogram usin

In [23]:
# Column headers, listed in the same order as the fields of each tuple
# appended to `results`.
columns = [
    'Title',
    'Genre',
    'Window',
    'SNR (dB)',
    'Spectral Leakage',
    'RMSE',
    'Spectral Centroid',
    'Spectral Bandwidth',
]

# Assemble the summary table and write it next to the generated figures.
df_results = pd.DataFrame(data=results, columns=columns)
report_path = os.path.join(output_dir, "spectrogram_analysis_report.csv")
df_results.to_csv(path_or_buf=report_path, index=False)

print(f"\nAnalysis report saved at: {report_path}")


Analysis report saved at: ../output/song_spectrogram/spectrogram_analysis_report.csv
