In [1]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np

In [2]:
# Load the train and test metadata CSVs. Later cells read the `path` and
# `primary_label` columns of these frames.
train_csv_path = r'C:\Users\user\Desktop\birdclef-2022\train_df_clean.csv'
test_csv_path = r'C:\Users\user\Desktop\birdclef-2022\test_df_clean.csv'
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_csv_path, test_csv_path)
)

In [3]:
def generate_spectrogram_image(audio_path, image_save_path, label):
    """Render the mel spectrogram of one audio file and save it as an image.

    The audio is loaded with ``librosa.load`` (default arguments), turned into a
    mel spectrogram, converted to decibels relative to its own maximum, and
    drawn with the axes switched off and no padding around the plot.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to read.
    image_save_path : str
        Destination path handed to ``Figure.savefig``.
    label : str
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    None
    """
    y, sr = librosa.load(audio_path)
    S = librosa.feature.melspectrogram(y=y, sr=sr)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Use an explicit figure/axes pair instead of the pyplot "current figure" so
    # that exactly this figure is closed, even if drawing or saving fails.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        ax.axis('off')  # Don't display axes for cleaner spectrogram images
        fig.savefig(image_save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # This function is called once per audio file; leaving figures open
        # after an error would keep their memory alive in the kernel.
        plt.close(fig)

In [4]:
# Base folder for the training spectrogram images; one sub-folder per label.
spectrogram_base_path = r'C:\Users\user\Desktop\birdclef-2022\spectrograms_train'

# Iterate only over the two columns that are needed; this avoids building a
# Series for every row as iterrows() does.
for audio_path, label in zip(train_df['path'], train_df['primary_label']):
    # exist_ok=True creates the label folder on first use and is a no-op afterwards,
    # without the check-then-create race of exists() + makedirs().
    label_dir_path = os.path.join(spectrogram_base_path, label)
    os.makedirs(label_dir_path, exist_ok=True)

    # Swap only the real file extension for .png. str.replace('.ogg', '.png')
    # would rewrite every occurrence in the name and leave other extensions as-is.
    filename = os.path.splitext(os.path.basename(audio_path))[0] + '.png'
    image_save_path = os.path.join(label_dir_path, filename)

    # Generate and save the spectrogram image
    generate_spectrogram_image(audio_path, image_save_path, label)

In [5]:
# Base folder for the test spectrogram images; one sub-folder per label.
spectrogram_base_path = r'C:\Users\user\Desktop\birdclef-2022\spectrograms_test'

# Iterate only over the two columns that are needed; this avoids building a
# Series for every row as iterrows() does.
for audio_path, label in zip(test_df['path'], test_df['primary_label']):
    # exist_ok=True creates the label folder on first use and is a no-op afterwards,
    # without the check-then-create race of exists() + makedirs().
    label_dir_path = os.path.join(spectrogram_base_path, label)
    os.makedirs(label_dir_path, exist_ok=True)

    # Swap only the real file extension for .png. str.replace('.ogg', '.png')
    # would rewrite every occurrence in the name and leave other extensions as-is.
    filename = os.path.splitext(os.path.basename(audio_path))[0] + '.png'
    image_save_path = os.path.join(label_dir_path, filename)

    # Generate and save the spectrogram image
    generate_spectrogram_image(audio_path, image_save_path, label)

In [None]:
import random
from PIL import Image

def plot_spectrogram_samples(base_path, dataset_name, n_samples=5):
    """Show randomly chosen images from a folder organised as ``base_path/<label>/<file>``.

    For each sample a label directory is drawn at random and then one file
    inside it, so the draw is uniform over labels rather than over images, and
    the same label or image may be shown more than once. The images are stacked
    vertically in a single figure, each titled with the dataset name and label.

    Parameters
    ----------
    base_path : str
        Directory whose sub-directories are named after labels.
    dataset_name : str
        Text placed at the start of every subplot title (e.g. "Train").
    n_samples : int, optional
        Number of images to show. Defaults to 5.

    Raises
    ------
    IndexError
        Raised by ``random.choice`` when ``base_path`` has no sub-directories
        or the drawn label directory contains no files.
    """
    # Keep only real directories: stray files in base_path are not labels and
    # would make os.listdir(label_path) fail below.
    labels = [
        entry for entry in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, entry))
    ]

    # Draw every sample before creating the figure so that a failure while
    # sampling does not leave an empty figure open.
    picks = []
    for _ in range(n_samples):
        label = random.choice(labels)
        label_path = os.path.join(base_path, label)
        spectrogram_files = [
            name for name in os.listdir(label_path)
            if os.path.isfile(os.path.join(label_path, name))
        ]
        selected_file = random.choice(spectrogram_files)
        picks.append((label, os.path.join(label_path, selected_file)))

    fig = plt.figure(figsize=(15, 3 * n_samples))
    for position, (label, image_path) in enumerate(picks, start=1):
        ax = fig.add_subplot(n_samples, 1, position)
        # Open inside a context manager so the file handle is released.
        # NOTE(review): relies on imshow copying the pixel data when called,
        # which is matplotlib's behaviour for PIL images; confirm if upgrading.
        with Image.open(image_path) as img:
            ax.imshow(img)
        ax.axis('off')
        ax.set_title(f"{dataset_name} Spectrogram: {label}")

    fig.tight_layout()
    plt.show()

# Folders holding the generated spectrogram images for each split.
spectrogram_base_path_train = r'C:\Users\user\Desktop\birdclef-2022\spectrograms_train'
spectrogram_base_path_test = r'C:\Users\user\Desktop\birdclef-2022\spectrograms_test'

# Preview a few random spectrograms from the train split, then the test split.
for split_name, split_path in (
    ("Train", spectrogram_base_path_train),
    ("Test", spectrogram_base_path_test),
):
    plot_spectrogram_samples(split_path, split_name)
