# Importy

In [None]:
import os
import numpy as np
import tensorflow as tf
import librosa
import librosa.display
import soundfile as sf
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
from matplotlib import pyplot as plt
from scipy.ndimage import median_filter
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Zamiana mp3 na wav

In [None]:
# Folder containing the source MP3 files. os.path.join(..., "") appends the
# platform separator, so the value is unchanged on Windows ("data\\") while
# also resolving to a real directory on POSIX systems.
mp3_folder = os.path.join("data", "")

# Folder where the converted WAV files are written
wav_folder = os.path.join("wav", "")

# Create the target directory; exist_ok avoids the check-then-create race
os.makedirs(wav_folder, exist_ok=True)

# Convert every MP3 file found directly inside the source directory
for file in os.listdir(mp3_folder):
    if not file.endswith(".mp3"):
        continue

    mp3_path = os.path.join(mp3_folder, file)
    wav_path = os.path.join(wav_folder, os.path.splitext(file)[0] + ".wav")

    try:
        # Decode the MP3 and re-encode it as WAV under the same base name
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(wav_path, format="wav")
    except CouldntDecodeError:
        # Skip undecodable files instead of aborting the whole batch
        print(f"Failed to convert {file}. The file is corrupted or unsupported.")
    else:
        print(f"File {file} converted successfully.")

print("Conversion completed.")


# Funkcje wykorzystywane do przetwarzania

In [None]:
def load_wav_16k_mono(filename):
    """Load an audio file as a mono waveform resampled to 16 kHz.

    Args:
        filename: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        Only the waveform returned by ``librosa.load``; the sample rate that
        ``librosa.load`` also returns is discarded.
    """
    samples, _ = librosa.load(filename, mono=True, sr=16000)
    return samples

def normalize_wav(wav):
    """Scale a waveform by its peak absolute amplitude.

    Args:
        wav: Waveform samples; cast to ``tf.float32`` before scaling.

    Returns:
        The float32 waveform divided by ``max(|wav|) + 1e-8``. The small
        constant keeps the denominator non-zero for an all-zero signal.
    """
    samples = tf.cast(wav, tf.float32)
    peak = tf.reduce_max(tf.abs(samples))
    denominator = peak + 1e-8
    return samples / denominator

def split_wav_into_chunks(wav, chunk_length=2, sample_rate=16000):
    """Split a waveform into equal-length, non-overlapping chunks.

    Args:
        wav: Sequence of samples supporting ``len`` and slicing; expected to
            be one-dimensional, since it is reshaped to two dimensions.
        chunk_length: Chunk duration in seconds; may be fractional.
        sample_rate: Samples per second of ``wav``. Defaults to 16000, the
            value that used to be hard-coded here.

    Returns:
        A tensor of shape ``(num_chunks, chunk_samples)``. Trailing samples
        that do not fill a whole chunk are dropped. ``tf.constant([])`` is
        returned when ``wav`` is empty or shorter than a single chunk.

    Raises:
        ValueError: If ``chunk_length * sample_rate`` rounds to less than one
            sample.
    """
    if len(wav) == 0:
        return tf.constant([])

    # Coerce to int so fractional durations (e.g. 0.5 s) give a valid slice
    # bound and reshape dimension instead of a float.
    chunk_samples = int(round(chunk_length * sample_rate))
    if chunk_samples <= 0:
        raise ValueError(
            "chunk_length * sample_rate must be at least one sample, "
            f"got {chunk_length!r} * {sample_rate!r}"
        )

    num_chunks = len(wav) // chunk_samples
    if num_chunks == 0:
        return tf.constant([])

    # Drop the incomplete tail so the reshape divides evenly
    usable_samples = num_chunks * chunk_samples
    return tf.reshape(wav[:usable_samples], (num_chunks, chunk_samples))

def save_spectro(correct_chunks, folder_name, recording_name):
    """Save one spectrogram image per chunk.

    Images are written to ``SPECTRO_FOLDER/<folder_name>/`` (``SPECTRO_FOLDER``
    is a module-level variable that must be defined before calling) as
    ``<recording_name>_correct_chunk_<i>.jpg``.

    Args:
        correct_chunks: Iterable of chunks; each must provide ``.numpy()``.
        folder_name: Sub-folder name created under ``SPECTRO_FOLDER``.
        recording_name: Prefix used for the image file names.
    """
    spectro_folder = os.path.join(SPECTRO_FOLDER, folder_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(spectro_folder, exist_ok=True)

    for i, chunk in enumerate(correct_chunks):
        fig = plt.figure(figsize=(4, 4))
        try:
            plt.specgram(chunk.numpy(), Fs=16000)
            plt.axis('off')
            plt.savefig(
                os.path.join(spectro_folder, f'{recording_name}_correct_chunk_{i}.jpg'),
                bbox_inches='tight',
                pad_inches=0
            )
        finally:
            # Always release the figure, even if rendering or saving fails;
            # otherwise open figures pile up across a batch run.
            plt.close(fig)

def save_mel_spectrograms(correct_chunks, folder_name, recording_name):
    """Save one mel-spectrogram image per chunk.

    Images are written to ``MEL_FOLDER/<folder_name>/`` (``MEL_FOLDER`` is a
    module-level variable that must be defined before calling) as
    ``<recording_name>_mel_chunk_<i>.jpg``.

    Args:
        correct_chunks: Iterable of chunks; each must provide ``.numpy()``.
        folder_name: Sub-folder name created under ``MEL_FOLDER``.
        recording_name: Prefix used for the image file names.
    """
    # Mel images belong under MEL_FOLDER; writing them to SPECTRO_FOLDER mixed
    # them with the plain spectrograms and left MEL_FOLDER unused.
    mel_spectro_folder = os.path.join(MEL_FOLDER, folder_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(mel_spectro_folder, exist_ok=True)

    for i, chunk in enumerate(correct_chunks):
        fig = plt.figure(figsize=(4, 4))
        try:
            # Compute the mel-spectrogram and convert power to decibels
            # relative to the maximum value
            S = librosa.feature.melspectrogram(
                y=chunk.numpy(),
                sr=16000,
                n_mels=128,
                fmax=8000
            )
            S_dB = librosa.power_to_db(S, ref=np.max)

            # Render the mel-spectrogram on the current figure
            librosa.display.specshow(
                S_dB,
                sr=16000,
                x_axis='time',
                y_axis='mel',
                fmax=8000
            )

            plt.axis('off')
            plt.savefig(
                os.path.join(mel_spectro_folder, f'{recording_name}_mel_chunk_{i}.jpg'),
                bbox_inches='tight',
                pad_inches=0
            )
        finally:
            # Always release the figure, even if rendering or saving fails;
            # otherwise open figures pile up across a batch run.
            plt.close(fig)


# Przykładowy plik podzielony na segmenty

In [None]:
# Load and normalize an example recording
FILE = os.path.join('data', 'Anser_anser', 'Anser211456.wav')

# load_wav_16k_mono returns only the waveform (no sample rate), so the result
# must not be unpacked into two names.
wave = load_wav_16k_mono(FILE)
wave = normalize_wav(wave)

# Split waveform into uniform chunks
file_chunks = split_wav_into_chunks(wave)

# Track chunk classification results
correctness = []
colors = []

# A chunk counts as "correct" when at least 3 samples exceed an amplitude of 0.6
for chunk in file_chunks:
    high_amplitude_count = tf.reduce_sum(tf.cast(chunk > 0.6, tf.int32))
    is_correct = bool(high_amplitude_count >= 3)
    correctness.append(is_correct)
    colors.append('blue' if is_correct else 'red')

# Plot full waveform
plt.figure(figsize=(12, 6))
plt.plot(wave, label='Recording Waveform')

# Overlay each chunk at its sample offset, colored by its classification
for i, (chunk, color) in enumerate(zip(file_chunks, colors)):
    chunk_start = i * chunk.shape[0]
    chunk_end = (i + 1) * chunk.shape[0]
    plt.plot(range(chunk_start, chunk_end), chunk, label=f'Chunk {i+1}', color=color)

plt.title('Example Bird Sound Waveform and Classified Chunks')
plt.xlabel('Sample Index')
plt.ylabel('Amplitude')
plt.legend()
plt.grid()
plt.show()


# Pobieranie spektrogramów

In [None]:
# os.path.join(..., "") appends the platform separator, so these values are
# unchanged on Windows ("data\\", "spectro\\") and also valid on POSIX systems.
DATA_FOLDER = os.path.join("data", "")
SPECTRO_FOLDER = os.path.join("spectro", "")

# Each sub-folder of DATA_FOLDER is treated as one species/genre
for genre_folder in os.listdir(DATA_FOLDER):
    genre_path = os.path.join(DATA_FOLDER, genre_folder)
    if not os.path.isdir(genre_path):
        continue
    print("Processing genre:", genre_folder)

    # Process each WAV file in the folder
    for recording_name in os.listdir(genre_path):
        if not recording_name.endswith(".wav"):
            continue
        recording_path = os.path.join(genre_path, recording_name)
        print("Processing recording:", recording_name)

        try:
            # Load, normalize, and chunk the recording
            wave = load_wav_16k_mono(recording_path)
            wave = normalize_wav(wave)
            chunks = split_wav_into_chunks(wave)

            # A chunk is valid when at least 3 samples exceed an amplitude of 0.65
            correctness = [
                tf.reduce_sum(tf.cast(chunk > 0.65, tf.int32)) >= 3
                for chunk in chunks
            ]

            # Save spectrograms for valid chunks only
            correct_chunks = [
                chunk for chunk, correct in zip(chunks, correctness) if correct
            ]
            save_spectro(correct_chunks, genre_folder, os.path.splitext(recording_name)[0])

            print("Spectrograms saved successfully!")
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next file
            print(f"Error processing {recording_name}: {e}")

print("All recordings processed.")


# Pobieranie spektrogramów Mela

In [None]:
# os.path.join(..., "") appends the platform separator, so these values are
# unchanged on Windows ("data\\", "mel\\") and also valid on POSIX systems.
DATA_FOLDER = os.path.join("data", "")
MEL_FOLDER = os.path.join("mel", "")

# Each sub-folder of DATA_FOLDER is treated as one species/genre
for genre_folder in os.listdir(DATA_FOLDER):
    genre_path = os.path.join(DATA_FOLDER, genre_folder)
    if not os.path.isdir(genre_path):
        continue
    print("Processing genre:", genre_folder)

    # Process each WAV file in the directory
    for recording_name in os.listdir(genre_path):
        if not recording_name.endswith(".wav"):
            continue
        recording_path = os.path.join(genre_path, recording_name)
        print("Processing recording:", recording_name)

        try:
            # Load, normalize, and chunk audio
            wave = load_wav_16k_mono(recording_path)
            wave = normalize_wav(wave)
            chunks = split_wav_into_chunks(wave)

            # A chunk is valid when at least 3 samples exceed an amplitude of 0.65
            correctness = [
                tf.reduce_sum(tf.cast(chunk > 0.65, tf.int32)) >= 3
                for chunk in chunks
            ]

            # Save mel-spectrograms for valid chunks only
            correct_chunks = [
                chunk for chunk, is_valid in zip(chunks, correctness) if is_valid
            ]
            save_mel_spectrograms(
                correct_chunks,
                genre_folder,
                os.path.splitext(recording_name)[0]
            )

            print("Mel spectrograms saved successfully!")
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next file
            print(f"Error processing {recording_name}: {e}")

print("All recordings processed.")
