# Zamiana mp3 na wav

In [None]:
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
import os

# Folder containing the source mp3 files
mp3_folder = r"D:\sieci_neuronowe\projekt-sieci-neuronowe\data\Parus_major"

# Folder where the converted wav files are written
wav_folder = r"D:\sieci_neuronowe\projekt-sieci-neuronowe\data\wav\Parus_major"

# Create the destination folder if it is missing. exist_ok=True replaces the
# separate existence check and avoids the gap between checking and creating.
os.makedirs(wav_folder, exist_ok=True)

# Convert every mp3 file located directly in mp3_folder
for file in os.listdir(mp3_folder):
    # Case-insensitive match so files named e.g. "X.MP3" are not skipped
    if not file.lower().endswith(".mp3"):
        continue

    mp3_path = os.path.join(mp3_folder, file)
    wav_path = os.path.join(wav_folder, os.path.splitext(file)[0] + ".wav")

    try:
        # Decode the mp3 file
        audio = AudioSegment.from_mp3(mp3_path)

        # Write it out as wav. export() returns the output file object still
        # open, so close it here instead of leaking one handle per file.
        audio.export(wav_path, format="wav").close()
        print(f"Plik {file} został przekonwertowany.")
    except CouldntDecodeError:
        # Best effort: report the undecodable file and carry on with the rest
        print(f"Nie można przekonwertować pliku {file}. Plik jest uszkodzony lub nieobsługiwany.")

print("Konwersja zakończona.")


# Funkcje wykorzystywane do przetwarzania

In [None]:
import os
import numpy as np
import tensorflow as tf
import tensorflow_io as tfio
from matplotlib import pyplot as plt



def load_wav_16k_mono(filename):
    """Load a WAV file as a single-channel waveform resampled to 16 kHz.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        A 1-D tensor of samples at a 16000 Hz sample rate.
    """
    # Read the raw bytes and decode them, asking for exactly one channel
    decoded_audio, source_rate = tf.audio.decode_wav(
        tf.io.read_file(filename),
        desired_channels=1,
    )
    # Drop the trailing channel axis left by the decoder
    mono = tf.squeeze(decoded_audio, axis=-1)
    # The resampler is given the file's own rate as int64 (whatever the file
    # declares, not a fixed 44100 Hz)
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=source_rate, rate_out=16000)


def normalize_wav(wav):
    """Peak-normalize a waveform so its largest absolute sample becomes 1.0.

    Args:
        wav: Tensor of audio samples; it is cast to float32 first.

    Returns:
        A float32 tensor of the same shape, divided by the maximum absolute
        sample value. An all-zero input is returned as all zeros.
    """
    # Convert to float32 for normalization
    wav = tf.cast(wav, tf.float32)
    peak = tf.reduce_max(tf.abs(wav))
    # divide_no_nan returns 0 wherever the divisor is 0, so a completely
    # silent recording stays silent instead of turning into NaN (0 / 0).
    return tf.math.divide_no_nan(wav, peak)

def split_wav_into_chunks(wav, chunk_length=3, sample_rate=16000):
    """Cut a 1-D waveform into equal-length, non-overlapping chunks.

    Samples at the end that do not fill a whole chunk are discarded.

    Args:
        wav: 1-D tensor of audio samples.
        chunk_length: Chunk duration in seconds.
        sample_rate: Sample rate of ``wav`` in Hz. Defaults to 16000, the
            value that was previously hard-coded.

    Returns:
        A tensor of shape ``(num_chunks, chunk_samples)``; ``num_chunks`` is 0
        when ``wav`` is shorter than one chunk.

    Raises:
        ValueError: If ``chunk_length * sample_rate`` is not at least one
            sample.
    """
    # int() keeps slicing and reshape valid for fractional durations (e.g. 1.5 s)
    chunk_samples = int(chunk_length * sample_rate)
    if chunk_samples <= 0:
        raise ValueError("chunk_length * sample_rate must be at least one sample")

    num_chunks = len(wav) // chunk_samples
    # Truncate to a whole number of chunks so the reshape is exact
    trimmed = wav[:num_chunks * chunk_samples]
    return tf.reshape(trimmed, (num_chunks, chunk_samples))



# Przykładowy plik podzielony na segmenty

In [None]:
# Example recording to visualise. The "capuchin" names are kept as-is even
# though the file loaded here is an Anser_anser recording.
CAPUCHIN_FILE = os.path.join('data1', 'Anser_anser', 'Anser211456.wav')
# Defined for reference only; nothing in this cell reads it.
NOT_CAPUCHIN_FILE = os.path.join('data1', 'Parsed_Not_Capuchinbird_Clips', 'afternoon-birds-song-in-forest-0.wav')

# A chunk counts as "correct" when at least MIN_LOUD_SAMPLES of its samples
# exceed AMPLITUDE_THRESHOLD (checked on the peak-normalized waveform).
AMPLITUDE_THRESHOLD = 0.6
MIN_LOUD_SAMPLES = 3

# Load, normalize and split the recording
capuchin_wave = load_wav_16k_mono(CAPUCHIN_FILE)
capuchin_wave = normalize_wav(capuchin_wave)
capuchin_chunks = split_wav_into_chunks(capuchin_wave)

# One boolean per chunk, plus the matching plot colour
correctness = [
    bool(tf.reduce_sum(tf.cast(chunk > AMPLITUDE_THRESHOLD, tf.int32)) >= MIN_LOUD_SAMPLES)
    for chunk in capuchin_chunks
]
colors = ['blue' if correct else 'red' for correct in correctness]

# Plot the original waveform
plt.figure(figsize=(12, 6))
plt.plot(capuchin_wave, label='Recording Waveform')

# Overlay each chunk at its position in the recording: blue = correct, red = rejected
for i, (chunk, color) in enumerate(zip(capuchin_chunks, colors)):
    chunk_start = i * chunk.shape[0]
    chunk_end = (i + 1) * chunk.shape[0]
    plt.plot(range(chunk_start, chunk_end), chunk, label=f'Chunk {i+1}', color=color)

plt.title('Anser_anser Waveform and Its Chunks (Correct/Incorrect)')
plt.xlabel('Sample Index')
plt.ylabel('Amplitude')
plt.legend()
plt.grid()
plt.show()


# Pobieranie spektrogramów

In [None]:
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
import os
import numpy as np
import tensorflow as tf
import tensorflow_io as tfio
from matplotlib import pyplot as plt

# Root folder whose subfolders are scanned for .wav recordings.
DATA_FOLDER = 'data1'
# Output root for the spectrogram images; note that it is located inside DATA_FOLDER.
SPECTRO_FOLDER = os.path.join(DATA_FOLDER, 'Spectro_v2')

def load_wav_16k_mono(filename):
    """Load a WAV file as a single-channel waveform resampled to 16 kHz.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        A 1-D tensor of samples at a 16000 Hz sample rate.
    """
    # Read the raw bytes and decode them, asking for exactly one channel
    decoded_audio, source_rate = tf.audio.decode_wav(
        tf.io.read_file(filename),
        desired_channels=1,
    )
    # Drop the trailing channel axis left by the decoder
    mono = tf.squeeze(decoded_audio, axis=-1)
    # The resampler is given the file's own rate as int64 (whatever the file
    # declares, not a fixed 44100 Hz)
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=source_rate, rate_out=16000)

def normalize_wav(wav):
    """Peak-normalize a waveform so its largest absolute sample becomes 1.0.

    Args:
        wav: Tensor of audio samples; it is cast to float32 first.

    Returns:
        A float32 tensor of the same shape, divided by the maximum absolute
        sample value. An all-zero input is returned as all zeros.
    """
    # Convert to float32 for normalization
    wav = tf.cast(wav, tf.float32)
    peak = tf.reduce_max(tf.abs(wav))
    # divide_no_nan returns 0 wherever the divisor is 0, so a completely
    # silent recording stays silent instead of turning into NaN (0 / 0).
    return tf.math.divide_no_nan(wav, peak)

def split_wav_into_chunks(wav, chunk_length=2, sample_rate=16000):
    """Cut a 1-D waveform into equal-length, non-overlapping chunks.

    Samples at the end that do not fill a whole chunk are discarded.

    Args:
        wav: 1-D tensor of audio samples.
        chunk_length: Chunk duration in seconds.
        sample_rate: Sample rate of ``wav`` in Hz. Defaults to 16000, the
            value that was previously hard-coded.

    Returns:
        A tensor of shape ``(num_chunks, chunk_samples)``; ``num_chunks`` is 0
        when ``wav`` is shorter than one chunk.

    Raises:
        ValueError: If ``chunk_length * sample_rate`` is not at least one
            sample.
    """
    # int() keeps slicing and reshape valid for fractional durations (e.g. 1.5 s)
    chunk_samples = int(chunk_length * sample_rate)
    if chunk_samples <= 0:
        raise ValueError("chunk_length * sample_rate must be at least one sample")

    num_chunks = len(wav) // chunk_samples
    # Truncate to a whole number of chunks so the reshape is exact
    trimmed = wav[:num_chunks * chunk_samples]
    return tf.reshape(trimmed, (num_chunks, chunk_samples))

def save_spectro(correct_chunks, folder_name, recording_name):
    """Render each chunk as a spectrogram image and save it as a JPEG.

    Images are written to ``SPECTRO_FOLDER/<folder_name>/`` and named
    ``<recording_name>_correct_chunk_<i>.jpg``, where ``i`` is the chunk's
    position in ``correct_chunks``.

    Args:
        correct_chunks: Iterable of 1-D tensors (each must provide
            ``.numpy()``) holding the samples of one chunk.
        folder_name: Name of the subfolder created under ``SPECTRO_FOLDER``.
        recording_name: Prefix used for the output file names.
    """
    spectro_folder = os.path.join(SPECTRO_FOLDER, folder_name)
    os.makedirs(spectro_folder, exist_ok=True)

    for i, chunk in enumerate(correct_chunks):
        image_path = os.path.join(spectro_folder, f'{recording_name}_correct_chunk_{i}.jpg')
        fig = plt.figure(figsize=(4, 4))
        try:
            plt.specgram(chunk.numpy(), Fs=16000)
            # Hide axes and padding so the file contains only the spectrogram
            plt.axis('off')
            plt.savefig(image_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if plotting or saving fails;
            # otherwise open figures pile up over a long batch run.
            plt.close(fig)

# A chunk is kept when at least MIN_LOUD_SAMPLES of its samples exceed
# AMPLITUDE_THRESHOLD (checked on the peak-normalized waveform).
AMPLITUDE_THRESHOLD = 0.65
MIN_LOUD_SAMPLES = 3

# Walk every genre folder under DATA_FOLDER
for genre_folder in os.listdir(DATA_FOLDER):
    genre_path = os.path.join(DATA_FOLDER, genre_folder)
    if not os.path.isdir(genre_path):
        continue
    # The output folder is located inside DATA_FOLDER; do not treat it as a
    # genre when the script is run again.
    if os.path.abspath(genre_path) == os.path.abspath(SPECTRO_FOLDER):
        continue
    print("Processing genre:", genre_folder)

    # Walk every .wav recording inside the genre folder
    for recording_name in os.listdir(genre_path):
        if not recording_name.endswith(".wav"):
            continue
        recording_path = os.path.join(genre_path, recording_name)
        print("Processing recording:", recording_name)

        # Batch boundary: a failure on one recording is reported and the run
        # moves on to the next file instead of aborting.
        try:
            # Load, normalize and split the recording
            wave = load_wav_16k_mono(recording_path)
            wave = normalize_wav(wave)
            chunks = split_wav_into_chunks(wave)

            # One boolean per chunk: does it contain enough loud samples?
            correctness = [
                bool(tf.reduce_sum(tf.cast(chunk > AMPLITUDE_THRESHOLD, tf.int32)) >= MIN_LOUD_SAMPLES)
                for chunk in chunks
            ]

            # Save spectrograms only for the chunks that passed the check
            correct_chunks = [chunk for chunk, correct in zip(chunks, correctness) if correct]
            save_spectro(correct_chunks, genre_folder, os.path.splitext(recording_name)[0])

            print("Spectrograms saved successfully!")
        except Exception as e:
            print(f"An error occurred while processing {recording_name}: {e}")

print("All recordings processed.")


# Tworzenie spektrogramów Mela


In [None]:
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
import os
import numpy as np
import tensorflow as tf
import tensorflow_io as tfio
from matplotlib import pyplot as plt
import librosa
import librosa.display

# Root folder whose subfolders are scanned for .wav recordings.
DATA_FOLDER = 'data1'
# Output root for the mel-spectrogram images.
SPECTRO_FOLDER = 'mel_v2'

def load_wav_16k_mono(filename):
    """Load a WAV file as a single-channel waveform resampled to 16 kHz.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        A 1-D tensor of samples at a 16000 Hz sample rate.
    """
    # Read the raw bytes and decode them, asking for exactly one channel
    decoded_audio, source_rate = tf.audio.decode_wav(
        tf.io.read_file(filename),
        desired_channels=1,
    )
    # Drop the trailing channel axis left by the decoder
    mono = tf.squeeze(decoded_audio, axis=-1)
    # The resampler is given the file's own rate as int64 (whatever the file
    # declares, not a fixed 44100 Hz)
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=source_rate, rate_out=16000)

def normalize_wav(wav):
    """Peak-normalize a waveform so its largest absolute sample becomes 1.0.

    Args:
        wav: Tensor of audio samples; it is cast to float32 first.

    Returns:
        A float32 tensor of the same shape, divided by the maximum absolute
        sample value. An all-zero input is returned as all zeros.
    """
    # Convert to float32 for normalization
    wav = tf.cast(wav, tf.float32)
    peak = tf.reduce_max(tf.abs(wav))
    # divide_no_nan returns 0 wherever the divisor is 0, so a completely
    # silent recording stays silent instead of turning into NaN (0 / 0).
    return tf.math.divide_no_nan(wav, peak)

def split_wav_into_chunks(wav, chunk_length=2, sample_rate=16000):
    """Cut a 1-D waveform into equal-length, non-overlapping chunks.

    Samples at the end that do not fill a whole chunk are discarded.

    Args:
        wav: 1-D tensor of audio samples.
        chunk_length: Chunk duration in seconds.
        sample_rate: Sample rate of ``wav`` in Hz. Defaults to 16000, the
            value that was previously hard-coded.

    Returns:
        A tensor of shape ``(num_chunks, chunk_samples)``; ``num_chunks`` is 0
        when ``wav`` is shorter than one chunk.

    Raises:
        ValueError: If ``chunk_length * sample_rate`` is not at least one
            sample.
    """
    # int() keeps slicing and reshape valid for fractional durations (e.g. 1.5 s)
    chunk_samples = int(chunk_length * sample_rate)
    if chunk_samples <= 0:
        raise ValueError("chunk_length * sample_rate must be at least one sample")

    num_chunks = len(wav) // chunk_samples
    # Truncate to a whole number of chunks so the reshape is exact
    trimmed = wav[:num_chunks * chunk_samples]
    return tf.reshape(trimmed, (num_chunks, chunk_samples))

def save_mel_spectrograms(correct_chunks, folder_name, recording_name):
    """Render each chunk as a mel spectrogram (in dB) and save it as a JPEG.

    Images are written to ``SPECTRO_FOLDER/<folder_name>/`` and named
    ``<recording_name>_mel_chunk_<i>.jpg``, where ``i`` is the chunk's
    position in ``correct_chunks``.

    Args:
        correct_chunks: Iterable of 1-D tensors (each must provide
            ``.numpy()``) holding the samples of one chunk.
        folder_name: Name of the subfolder created under ``SPECTRO_FOLDER``.
        recording_name: Prefix used for the output file names.
    """
    mel_spectro_folder = os.path.join(SPECTRO_FOLDER, folder_name)
    os.makedirs(mel_spectro_folder, exist_ok=True)

    for i, chunk in enumerate(correct_chunks):
        image_path = os.path.join(mel_spectro_folder, f'{recording_name}_mel_chunk_{i}.jpg')
        fig = plt.figure(figsize=(4, 4))
        try:
            # Mel power spectrogram with 128 bands up to 8 kHz, converted to
            # dB relative to its own maximum
            S = librosa.feature.melspectrogram(y=chunk.numpy(), sr=16000, n_mels=128, fmax=8000)
            S_dB = librosa.power_to_db(S, ref=np.max)
            librosa.display.specshow(S_dB, sr=16000, x_axis='time', y_axis='mel', fmax=8000)
            # No colorbar and no axes: the file contains only the spectrogram
            plt.axis('off')
            plt.savefig(image_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if plotting or saving fails;
            # otherwise open figures pile up over a long batch run.
            plt.close(fig)


# A chunk is kept when at least MIN_LOUD_SAMPLES of its samples exceed
# AMPLITUDE_THRESHOLD (checked on the peak-normalized waveform).
AMPLITUDE_THRESHOLD = 0.65
MIN_LOUD_SAMPLES = 3

# Walk every genre folder under DATA_FOLDER
for genre_folder in os.listdir(DATA_FOLDER):
    genre_path = os.path.join(DATA_FOLDER, genre_folder)
    if not os.path.isdir(genre_path):
        continue
    print("Processing genre:", genre_folder)

    # Walk every .wav recording inside the genre folder
    for recording_name in os.listdir(genre_path):
        if not recording_name.endswith(".wav"):
            continue
        recording_path = os.path.join(genre_path, recording_name)
        print("Processing recording:", recording_name)

        # Batch boundary: a failure on one recording is reported and the run
        # moves on to the next file instead of aborting.
        try:
            # Load, normalize and split the recording
            wave = load_wav_16k_mono(recording_path)
            wave = normalize_wav(wave)
            chunks = split_wav_into_chunks(wave)

            # One boolean per chunk: does it contain enough loud samples?
            correctness = [
                bool(tf.reduce_sum(tf.cast(chunk > AMPLITUDE_THRESHOLD, tf.int32)) >= MIN_LOUD_SAMPLES)
                for chunk in chunks
            ]

            # Save mel spectrograms only for the chunks that passed the check
            correct_chunks = [chunk for chunk, correct in zip(chunks, correctness) if correct]
            save_mel_spectrograms(correct_chunks, genre_folder, os.path.splitext(recording_name)[0])

            print("Mel spectrograms saved successfully!")
        except Exception as e:
            print(f"An error occurred while processing {recording_name}: {e}")

print("All recordings processed.")
