In [2]:
import numpy as np
import os
import librosa
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
import soundfile as sf

In [4]:
input_folder = r'C:\Users\dandr\Documents\DataMining\FinalProject\audio_files'
output_folder = r'C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip'

def trim_and_save_audio(input_folder, output_folder):
    for bird_folder in os.listdir(input_folder):
        bird_folder_path = os.path.join(input_folder, bird_folder)
        print(bird_folder)
        if os.path.isdir(bird_folder_path):
            output_bird_folder_path = os.path.join(output_folder, bird_folder)
            os.makedirs(output_bird_folder_path, exist_ok=True)

            for audio_file in os.listdir(bird_folder_path):
                if audio_file.endswith(".mp3"):
                    audio_path = os.path.join(bird_folder_path, audio_file)
                    text_file_path = os.path.join(bird_folder_path, audio_file.replace(".mp3", ".txt"))

                    if os.path.exists(text_file_path):
                        with open(text_file_path, 'r') as text_file:
                            lines = text_file.readlines()

                            for i, line in enumerate(lines, start=1):
                                start_time, end_time, _ = line.strip().split('\t')
                                start_time, end_time = float(start_time), float(end_time)
                                print(f"Start: {start_time}, end: {end_time}")

                                # Load audio with librosa
                                y, sr = librosa.load(audio_path, sr=None)

                                # Trim audio
                                start_frame = int(start_time * sr)
                                end_frame = int(end_time * sr)
                                trimmed_audio = y[start_frame:end_frame]

                                # Check if the trimmed audio duration is greater than 0 sec
                                if len(trimmed_audio) / sr > 0:
                                    # Save trimmed audio
                                    output_audio_name = f"{os.path.splitext(audio_file)[0]}_{i}.wav"
                                    output_audio_path = os.path.join(output_bird_folder_path, output_audio_name)
                                    sf.write(output_audio_path, trimmed_audio, sr)
                                    print(f"Trimmed audio saved: {output_audio_path}")

trim_and_save_audio(input_folder, output_folder)

Acrocephalus arundinaceus
Start: 2.168366, end: 5.108297
Trimmed audio saved: C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip\Acrocephalus arundinaceus\XC417157_1.wav
Start: 19.289143, end: 20.885486
Trimmed audio saved: C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip\Acrocephalus arundinaceus\XC417157_2.wav
Start: 3.496018, end: 5.093184
Trimmed audio saved: C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip\Acrocephalus arundinaceus\XC417158_1.wav
Start: 9.991159, end: 13.149998
Trimmed audio saved: C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip\Acrocephalus arundinaceus\XC417158_2.wav
Start: 18.651346, end: 21.277797
Trimmed audio saved: C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip\Acrocephalus arundinaceus\XC417158_3.wav
Start: 41.739264, end: 43.709102
Trimmed audio saved: C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip\Acrocephalus arundinaceus\XC417158_4.wav
Start: 48.518345, end: 52.17408
Trimmed audio sav

In [72]:
def extract_audio_features(audio_path):
    y, sr = librosa.load(audio_path, sr=None)

    if not y.size:  # Check if the audio is empty
        return None

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)[0]
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)

    # Check if any feature has zero size
    if any(len(feature) == 0 for feature in [mfccs, spectral_centroid, spectral_bandwidth,
                                             spectral_rolloff, spectral_contrast,
                                             zero_crossing_rate, chroma]):
        return None

    return {
        'mfccs': np.mean(mfccs, axis=1),
        'spectral_centroid': np.mean(spectral_centroid),
        'spectral_bandwidth': np.mean(spectral_bandwidth),
        # 'spectral_rolloff': np.mean(spectral_rolloff),
        # 'spectral_contrast': np.mean(spectral_contrast, axis=1),
        'zero_crossing_rate': np.mean(zero_crossing_rate),
        'chroma': np.mean(chroma, axis=1)
    }


def extract_features_for_folder(folder_path, label):
    features_list = []

    for audio_file in os.listdir(folder_path):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            features = extract_audio_features(audio_path)
            if features:
                features['label'] = label
                features_list.append(features)

    return features_list


def prepare_dataset(input_folder):
    dataset = []

    for bird_folder in os.listdir(input_folder):
        bird_folder_path = os.path.join(input_folder, bird_folder)
        if os.path.isdir(bird_folder_path):
            label = bird_folder
            features_list = extract_features_for_folder(bird_folder_path, label)
            dataset.extend(features_list)

    df_feat = pd.DataFrame(dataset)

    # Save DataFrame to CSV
    df_feat.to_csv(r"C:\Users\dandr\Documents\DataMining\FinalProject\birds_song_features.csv", index=False)
    #df_feat.to_csv(r"C:\Users\dandr\Documents\DataMining\FinalProject\birds_song_features2.csv", index=False, float_format='%.8f')

    return df_feat

In [76]:
def extract_audio_features(audio_path):
    y, sr = librosa.load(audio_path, sr=None)

    if not y.size:  # Check if the audio is empty
        return None

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, win_length = 2048, hop_length=512)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)

    # Check if any feature has zero size
    if any(len(feature) == 0 for feature in [mfccs, spectral_centroid, zero_crossing_rate]):
        return None

    # Reshape mfccs to a 1D array
    mfccs = mfccs.T
    spectral_centroid = spectral_centroid.reshape(-1,1)
    zero_crossing_rate = zero_crossing_rate.reshape(-1,1)

    return {
        'mfccs': mfccs,
        'spectral_centroid': spectral_centroid,
        'zero_crossing_rate': zero_crossing_rate
    }


def extract_features_for_folder(folder_path, label):
    features_list = []

    for audio_file in os.listdir(folder_path):
        # print(folder_path)
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(folder_path, audio_file)
            features = extract_audio_features(audio_path)
            if features:
                features['label'] = label
                features = np.concatenate(features, axis=1)

                for f in features:
                    row = f.tolist()
                    row.append(label)
                    features_list.append(row)

    return features_list


def prepare_dataset(input_folder):
    dataset = []

    for bird_folder in os.listdir(input_folder):
        bird_folder_path = os.path.join(input_folder, bird_folder)
        if os.path.isdir(bird_folder_path):
            label = bird_folder
            features_list = extract_features_for_folder(bird_folder_path, label)
            dataset.extend(features_list)


    # Save DataFrame to CSV
    # df_feat.to_csv(r"C:\Users\dandr\Documents\DataMining\FinalProject\birds_song_features.csv", index=False)
    #df_feat.to_csv(r"C:\Users\dandr\Documents\DataMining\FinalProject\birds_song_features2.csv", index=False, float_format='%.8f')

    return df_feat

In [77]:
if __name__ == "__main__":
    # input_folder = r"C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip3\"
    input_folder = r"C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip3\Acrocephalus arundinaceus"

    # Prepare the dataset
    dataset = extract_features_for_folder(input_folder, "Acrocephalus arundinaceus")
    # display(dataset)

TypeError: The first input argument needs to be a sequence

In [85]:
audio_folder = r"C:\Users\dandr\Documents\DataMining\FinalProject\audio_files"
data = []
# Recorremos las subcarpetas de "audio_files"
for bird_folder in os.listdir(audio_folder):
    print(bird_folder)
    # Obtenemos el nombre del pajaro
    bird_name = bird_folder

    # Recorremos los archivos de la subcarpeta
    for file in os.listdir(os.path.join(audio_folder, bird_folder)):
        if file.endswith(".mp3"):
            # Cargamos el archivo de audio y el archivo de texto
            audio_path = os.path.join(audio_folder, bird_folder, file)
            text_path = os.path.join(audio_folder, bird_folder, file.replace(".mp3", ".txt"))

            # Cargamos el audio con librosa
            y, sr = librosa.load(audio_path)

            # Leemos el archivo de texto y cortamos el audio
            with open(text_path, "r") as f:
                for line in f:
                    start_time, end_time, label = line.strip().split("\t")
                    start_frame = librosa.time_to_samples(float(start_time))
                    end_frame = librosa.time_to_samples(float(end_time))
                    cut_audio = y[start_frame:end_frame]

                    if len(cut_audio) > 0:
                      # Calcular la longitud de la ventana y el salto de forma proporcional al tamaño del audio cortado
                      window_length = len(cut_audio) // 4  # Puedes ajustar el factor como creas conveniente
                      hop_length = window_length // 4  # Puedes ajustar el factor como creas conveniente

                      mfcc = librosa.feature.mfcc(y=cut_audio, sr=sr, n_mfcc=13, n_fft=window_length, hop_length=hop_length)
                      zcr = librosa.feature.zero_crossing_rate(y=cut_audio, frame_length=window_length, hop_length=hop_length)
                      spectral_centroid = librosa.feature.spectral_centroid(y=cut_audio, sr=sr, n_fft=window_length, hop_length=hop_length)
                      spectral_flux = librosa.onset.onset_strength(S=librosa.feature.melspectrogram(y=cut_audio, sr=sr, n_fft=window_length, hop_length=hop_length))
                      spectral_bandwidth = librosa.feature.spectral_bandwidth(y=cut_audio, sr=sr, n_fft=window_length, hop_length=hop_length)
                      energy = librosa.feature.rms(y=cut_audio, frame_length=window_length, hop_length=hop_length)

                      zcr = zcr.reshape(-1, 1)
                      spectral_centroid = spectral_centroid.reshape(-1, 1)
                      spectral_flux = spectral_flux.reshape(-1, 1)
                      spectral_bandwidth = spectral_bandwidth.reshape(-1, 1)
                      energy = energy.reshape(-1, 1)

                      features = np.concatenate((zcr, spectral_centroid, spectral_flux, spectral_bandwidth, energy, mfcc.T), axis=1)

                      for f in features:
                          row = f.tolist()
                          row.append(bird_name)
                          data.append(row)

Acrocephalus arundinaceus
Acrocephalus melanopogon
Acrocephalus scirpaceus
Alcedo atthis
Anas platyrhynchos
Anas strepera
Ardea purpurea
Botaurus stellaris
Charadrius alexandrinus


  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Ciconia ciconia
Circus aeruginosus
Coracias garrulus
Dendrocopos minor
Fulica atra
Gallinula chloropus
Himantopus himantopus
Ixobrychus minutus
Motacilla flava
Porphyrio porphyrio
Tachybaptus ruficollis


In [89]:
## Crear un DataFrame con las características
columns = ['zero_crossing_rate', 'spectral_centroid', 'spectral_flux', 'spectral_bandwidth',
          'energy']
columns.extend(['mfcc_{}'.format(i) for i in range(13)])
columns.append('label')

df_feat = pd.DataFrame(data, columns=columns)
df_feat.to_csv(r"C:\Users\dandr\Documents\DataMining\FinalProject\birds_song_features.csv", index=False)


In [53]:
if __name__ == "__main__":
    input_folder = r"C:\Users\dandr\Documents\DataMining\FinalProject\audio_clip2"

    # Prepare the dataset
    dataset = prepare_dataset(input_folder)
    print(dataset)

  bins_per_octave=bins_per_octave,


                                                 mfccs  spectral_centroid  \
0    [-144.01588, 131.91563, -114.978226, -24.67387...        2892.970542   
1    [-145.36244, 136.58644, -111.282074, -13.82940...        3196.365915   
2    [-294.68402, 123.38041, -98.854675, 68.9578, -...        3732.432893   
3    [-268.4957, 136.41379, -118.67018, 28.211489, ...        3134.916171   
4    [-289.01138, 139.7056, -97.71578, 27.623909, -...        3543.816580   
..                                                 ...                ...   
574  [-271.59976, -11.773421, -66.410324, -48.86634...        8554.680112   
575  [-272.32803, -7.1234593, -62.074947, -55.64829...        8513.200652   
576  [-293.2679, -9.276755, -59.753338, -53.343166,...        9808.917465   
577  [-413.33792, 129.1721, -51.125813, 27.783161, ...        3679.000337   
578  [-280.68478, 98.23872, 2.0270221, -23.479555, ...        5898.196530   

     spectral_bandwidth  spectral_rolloff  zero_crossing_rate  \
0         

In [None]:
def train_and_evaluate_model(dataset):
    X = dataset.drop(columns=['label'])
    y = dataset['label']

    # Encode labels to numerical values
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    # Train a Random Forest Classifier
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    # Cross-validation (optional)
    cv_scores = cross_val_score(model, X, y_encoded, cv=5)
    print(f"Cross-validation scores: {cv_scores}")