<a href="https://colab.research.google.com/github/azizatanfous/final_project_python/blob/main/Untitled38.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Libraries 
import tensorflow as tf #build and deploy machine learning models
import tensorflow_hub as hub  #pre-trained machine learning models and reusable code modules
import tensorflow_io as tfio #collection of file systems and file formats for TensorFlow
import pandas as pd #data manipulation and analysis
import numpy as np #numerical computing in Python
import librosa #Python package for audio and music signal processing
import glob #retrieve files/pathnames that match a specified pattern.
import csv
import io #functionality for working with input and output streams.
from IPython.display import Audio #display audio files in Jupyter notebooks or other IPython environments.
from tqdm import tqdm  #add progress bars to loops and other iterables
import torchaudio #enables efficient and fast processing of audio signals and supports loading and decoding audio files in various formats
import torch
from torch.utils.data import DataLoader, Dataset


In [None]:
# Configuration: competition metadata, pretrained classifier and label mappings
from pathlib import Path  # `Path` is used below but is not in the import cell

df = pd.read_csv('/kaggle/input/birdclef-2023/train_metadata.csv')
AUDIO_PATH = Path('/kaggle/input/birdclef-2023/train_audio')

# One handle for both the model and its label file, so the two can never point
# at different spellings or versions of the same resource.
MODEL_HANDLE = 'https://kaggle.com/models/google/bird-vocalization-classifier/frameworks/tensorFlow2/variations/bird-vocalization-classifier/versions/2'
model = hub.load(MODEL_HANDLE)
model_labels_df = pd.read_csv(hub.resolve(MODEL_HANDLE) + "/assets/label.csv")

SAMPLE_RATE = 32000
WINDOW = 5 * SAMPLE_RATE  # number of samples in one 5-second window

# Competition species in sorted order, and their position in that order.
bc2023_labels = sorted(df.primary_label.unique())
label_to_index = {v: k for k, v in enumerate(bc2023_labels)}

# Position of every model class in the label file's `ebird2021` column.
model_labels = {v: k for k, v in enumerate(model_labels_df.ebird2021)}

# For each competition species, its index in the model's label list, or -1
# when the model has no such class.
model_bc2023_indexes = [model_labels.get(label, -1) for label in bc2023_labels]

In [None]:
# 1st option
# Explore the training data: decode one sample recording for each of two species.
abethr1_sample = "/kaggle/input/birdclef-2023/train_audio/abethr1/XC128013.ogg"
abhori1_sample = "/kaggle/input/birdclef-2023/train_audio/abhori1/XC127317.ogg"

# librosa.load returns a (samples, sample_rate) pair for each file.
(audio_abe, sr_abe), (audio_abh, sr_abh) = (
    librosa.load(sample_path) for sample_path in (abethr1_sample, abhori1_sample)
)

In [None]:
# 2nd option
import multiprocessing as mp
import librosa

# Define a function to load audio files
def load_audio(file_path):
    """Decode one audio file with librosa's default settings.

    Args:
        file_path: Path of the audio file to decode.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.
    """
    samples, rate = librosa.load(file_path)
    return samples, rate

# Define the file paths
file_paths = ["/kaggle/input/birdclef-2023/train_audio/abethr1/XC128013.ogg",
              "/kaggle/input/birdclef-2023/train_audio/abhori1/XC127317.ogg"]

# Load the audio files in parallel. The context manager tears the worker
# processes down even if a worker raises while decoding, so no pool is leaked.
with mp.Pool() as pool:
    results = pool.map(load_audio, file_paths)

# Extract the audio and sample rate from the results (same order as file_paths)
audio_abe, sr_abe = results[0]
audio_abh, sr_abh = results[1]


In [None]:
# 1st option
from IPython.display import display

# A notebook cell only renders its LAST bare expression, so each player must be
# passed to display() explicitly or the first one is silently discarded.
display(Audio(data=audio_abe, rate=sr_abe))
display(Audio(data=audio_abh, rate=sr_abh))

In [None]:
# 2nd option
from pydub import AudioSegment
from pydub.playback import play

# Decode the OGG recording into a pydub segment
abethr1_ogg_path = "/kaggle/input/birdclef-2023/train_audio/abethr1/XC128013.ogg"
audio_abe_pydub = AudioSegment.from_file(abethr1_ogg_path, format="ogg")

# Hand the segment to pydub's playback helper
# NOTE(review): this presumably plays on the machine running the kernel, not in
# the browser -- confirm it is audible in a hosted notebook.
play(audio_abe_pydub)


In [None]:
# 3rd option
import sys
import wave

# Load the audio file with librosa
audio_abe, sr_abe = librosa.load("/kaggle/input/birdclef-2023/train_audio/abethr1/XC128013.ogg")

# Play the audio with winsound. The module only exists on Windows, so import it
# lazily and skip playback elsewhere instead of failing the whole cell.
if sys.platform == "win32":
    import winsound

    # SND_MEMORY expects the bytes of a complete WAV file, not raw float
    # samples, so encode the waveform as 16-bit little-endian mono PCM first.
    pcm16 = (np.clip(audio_abe, -1.0, 1.0) * 32767).astype("<i2")
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(sr_abe))
        wav_file.writeframes(pcm16.tobytes())
    winsound.PlaySound(wav_buffer.getvalue(), winsound.SND_MEMORY)
else:
    print("winsound is only available on Windows; skipping playback.")


In [None]:
# Match the model's output with the bird species in the competition
# NOTE(review): this rebinds `model` to version 1 of the classifier, whereas the
# configuration cell loads version 2 -- confirm which version is intended.
classifier_v1_handle = 'https://kaggle.com/models/google/bird-vocalization-classifier/frameworks/tensorFlow2/variations/bird-vocalization-classifier/versions/1'
model = hub.load(classifier_v1_handle)
# Path of the label file shipped inside the resolved model directory
labels_path = hub.resolve(classifier_v1_handle) + "/assets/label.csv"

In [None]:
#1st option
#  Find the name of the class with the top score when mean-aggregated across frames.
def class_names_from_csv(class_map_csv_text):
    """Returns list of class names corresponding to score vector.

    Args:
        class_map_csv_text: Path of the label CSV file to read. Despite the
            parameter name, this is a file path rather than CSV text.

    Returns:
        The first column of every data row, in file order. The first row is
        treated as a header and skipped; blank rows are ignored.
    """
    # Read the file named by the argument rather than a module-level global.
    with open(class_map_csv_text, newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header row (no-op on an empty file)
        # Index the first column instead of unpacking exactly two fields, so
        # label files with one column or extra columns are accepted as well.
        return [row[0] for row in csv_reader if row]

## Note: the bird classifier covers a much larger set of birds than the
## competition, so we must load the model's own list of class names; otherwise
## our indices into its score vector will be off.
classes = class_names_from_csv(labels_path)

In [2]:
# 2nd option 
import pandas as pd

def class_names_from_csv(class_map_csv_text):
    """Returns list of class names corresponding to score vector.

    Args:
        class_map_csv_text: Path of the label CSV file to read. Despite the
            parameter name, this is a file path rather than CSV text.

    Returns:
        The first column of every data row as strings, in file order. The
        first row is treated as a header.
    """
    # `squeeze=` was removed from read_csv in pandas 2.0, so select the single
    # column explicitly. Reading only column 0 with the default header handling
    # returns the class names themselves, not the header or the descriptions.
    label_column = pd.read_csv(
        class_map_csv_text,
        usecols=[0],
        dtype=str,
        keep_default_na=False,  # keep codes such as "NA" as text, not NaN
    ).iloc[:, 0]
    return label_column.tolist()

# Class names for the model's score vector, read from the label file at `labels_path`.
classes = class_names_from_csv(labels_path)


SyntaxError: ignored

In [None]:
train_metadata = pd.read_csv("/kaggle/input/birdclef-2023/train_metadata.csv")
competition_classes = sorted(train_metadata.primary_label.unique())

# Position of each model class in the score vector. setdefault keeps the FIRST
# occurrence of a name, which is what list.index() would have returned.
class_to_index = {}
for class_index, class_name in enumerate(classes):
    class_to_index.setdefault(class_name, class_index)

# Map every competition class to its model index; classes the pretrained model
# does not know fall back to index 0 and are counted in `forced_defaults`.
competition_class_map = [class_to_index.get(c, 0) for c in competition_classes]
forced_defaults = sum(c not in class_to_index for c in competition_classes)

## this is the count of classes not supported by our pretrained model
## you could choose to simply not predict these, set a default as above,
## or create your own model using the pretrained model as a base.
forced_defaults

In [None]:
# option 2 
import dask.dataframe as dd

# Read the metadata with dask, then materialise the result with .compute()
train_metadata = dd.read_csv("/kaggle/input/birdclef-2023/train_metadata.csv").compute()


In [None]:
# option 3: read only the `primary_label` column of the training metadata
train_metadata = pd.read_csv("/kaggle/input/birdclef-2023/train_metadata.csv", usecols=["primary_label"])


In [None]:
# Preprocess the data
def frame_audio(
      audio_array: np.ndarray,
      window_size_s: float = 5.0,
      hop_size_s: float = 5.0,
      sample_rate = 32000,
      ) -> np.ndarray:
    """Split a waveform into windows for inference, using ``tf.signal.frame``.

    Args:
        audio_array: Waveform samples.
        window_size_s: Window length in seconds. ``None`` or a negative value
            disables framing.
        hop_size_s: Step between successive window starts, in seconds.
        sample_rate: Samples per second, used to convert seconds to samples.

    Returns:
        With framing disabled, ``audio_array`` with a new leading axis.
        Otherwise the result of ``tf.signal.frame`` with ``pad_end=True``.
    """
    framing_disabled = window_size_s is None or window_size_s < 0
    if framing_disabled:
        return audio_array[np.newaxis, :]

    # Convert both durations from seconds to whole sample counts.
    samples_per_window, samples_per_hop = (
        int(seconds * sample_rate) for seconds in (window_size_s, hop_size_s)
    )
    return tf.signal.frame(audio_array, samples_per_window, samples_per_hop, pad_end=True)

def ensure_sample_rate(waveform, original_sample_rate,
                       desired_sample_rate=32000):
    """Bring ``waveform`` to ``desired_sample_rate`` via ``tfio.audio.resample``.

    Args:
        waveform: Audio samples.
        original_sample_rate: Sample rate the samples currently have.
        desired_sample_rate: Sample rate the result should have.

    Returns:
        ``(desired_sample_rate, waveform)``; the waveform is passed through
        untouched when the two rates already match.
    """
    if original_sample_rate == desired_sample_rate:
        return desired_sample_rate, waveform
    resampled = tfio.audio.resample(waveform, original_sample_rate, desired_sample_rate)
    return desired_sample_rate, resampled

In [None]:
# 2nd option
import librosa

def frame_audio(
    audio_array: np.ndarray,
    window_size_s: float = 5.0,
    hop_size_s: float = 5.0,
    sample_rate = 32000,
) -> np.ndarray:
    """Helper function for framing audio for inference (librosa variant).

    Args:
        audio_array: Waveform samples; assumed to be 1-D (mono).
        window_size_s: Window length in seconds. ``None`` or a negative value
            disables framing.
        hop_size_s: Step between successive window starts, in seconds.
        sample_rate: Samples per second, used to convert seconds to samples.

    Returns:
        With framing disabled, ``audio_array`` with a new leading axis.
        Otherwise an array of shape ``(n_frames, frame_length)``. The tail is
        zero-padded so a trailing partial window is kept, giving
        ``ceil(n_samples / hop_length)`` frames.
    """
    if window_size_s is None or window_size_s < 0:
        return audio_array[np.newaxis, :]
    frame_length = int(window_size_s * sample_rate)
    hop_length = int(hop_size_s * sample_rate)

    audio_array = np.asarray(audio_array)
    n_samples = audio_array.shape[-1]
    if n_samples == 0:
        # Nothing to frame; librosa would raise on an empty input.
        return np.zeros((0, frame_length), dtype=audio_array.dtype)

    # Zero-pad the tail so the last partial window survives and clips shorter
    # than one window do not raise.
    n_frames = -(-n_samples // hop_length)  # ceiling division
    padded_length = (n_frames - 1) * hop_length + frame_length
    if padded_length > n_samples:
        audio_array = np.pad(audio_array, (0, padded_length - n_samples))

    # frame_length / hop_length are keyword-only from librosa 0.10 onwards.
    framed_audio = librosa.util.frame(
        audio_array, frame_length=frame_length, hop_length=hop_length
    )
    # librosa puts frames on the last axis; transpose to (n_frames, frame_length).
    return framed_audio.T

def ensure_sample_rate(waveform, original_sample_rate,
                       desired_sample_rate=32000):
    """Resample waveform if required (librosa variant).

    Args:
        waveform: Audio samples.
        original_sample_rate: Sample rate the samples currently have.
        desired_sample_rate: Sample rate the result should have.

    Returns:
        ``(desired_sample_rate, waveform)``; the waveform is returned
        unchanged when the two rates already match.
    """
    if original_sample_rate != desired_sample_rate:
        # orig_sr / target_sr are keyword-only from librosa 0.10 onwards, and
        # the keyword form is accepted by older releases too.
        waveform = librosa.resample(
            waveform, orig_sr=original_sample_rate, target_sr=desired_sample_rate
        )
    return desired_sample_rate, waveform


In [None]:
# 1st option
# sr=None keeps the file's native sample rate. By default librosa.load resamples
# everything to 22,050 Hz, which would downsample the recording before
# ensure_sample_rate() brings it to the 32 kHz target.
audio, sample_rate = librosa.load("/kaggle/input/birdclef-2023/train_audio/afghor1/XC156639.ogg", sr=None)
sample_rate, wav_data = ensure_sample_rate(audio, sample_rate)
Audio(wav_data, rate=sample_rate)

In [None]:
# 2nd option
import soundfile as sf

def load_and_resample_audio(file_path, desired_sample_rate=32000):
    """Load audio file and resample if required.

    Args:
        file_path: Path of the audio file, read with ``soundfile``.
        desired_sample_rate: Sample rate the returned waveform should have.

    Returns:
        ``(audio, sample_rate)``, where ``audio`` is a 1-D float32 waveform
        and ``sample_rate`` equals ``desired_sample_rate``.
    """
    # NOTE(review): float32 is requested on the assumption that the classifier
    # takes float32 input, as produced by librosa.load elsewhere -- confirm.
    audio, sample_rate = sf.read(file_path, dtype="float32")
    if audio.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel files; average
        # the channels so the result is mono whether or not resampling happens.
        audio = audio.mean(axis=1)
    if sample_rate != desired_sample_rate:
        # orig_sr / target_sr are keyword-only from librosa 0.10 onwards.
        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=desired_sample_rate)
        sample_rate = desired_sample_rate
    return audio, sample_rate


In [None]:
# Step 5: Make predictions
# Frame the resampled waveform and score only its first window.
fixed_tm = frame_audio(wav_data)
first_window = fixed_tm[:1]
logits, embeddings = model.infer_tf(first_window)

# Turn the logits into probabilities and pick the highest-scoring class.
probabilities = tf.nn.softmax(logits)
argmax = np.argmax(probabilities)
print(f"The audio is from the class {classes[argmax]} (element:{argmax} in the label.csv file), with probability of {probabilities[0][argmax]}")

In [None]:
# 2nd option 
def batch_frame_audio(audio_array: np.ndarray, batch_size: int, window_size_s: float = 5.0,
                      hop_size_s: float = 5.0, sample_rate=32000) -> np.ndarray:
    """Helper function for batching and framing audio for inference.

    Args:
        audio_array: Waveform samples.
        batch_size: Number of frames per batch; must be at least 1.
        window_size_s: Window length in seconds. ``None`` or a negative value
            disables framing and batching.
        hop_size_s: Step between successive window starts, in seconds.
        sample_rate: Samples per second, used to convert seconds to samples.

    Returns:
        With framing disabled, ``audio_array`` with a new leading axis.
        Otherwise a float32 array of shape
        ``(num_batches, batch_size, frame_length)``; the last batch is filled
        up with all-zero frames when the frame count is not a multiple of
        ``batch_size``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if window_size_s is None or window_size_s < 0:
        return audio_array[np.newaxis, :]
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    frame_length = int(window_size_s * sample_rate)
    hop_length = int(hop_size_s * sample_rate)
    frames = np.asarray(
        tf.signal.frame(audio_array, frame_length, hop_length, pad_end=True),
        dtype=np.float32,
    )

    num_frames = frames.shape[0]
    num_batches = -(-num_frames // batch_size)  # ceiling division
    # np.zeros defaults to float64, so the dtype is set explicitly.
    # NOTE(review): float32 assumes the classifier takes float32 input, as
    # produced by librosa.load elsewhere in this notebook -- confirm.
    padded_frames = np.zeros((num_batches * batch_size, frame_length), dtype=np.float32)
    padded_frames[:num_frames] = frames

    return padded_frames.reshape(num_batches, batch_size, frame_length)


def predict_audio_class(model, audio, batch_size=16, window_size_s=5.0,
                        hop_size_s=5.0, sample_rate=32000):
    """Predict the class of an audio sample using a pre-trained model.

    Args:
        model: Object whose ``infer_tf(batch)`` returns ``(logits, embeddings)``.
        audio: Path of the audio file; passed to ``load_and_resample_audio``.
        batch_size: Number of frames sent to the model per call.
        window_size_s: Window length in seconds.
        hop_size_s: Step between successive window starts, in seconds.
        sample_rate: Sample rate the audio is resampled to before framing.

    Returns:
        ``(class_names, probabilities)``: the top class name for each real
        frame, and the softmax probabilities with one row per real frame.

    Raises:
        ValueError: If the audio file contains no samples.
    """
    fixed_audio, _ = load_and_resample_audio(audio, desired_sample_rate=sample_rate)
    if len(fixed_audio) == 0:
        raise ValueError(f"no audio samples found in {audio!r}")

    frames = batch_frame_audio(
        fixed_audio,
        batch_size,
        window_size_s=window_size_s,
        hop_size_s=hop_size_s,
        sample_rate=sample_rate,
    )

    probabilities = []
    for batch in frames:
        logits, _ = model.infer_tf(batch)
        probabilities.append(tf.nn.softmax(logits))
    probabilities = tf.concat(probabilities, axis=0)

    # batch_frame_audio fills the last batch with all-zero frames. Drop those
    # rows so no prediction is reported for audio that does not exist. With
    # pad_end=True the real frame count is ceil(n_samples / hop_length).
    num_real_frames = -(-len(fixed_audio) // int(hop_size_s * sample_rate))
    probabilities = probabilities[:num_real_frames]

    argmax = np.argmax(probabilities, axis=1)
    label_names = class_names_from_csv(labels_path)
    class_names = [label_names[i] for i in argmax]
    return class_names, probabilities


In [None]:
# 1st option
def predict_for_sample(filename, sample_submission, frame_limit_secs=None):
    """Fill ``sample_submission`` with per-window predictions for one recording.

    The recording is framed with ``frame_audio`` (5-second windows by default).
    Each window is scored by the module-level ``model``, and its softmax
    probabilities, re-ordered through ``competition_class_map``, are written
    to the row whose ``row_id`` is ``"<file id>_<window end in seconds>"``.

    Args:
        filename: Path of an ``.ogg`` file; the file id is its base name
            without the extension.
        sample_submission: DataFrame with a ``row_id`` column and one column
            per entry of ``competition_classes``. Modified in place.
        frame_limit_secs: If truthy, only windows ending at or before this
            many seconds are scored.

    Returns:
        None.
    """
    window_secs = 5  # matches frame_audio's default window and hop size
    file_id = filename.split(".ogg")[0].split("/")[-1]

    # sr=None keeps the native sample rate; by default librosa.load would
    # downsample to 22,050 Hz before the resample to the model's rate.
    audio, sample_rate = librosa.load(filename, sr=None)
    sample_rate, wav_data = ensure_sample_rate(audio, sample_rate)

    fixed_tm = frame_audio(wav_data)

    for window_index, window in enumerate(fixed_tm):
        end_secs = (window_index + 1) * window_secs
        if frame_limit_secs and end_secs > frame_limit_secs:
            break  # every later window is past the limit as well

        logits, _ = model.infer_tf(window[np.newaxis, :])
        probabilities = tf.nn.softmax(logits[0]).numpy()

        ## set the appropriate row in the sample submission
        row_mask = sample_submission.row_id == file_id + "_" + str(end_secs)
        sample_submission.loc[row_mask, competition_classes] = probabilities[competition_class_map]

In [None]:
# 2nd option 
def predict_for_sample(filename, sample_submission, frame_limit_secs=None):
    """Fill ``sample_submission`` with per-window predictions for one recording.

    The recording is framed with ``frame_audio`` (5-second windows by default).
    Each window is scored by the module-level ``model``, and its softmax
    probabilities, re-ordered through ``competition_class_map``, are written
    to the row whose ``row_id`` is ``"<file id>_<window end in seconds>"``.

    Args:
        filename: Path of an ``.ogg`` file; the file id is its base name
            without the extension.
        sample_submission: DataFrame with a ``row_id`` column and one column
            per entry of ``competition_classes``. Modified in place.
        frame_limit_secs: If truthy, only windows ending at or before this
            many seconds are scored.

    Returns:
        None.
    """
    window_secs = 5  # matches frame_audio's default window and hop size
    file_id = filename.split(".ogg")[0].split("/")[-1]

    # sr=None keeps the native sample rate; by default librosa.load would
    # downsample to 22,050 Hz before the resample to the model's rate.
    audio, sample_rate = librosa.load(filename, sr=None)
    sample_rate, wav_data = ensure_sample_rate(audio, sample_rate)

    fixed_tm = frame_audio(wav_data)

    for window_index, window in enumerate(fixed_tm):
        end_secs = (window_index + 1) * window_secs
        if frame_limit_secs and end_secs > frame_limit_secs:
            break  # every later window is past the limit as well

        logits, _ = model.infer_tf(window[np.newaxis, :])
        probabilities = tf.nn.softmax(logits[0]).numpy()

        ## set the appropriate row in the sample submission
        row_mask = sample_submission.row_id == file_id + "_" + str(end_secs)
        sample_submission.loc[row_mask, competition_classes] = probabilities[competition_class_map]

In [None]:
# Step 6: Generate a submission
# Collect the paths of all test soundscapes (glob.glob already returns a list).
test_soundscape_pattern = "/kaggle/input/birdclef-2023/test_soundscapes/*.ogg"
test_samples = glob.glob(test_soundscape_pattern)
test_samples

In [None]:
# Load the submission template and store every class column as float32.
sample_sub = pd.read_csv("/kaggle/input/birdclef-2023/sample_submission.csv")
class_columns_f32 = sample_sub[competition_classes].astype(np.float32)
sample_sub[competition_classes] = class_columns_f32
sample_sub.head()

In [None]:
# When the submission template has exactly 3 rows, score only the first 15
# seconds of each file; otherwise score every window of every soundscape.
frame_limit_secs = 15 if sample_sub.shape[0] == 3 else None
for sample_filename in tqdm(test_samples, desc="predicting"):
    # Pass the computed limit through; a hard-coded 15 here would leave every
    # row past the first 15 seconds unpredicted on a full test set.
    predict_for_sample(sample_filename, sample_sub, frame_limit_secs=frame_limit_secs)

In [None]:
# Display the submission table after the prediction loop has filled it in.
sample_sub

In [None]:
# Write the table to submission.csv, leaving out the DataFrame index column.
sample_sub.to_csv("submission.csv", index=False)