In [1]:
import pandas as pd

# Load the CSV file into a DataFrame
df = pd.read_csv('data/Animal_Sound.csv')

# Print the DataFrame
print(df.columns)

df['path'] = df['name'].apply(lambda x: f"data/sounds/{x}")
df['name'] = df['name'].str.lower().str.split("_").str[0]

all_animals = df['name'].unique()
print(all_animals)

df.to_csv("data/Animal_Sound_processed.csv", index=False)

Index(['name', 'path', 'channels', 'sample_width', 'frame_rate', 'nframes',
       'duration', 'size'],
      dtype='object')
['lion' 'bear' 'cat' 'chicken' 'cow' 'dog' 'dolphin' 'donkey' 'elephant'
 'frog' 'horse' 'monkey' 'sheep']


In [2]:
import librosa
from IPython.display import Audio
import numpy as np
import soundfile as sf

def apply_time_stretch(y, rate_range=(0.8, 1.2)):
    rate = np.random.uniform(*rate_range)
    return librosa.effects.time_stretch(y, rate=rate)

def apply_pitch_shift(y, sr, n_steps_range=(-3, 3)):
    n_steps = np.random.uniform(*n_steps_range)
    return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

def apply_time_delay(y, sr, max_delay_sec=0.5):
    max_delay = int(sr * max_delay_sec)
    delay = np.random.randint(0, max_delay)
    return np.pad(y, (delay, 0))[:len(y)]  # Crop to original length

In [3]:
import os

os.makedirs("data/augmented", exist_ok=True)
augmented_data = []

for path, name in zip(df['path'], df['name']):
    # load the sound file 
    y, sr = librosa.load(path, sr=None)
    
    # apply time-stretching
    y_stretched = apply_time_stretch(y)

    # apply pitch-shifting
    y_stretched_pitch_shifted = apply_pitch_shift(y_stretched, sr)

    # Create output file name
    base_name = os.path.splitext(os.path.basename(path))[0]
    processed_file_name = f"data/augmented/{base_name}_modified.wav"

    # write the augmented sound file
    sf.write(processed_file_name, y_stretched_pitch_shifted, sr)

    # add the new file to the DataFrame
    augmented_data.append({'path': processed_file_name, 'name': name})

df_augmented = pd.DataFrame(augmented_data)
df = pd.concat([df, df_augmented], ignore_index=True)

df.to_csv("data/Animal_Sound_modified.csv", index=False)





In [4]:
import pandas as pd

# Load the full augmented dataset
df = pd.read_csv("data/Animal_Sound_modified.csv")

# Group by 'name' and sample 10 examples per class (with a fixed seed for reproducibility)
df_reduced = df.groupby('name', group_keys=False).apply(lambda x: x.sample(n=10, random_state=42))

# Save the reduced dataset to a new CSV file
df_reduced.to_csv("data/Animal_Sound_reduced.csv", index=False)

print("Saved reduced dataset with 10 samples per class to 'data/Animal_Sound_reduced.csv'")


Saved reduced dataset with 10 samples per class to 'data/Animal_Sound_reduced.csv'


  df_reduced = df.groupby('name', group_keys=False).apply(lambda x: x.sample(n=10, random_state=42))


In [5]:
import librosa
from IPython.display import Audio
import numpy as np

def play(file_path):
    x, Fs = librosa.load(file_path, sr=None)
    print('Class: {}'.format(file_path))
    return Audio(x, rate=Fs)


In [6]:
audio_path = df['path'].iloc[600]
play(audio_path)

Class: data/sounds/Sheep_1.wav
