In [4]:
import pandas as pd

# 1.1 Read the CSV
df = pd.read_csv('BirdsVoice.csv')

# 1.2 Compute total recordings per species
counts = df['common_name'].value_counts()
top20 = counts.nlargest(20).index

# 1.3 Filter to top 20 and reset index
df = df[df['common_name'].isin(top20)].reset_index(drop=True)
print(df.shape)            # ~600 × 10
print(df['common_name'].unique())

(600, 10)
['Brazilian Tinamou' 'Red-legged Tinamou' 'Yellow-legged Tinamou'
 'Black-capped Tinamou' 'Thicket Tinamou' 'Variegated Tinamou'
 "Bartlett's Tinamou" 'Small-billed Tinamou' 'Tataupa Tinamou'
 'Red-winged Tinamou' 'Andean Tinamou' 'White-bellied Nothura'
 'Spotted Nothura' 'Dwarf Tinamou' 'Orange-footed Scrubfowl'
 'Plain Chachalaca' 'Little Chachalaca' 'Band-tailed Guan' 'Andean Guan'
 'Rusty-margined Guan']


In [5]:
import os

# 2.1 Convert “M:SS” → seconds
def length_to_seconds(x):
    m, s = x.split(':')
    return int(m) * 60 + int(s)

df['duration_s'] = df['recording_length'].apply(length_to_seconds)

# 2.2 Assume you’ve downloaded audio into ./audio/, named by xc_id:
df['filepath'] = df['xc_id'].apply(lambda id: os.path.join('audio', f'{id}.mp3'))

# 2.3 Quick check
print(df[['filepath','duration_s']].head())

             filepath  duration_s
0  audio/XC524787.mp3          57
1  audio/XC521357.mp3           6
2  audio/XC686176.mp3          53
3  audio/XC643154.mp3         140
4  audio/XC606110.mp3          42


In [None]:
from sklearn.model_selection import train_test_split

# 3.1 Label encoding
label_map = {name:idx for idx,name in enumerate(top20)}
df['label'] = df['common_name'].map(label_map)

# 3.2 60/20/20 stratified split
trainval, test = train_test_split(df, test_size=0.20, 
                                  stratify=df['label'], random_state=42)
train, val   = train_test_split(trainval, test_size=0.25,  # 0.25×0.8 = 0.20
                                 stratify=trainval['label'], random_state=42)

print(train.shape, val.shape, test.shape)

(360, 13) (120, 13) (120, 13)


In [16]:
import librosa
import numpy as np
import os

def extract_mel(filepath):
    try:
        # Check if file exists first
        if not os.path.exists(filepath):
            print(f"File not found: {filepath}")
            return None
            
        # Try loading with librosa
        y, sr = librosa.load(filepath, sr=None, duration=None, mono=True)
        
        # Extract mel spectrogram
        mel_spec = librosa.feature.melspectrogram(
            y=y, 
            sr=sr, 
            n_mels=128,  # adjust as needed
            fmax=8000    # adjust as needed
        )
        
        # Convert to log scale
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
        
        return mel_spec_db
        
    except Exception as e:
        print(f"Error loading {filepath}: {e}")
        return None

In [8]:
import os

# 2.1 Convert “M:SS” → seconds
def length_to_seconds(x):
    m, s = x.split(':')
    return int(m) * 60 + int(s)

df['duration_s'] = df['recording_length'].apply(length_to_seconds)

# 2.2 Build filepaths in your nested audio folder
#    (adjust the folder name if yours differs)
audio_dir = os.path.join('audio',
                         'sound-of-114-species-of-birds-till-2022')

df['filepath'] = df['xc_id'].apply(
    lambda xc_id: os.path.join(audio_dir, f"{xc_id}.mp3")
)

# 2.3 Quick sanity check
print(df[['xc_id', 'filepath', 'duration_s']].head())


      xc_id                                           filepath  duration_s
0  XC524787  audio/sound-of-114-species-of-birds-till-2022/...          57
1  XC521357  audio/sound-of-114-species-of-birds-till-2022/...           6
2  XC686176  audio/sound-of-114-species-of-birds-till-2022/...          53
3  XC643154  audio/sound-of-114-species-of-birds-till-2022/...         140
4  XC606110  audio/sound-of-114-species-of-birds-till-2022/...          42


In [19]:
import tensorflow as tf

def build_dataset(df_subset):
    X, y = [], []
    for _, row in df_subset.iterrows():
        feat = extract_mel(row['filepath'])
        X.append(feat[..., np.newaxis])                # (128, T, 1)
        y.append(row['label'])
    X = np.stack(X, axis=0)
    y = tf.keras.utils.to_categorical(y, num_classes=len(label_map))
    return X, y

X_train, y_train = build_dataset(train)
X_val,   y_val   = build_dataset(val)
X_test,  y_test  = build_dataset(test)

input_shape = X_train.shape[1:]  # e.g. (128, ~216, 1)

File not found: audio/sound-of-114-species-of-birds-till-2022/XC66646.mp3


TypeError: 'NoneType' object is not subscriptable

In [15]:
# Debug your file paths
print("Checking first few file paths:")
for i, row in train.head().iterrows():
    filepath = row['filepath']
    exists = os.path.exists(filepath)
    print(f"{filepath} - Exists: {exists}")

Checking first few file paths:
audio/sound-of-114-species-of-birds-till-2022/XC66646.mp3 - Exists: False
audio/sound-of-114-species-of-birds-till-2022/XC451828.mp3 - Exists: False
audio/sound-of-114-species-of-birds-till-2022/XC221871.mp3 - Exists: False
audio/sound-of-114-species-of-birds-till-2022/XC500229.mp3 - Exists: False
audio/sound-of-114-species-of-birds-till-2022/XC529057.mp3 - Exists: False


In [None]:
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(label_map), activation='softmax'),
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=30,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)]
)
