In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import pandas as pd

# Location of the FSC22 metadata table inside the attached Kaggle dataset.
METADATA_CSV = (
    '/kaggle/input/fsc22-dataset/Metadata-20220916T202011Z-001/'
    'Metadata/Metadata V1.0 FSC22.csv'
)

# One row per audio clip; later cells read the "Dataset File Name" and
# "Class ID" columns from this frame.
data_csv = pd.read_csv(METADATA_CSV)




In [2]:
# Show the metadata frame as this cell's output.
data_csv


Unnamed: 0,Source File Name,Dataset File Name,Class ID,Class Name
0,17548__A.wav,1_10101.wav,1,Fire
1,17548_B.wav,1_10102.wav,1,Fire
2,17548_C.wav,1_10103.wav,1,Fire
3,17548_D.wav,1_10104.wav,1,Fire
4,17548_E.wav,1_10105.wav,1,Fire
...,...,...,...,...
2020,164882__timsc__squirrel-call.wav,27_12771.wav,27,Squirrel
2021,162648__cognito-perceptu__park-bench-atmospher...,27_12772.wav,27,Squirrel
2022,122260__echobones__angry-squirrel-long.wav,27_12773.wav,27,Squirrel
2023,82828__noisecollector__angrysquirrel-creepingt...,27_12774.wav,27,Squirrel


The dataset contains 2025 audio clips.

To augment and enlarge the dataset, we pitch-shift every audio file by 2 steps up and 2 steps down, which triples the number of clips.

In [3]:
import librosa
import soundfile as sf
from tqdm import tqdm
import os

# Pitch-shift augmentation.
# Every source clip is written to `output_dir` unchanged, plus one extra copy
# per entry of PITCH_STEPS named "<name>_pitch<step><ext>".
input_dir = "/kaggle/input/fsc22-dataset/Audio Wise V1.0-20220916T202003Z-001/Audio Wise V1.0"

output_dir = "/kaggle/working/FSC22_augmented"
os.makedirs(output_dir, exist_ok=True)

# Offsets handed to librosa.effects.pitch_shift as `n_steps`.
PITCH_STEPS = [2, -2]

# Filter before building the progress bar so its total counts only the files
# that are actually processed, and sort so the processing order does not
# depend on the arbitrary order returned by os.listdir.
wav_files = sorted(name for name in os.listdir(input_dir) if name.endswith(".wav"))

for file in tqdm(wav_files):
    file_path = os.path.join(input_dir, file)

    # The stem/extension split does not depend on the pitch step, so it is
    # computed once per file rather than once per step.
    base, ext = os.path.splitext(file)

    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)

    # Unmodified copy of the source clip.
    sf.write(os.path.join(output_dir, file), y, sr)

    for step in PITCH_STEPS:
        y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=step)
        new_filename = f"{base}_pitch{step}{ext}"
        sf.write(os.path.join(output_dir, new_filename), y_shifted, sr)

print("Augmentation Completed!")




100%|██████████| 2025/2025 [10:17<00:00,  3.28it/s]

Augmentation Completed!





In [4]:
# Lookup table: dataset file name -> class ID, built from the metadata frame.
# If a file name appears in more than one row, the last row wins.
filename_to_label = dict(
    zip(data_csv["Dataset File Name"], data_csv["Class ID"])
)

def get_orginal_filename(filename):
    """Map an augmented file name back to the clip it was derived from.

    Augmented copies carry a trailing ``_pitch<step>`` marker just before the
    extension (for example ``1_10101_pitch-2.wav``). Only that trailing
    marker is stripped; a name that merely contains ``_pitch`` somewhere
    else is left intact rather than being cut at the first occurrence.

    Args:
        filename: File name, with extension, of an original or augmented clip.

    Returns:
        The file name with the trailing pitch marker removed. Names without
        such a marker are returned unchanged.
    """
    import re  # local import keeps this cell independent of the import cells

    base, ext = os.path.splitext(filename)
    # Anchored at the end of the stem so only the augmentation suffix matches;
    # the step may be negative and may carry a fractional part.
    base = re.sub(r"_pitch[+-]?\d+(?:\.\d+)?$", "", base)
    return base + ext
    

Now that the dataset has been augmented, the next step is feature extraction.

For audio signal processing, we extract MFCC and mel-spectrogram features using the librosa library.

In [6]:
import numpy as np
# Directory that is scanned for the .wav files to extract features from.
auto_dir = "/kaggle/working/FSC22_augmented"

# Feature extraction parameters, shared by the mel-spectrogram and MFCC helpers.

SR = 22050 # sampling rate (Hz) passed to librosa.load for every file
N_FFT = 2048  # number of samples per FFT window
HOP_LENGTH = 512 # number of samples between the starts of successive frames
N_MELS = 128 # number of mel frequency bands
N_MFCC = 13 # number of MFCC coefficients

def extract_mel_spectrogram(file_path):
    """Return the dB-scaled mel spectrogram of the audio file at ``file_path``.

    The file is loaded at ``SR``, converted to a mel power spectrogram using
    ``N_FFT``, ``HOP_LENGTH`` and ``N_MELS``, and then expressed in decibels
    relative to that spectrogram's own maximum.
    """
    waveform, rate = librosa.load(file_path, sr=SR)  # 1-D waveform array
    power_spec = librosa.feature.melspectrogram(
        y=waveform,
        sr=rate,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        n_mels=N_MELS,
    )
    return librosa.power_to_db(power_spec, ref=np.max)

def extract_mfcc(file_path):
    """Return the MFCC matrix of the audio file at ``file_path``.

    The file is loaded at ``SR`` and ``N_MFCC`` coefficients are computed per
    frame with ``N_FFT`` and ``HOP_LENGTH``.

    MFCC pipeline: STFT -> triangular mel filterbank -> log -> DCT.
    """
    waveform, rate = librosa.load(file_path, sr=SR)
    return librosa.feature.mfcc(
        y=waveform,
        sr=rate,
        n_mfcc=N_MFCC,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
    )



#Looping 


def _to_object_array(matrices):
    """Pack a list of 2-D arrays into a 1-D object array, one matrix per slot.

    ``np.array(matrices, dtype=object)`` is avoided on purpose: when all
    matrices share a shape it builds an N-D array of boxed Python floats, and
    when only the frame count differs it raises a broadcasting error.
    Filling a pre-allocated object array element by element always yields
    shape ``(len(matrices),)``.
    """
    packed = np.empty(len(matrices), dtype=object)
    for index, matrix in enumerate(matrices):
        packed[index] = matrix
    return packed


mel_features = []
mfcc_features = []
file_names = []

# Sorted so the sample order (and therefore any seeded split made from it)
# does not depend on the arbitrary order returned by os.listdir.
for file in tqdm(sorted(os.listdir(auto_dir))):
    if not file.endswith(".wav"):
        continue

    path = os.path.join(auto_dir, file)

    mel_features.append(extract_mel_spectrogram(path))
    mfcc_features.append(extract_mfcc(path))
    file_names.append(file)

# Index i of both arrays holds the features of file_names[i].
mel_features = _to_object_array(mel_features)
mfcc_features = _to_object_array(mfcc_features)


        

100%|██████████| 6075/6075 [03:40<00:00, 27.57it/s]


In [7]:
import os
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import librosa

# Build one fixed-length feature vector per clip: the mean of each mel band
# over time. The originating clip of every sample is tracked as well, so the
# pitch-shifted copies of a clip can be kept on the same side of the split.
features = []
labels = []
groups = []

for mel, fname in zip(mel_features, file_names):
    orginal_fname = get_orginal_filename(fname)
    features.append(np.mean(mel, axis=1))
    labels.append(filename_to_label[orginal_fname])
    groups.append(orginal_fname)

x = np.asarray(features, dtype=np.float32)
y = np.asarray(labels, dtype=np.int64)  # class IDs exactly as in the metadata
groups = np.asarray(groups)

# Split on the ORIGINAL clips, not on individual samples. A per-sample split
# would put pitch-shifted variants of one recording into both train and
# validation, leaking validation data into training.
unique_groups, first_index = np.unique(groups, return_index=True)
train_groups, val_groups = train_test_split(
    unique_groups,
    test_size=0.2,
    random_state=42,
    stratify=y[first_index],  # label of each original clip
)

train_mask = np.isin(groups, train_groups)
val_mask = ~train_mask

# The metadata rows shown above use Class ID 1 and up; the models below
# expect labels starting at 0, hence the shift by one.
X_train, y_train = x[train_mask], y[train_mask] - 1
X_val, y_val = x[val_mask], y[val_mask] - 1

# No original clip may contribute samples to both sides.
assert not set(groups[train_mask]) & set(groups[val_mask])
assert len(X_train) + len(X_val) == len(x)




In [None]:

import xgboost as xgb

# Gradient-boosted baseline trained on the X_train / X_val feature vectors.
# Inputs are converted to arrays explicitly so the cell behaves the same
# whether the split produced lists or ndarrays.
dtrain = xgb.DMatrix(np.asarray(X_train, dtype=np.float32), label=np.asarray(y_train))
dval = xgb.DMatrix(np.asarray(X_val, dtype=np.float32), label=np.asarray(y_val))

params = {
    "objective": "multi:softmax",  # predict() returns the class index directly
    "num_class": 27,
    # A dict literal keeps only the last value of a repeated key, so listing
    # "eval_metric" twice silently dropped "merror". A list reports both;
    # "mlogloss" stays last, matching the metric that was effectively used.
    "eval_metric": ["merror", "mlogloss"],
    "subsample": 1,
    "min_child_weight": 1,
    "max_depth": 6,
    "learning_rate": 0.3,
}

num_rounds = 100

model = xgb.train(
    params,
    dtrain,
    num_boost_round=num_rounds,
    evals=[(dval, "validation")],
)

preds = model.predict(dval)
# Compare against an ndarray so the element-wise equality is well defined.
accuracy = (preds == np.asarray(y_val)).mean()
print("validation _accuracy:", accuracy)

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense, Dropout)
from tensorflow.keras.callbacks import EarlyStopping

2026-01-31 16:54:41.313671: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1769878481.594540      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1769878481.674815      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1769878482.353276      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1769878482.353343      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1769878482.353347      55 computation_placer.cc:177] computation placer alr

In [9]:
Num_Classes = 27
# (mel bands, time frames, channels) expected by the network.
# NOTE(review): extract_mel_spectrogram returns a 2-D (N_MELS, frames) array,
# so a channel axis has to be added somewhere before training. With
# HOP_LENGTH=512, 431 frames is about 10 s of audio at SR=22050 - confirm this
# matches the clips actually fed to the model.
Input_Shape = (128, 431, 3)

# (filters, kernel_size) of the "same"-padded convolutions that follow the
# first conv/pool stage, in order.
_SAME_CONV_SPECS = [
    (48, (5, 5)),
    (48, (5, 5)),
    (60, (4, 4)),
    (72, (4, 4)),
    (80, (3, 3)),
    (80, (3, 3)),
]

model = Sequential()

# An explicit Input object replaces the `input_shape=` argument on the first
# layer, which current Keras flags with a warning.
model.add(tf.keras.Input(shape=Input_Shape))

# Stage 1: unpadded convolution followed by the network's only pooling layer.
model.add(Conv2D(filters=24, kernel_size=(6, 6), activation='relu'))
model.add(MaxPooling2D(pool_size=(4, 2)))

# Stage 2: stack of "same"-padded convolutions (spatial size is preserved).
for n_filters, kernel in _SAME_CONV_SPECS:
    model.add(Conv2D(
        filters=n_filters,
        kernel_size=kernel,
        activation='relu',
        padding="same",
    ))

# Classifier head.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(Num_Classes, activation='softmax'))




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2026-01-31 16:54:59.507659: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [None]:
# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # integer class labels
    metrics=['accuracy'],
)

# A plain Python list of per-sample arrays is not a valid batch for a
# single-input model, so stack everything into ndarrays first. New names are
# used so the variables produced by the split cell are left untouched.
cnn_X_train = np.asarray(X_train, dtype=np.float32)
cnn_X_val = np.asarray(X_val, dtype=np.float32)
cnn_y_train = np.asarray(y_train)
cnn_y_val = np.asarray(y_val)

# Fail early, with an actionable message, when the features do not match the
# network input (e.g. time-averaged mel vectors instead of spectrogram images).
expected_sample_shape = tuple(model.input_shape[1:])
for split_name, batch in (("X_train", cnn_X_train), ("X_val", cnn_X_val)):
    if batch.shape[1:] != expected_sample_shape:
        raise ValueError(
            f"{split_name} has per-sample shape {batch.shape[1:]}, but the model "
            f"expects {expected_sample_shape}; build spectrogram tensors of that "
            "shape before training the CNN."
        )

history = model.fit(
    cnn_X_train, cnn_y_train,
    validation_data=(cnn_X_val, cnn_y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
)

model.summary()

