In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from glob import glob

import librosa
import librosa.display
import IPython.display as ipd

from itertools import cycle

sns.set_theme(style="white", palette=None)
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"])

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/audio-files/wavs/sinh_2067.wav
/kaggle/input/audio-files/wavs/sinh_1672.wav
/kaggle/input/audio-files/wavs/sinh_4242.wav
/kaggle/input/audio-files/wavs/sinh_5025.wav
/kaggle/input/audio-files/wavs/sinh_4493.wav
/kaggle/input/audio-files/wavs/sinh_4581.wav
/kaggle/input/audio-files/wavs/sinh_1821.wav
/kaggle/input/audio-files/wavs/sinh_2219.wav
/kaggle/input/audio-files/wavs/sinh_3295.wav
/kaggle/input/audio-files/wavs/sinh_0171.wav
/kaggle/input/audio-files/wavs/sinh_1686.wav
/kaggle/input/audio-files/wavs/sinh_5962.wav
/kaggle/input/audio-files/wavs/sinh_5491.wav
/kaggle/input/audio-files/wavs/sinh_4284.wav
/kaggle/input/audio-files/wavs/sinh_0409.wav
/kaggle/input/audio-files/wavs/sinh_6127.wav
/kaggle/input/audio-files/wavs/sinh_5805.wav
/kaggle/input/audio-files/wavs/sinh_4398.wav
/kaggle/input/audio-files/wavs/sinh_1468.wav
/kaggle/input/audio-files/wavs/sinh_2812.wav
/kaggle/input/audio-files/wavs/sinh_5135.wav
/kaggle/input/audio-files/wavs/sinh_5823.wav
/kaggle/in

In [2]:
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd

# Input folder with wav files
input_folder = '/kaggle/input/audio-files/wavs'

# Output CSV file
output_csv = '/kaggle/working/audio_features.csv'

# Parameters
PEAK_LEVEL = 0.99        # Peak normalization
TRIM_TOP_DB = 20  
sr = 22050
n_fft = 1024
hop_length = 256
n_mels = 80

# List to hold feature data
features_list = []

# Loop through all wav files
for file_name in os.listdir(input_folder):
    if file_name.endswith('.wav'):
        file_path = os.path.join(input_folder, file_name)
        
        # Load audio
        y, _ = librosa.load(file_path, sr=sr)
        
        # Mel spectrogram
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        
        # Energy (Root Mean Square)
        rms = librosa.feature.rms(y=y)
        energy = float(np.mean(rms))
        
        # Zero Crossing Rate
        zcr = np.mean(librosa.feature.zero_crossing_rate(y))
        
        # Spectral Centroid
        spec_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
        
        # Spectral Bandwidth
        spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
        
        # Spectral Roll-off
        rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
        
        # Flatten Mel spectrogram to store in CSV (optional: can also save as .npy separately)
        ##mel_flat = mel_db.flatten()
        ##mel_flat_str = ','.join(map(str, mel_flat[:1000]))  # limit to first 1000 values for CSV brevity
        np.save("audio_mel.npy", mel_db)

        # Add to list
        features_list.append({
            'file_name': file_name,
            'energy': energy,
            'zcr': zcr,
            'spectral_centroid': spec_centroid,
            'spectral_bandwidth': spec_bw,
            'rolloff': rolloff,
            
        })

# Convert to DataFrame
df = pd.DataFrame(features_list)

# Save to CSV
df.to_csv(output_csv, index=False)
print(f"Features saved to {output_csv}")


Features saved to /kaggle/working/audio_features.csv


In [None]:
def preprocess_audio(file_path, save_audio_path):
    # 1️Load audio
    audio, sr = librosa.load(file_path, sr=None, mono=False)
    
    # Convert to mono
    if audio.ndim > 1:
        audio = np.mean(audio, axis=0)
    
    # Resample
    if sr != TARGET_SR:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=TARGET_SR)
        sr = TARGET_SR
    
    # Remove DC offset
    audio = audio - np.mean(audio)
    
    # Peak normalization
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio * (PEAK_LEVEL / peak)
    
    # Trim silence
    audio, _ = librosa.effects.trim(audio, top_db=TRIM_TOP_DB)
    
    # Save processed audio
    sf.write(save_audio_path, audio, sr)
    
    return audio, sr