In [35]:
#import Libraries
import numpy as np
from pydub import AudioSegment
import os
import glob
import tsfel
import csv

In [37]:
#configuration
# Parameters read by the feature-extraction cell, which cuts each track into
# n_samples consecutive windows of fs*t array elements, starting at the
# beginning of the track.
fs=44100        #audio sampling frequency (presumably Hz); also passed to the tsfel spectral functions
t=2             #time of each music sample (seconds, assuming fs is in Hz); one window spans fs*t elements
n_samples=10    #number of samples (windows) used in each music track

In [39]:
#list all the mp3 in audio folder:
# Build the folder path with os.path.join so the notebook also runs on
# non-Windows systems; the trailing "" keeps a path separator at the end,
# so the value is still ".\audio_files\" on Windows.
audio_folder = os.path.join(".", "audio_files", "")
# glob returns matches in arbitrary, OS-dependent order. The extraction cell
# assigns class labels by position in this list, so sort it to make the
# labels reproducible across runs and machines.
mp3_files = sorted(glob.glob(audio_folder+"*.mp3"))
n_mp3 = len(mp3_files)

print('List of Audio Files:', *mp3_files, sep='\n- ')

List of Audio Files:
- .\audio_files\cozy-romantic-relaxing-podcast-jazz-music-212468.mp3
- .\audio_files\easy-podcast-intro-theme-255102.mp3
- .\audio_files\elevator-small-talk-255085.mp3


In [41]:
#extract features
# Spectral descriptors computed for every segment, listed in the order they
# are stored in each feature row (right after the class label).
_spectral = tsfel.feature_extraction.features
_feature_functions = (
    _spectral.spectral_centroid,
    _spectral.spectral_decrease,
    _spectral.spectral_kurtosis,
    _spectral.spectral_skewness,
    _spectral.spectral_slope,
    _spectral.spectral_spread,
    _spectral.spectral_variation,
)
segment_len = fs*t  #number of array elements in one segment

features=[]     #list of features: one row [target, 7 spectral values] per segment
target = 0      #class label: position of the track in mp3_files
#Loop all the available audio files
for track in mp3_files:

    # Load MP3 file
    audio = AudioSegment.from_file(track)

    #convert to mono
    audio = audio.set_channels(1)

    # Resample when the file's own rate differs from fs; otherwise the fs*t
    # slicing would not cover t seconds and the frequencies reported by the
    # spectral functions (which are given fs) would be wrong.
    if audio.frame_rate != fs:
        audio = audio.set_frame_rate(fs)

    # Convert to NumPy array
    signal = np.array(audio.get_array_of_samples())

    #loop for the configured number of samples
    for n in range(n_samples):
        start = n*segment_len
        s = signal[start:start+segment_len] #extract part of the signal

        # A track shorter than n_samples*t seconds yields a truncated or empty
        # slice here; stop rather than extract features from a partial segment.
        if len(s) < segment_len:
            print('Track too short, stopping after', n, 'segment(s):', track)
            break

        #extract features
        d = [target] #target class first, then the spectral features
        d.extend(extract(s, fs) for extract in _feature_functions)
        features.append(d)
        print(d)

    target += 1

[0, 1787.4519491172243, 0.0027301266851156723, 9.974942045586284, 2.6980493566758823, -5.1695487617545056e-09, 3607.4835748521764, 0.13289896596282513]
[0, 1171.500284693019, 0.0027468021297969414, 19.769406837503556, 3.8058074443526726, -5.514249774034541e-09, 2553.1728616708074, 0.11167508241567459]
[0, 1707.4691243910108, 0.003189365577479107, 10.245730195498057, 2.735916669502417, -5.214309028250279e-09, 3479.34644024243, 0.12167564469281167]
[0, 1987.9531770558785, 0.0020221916410620416, 8.917202451102954, 2.5405630405294315, -5.057343567377034e-09, 3785.0560542992844, 0.16476461999674552]
[0, 1984.0337094862466, 0.002046553963955552, 9.887130973133125, 2.6865797774199462, -5.059536993447429e-09, 3688.3857151243124, 0.08448946227665755]
[0, 1807.7615795269537, 0.002664350925341825, 10.044555350251663, 2.7236496742926883, -5.1581830157623875e-09, 3716.893375733205, 0.18709330249711198]
[0, 2256.7708733049117, 0.002413663697296337, 8.480128548888331, 2.453131088131762, -4.9069068734

In [33]:
#save features list in the file audio_features.csv
# Every entry of `features` becomes one CSV line (class label followed by the
# spectral values); no header row is written.
with open("audio_features.csv", "w", newline="") as csv_out:
    csv.writer(csv_out).writerows(features)