In [10]:
import librosa
import numpy as np

def extract_features(audio_file, n_fft=64, verbose=True):
    """Summarise one audio file as a flat 1-D feature vector.

    The file is loaded with ``librosa.load(..., sr=None)`` and a set of
    librosa features is computed. The MFCCs and their first- and second-order
    deltas are averaged along axis 1 (one value per coefficient); every other
    feature is reduced to a single scalar with ``np.mean`` over all of its
    values. The pieces are concatenated with ``np.hstack`` in this order:

        mfcc_mean, delta1_mean, delta2_mean, zcr, spectral_rolloff,
        spectral_centroid, rms, chroma_stft, chroma_cqt, chroma_cens,
        entropy, spectral_flatness, spectral_bandwidth, spectral_contrast,
        poly, tempogram, tonnetz

    Parameters
    ----------
    audio_file : path-like or file object
        Passed straight through to ``librosa.load``.
    n_fft : int, default 64
        FFT window size forwarded to the MFCC, spectral and ``chroma_stft``
        calls, and used as ``frame_length`` for RMS. It is a fixed value; it
        is *not* adapted to the signal length. The remaining features
        (ZCR, chroma_cqt, chroma_cens, poly, tempogram, tonnetz) use
        librosa's own defaults.
    verbose : bool, default True
        When true, print every feature group, a separator line and the final
        vector length (the original behaviour). Pass ``False`` to silence the
        output, e.g. when processing many files.

    Returns
    -------
    numpy.ndarray
        1-D vector; with 60 MFCC rows this is 3 * 60 + 14 = 194 entries.
    """
    n_mfcc = 60

    y, sr = librosa.load(audio_file, sr=None)

    # NOTE(review): hop_length is left at librosa's default (512 according to
    # the librosa docs), which is larger than n_fft=64, so analysis frames
    # would not overlap -- confirm this is intended.

    # MFCCs plus first/second-order deltas, each averaged per coefficient.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
    mfcc_mean = np.mean(mfcc, axis=1)
    delta1_mean = np.mean(librosa.feature.delta(mfcc), axis=1)
    delta2_mean = np.mean(librosa.feature.delta(mfcc, order=2), axis=1)

    # Zero-crossing rate (librosa default framing).
    zcr = np.mean(librosa.feature.zero_crossing_rate(y))

    # Spectral summary statistics and RMS energy.
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=n_fft))
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft))
    rms = np.mean(librosa.feature.rms(y=y, frame_length=n_fft))

    # Chroma features; only the STFT variant takes n_fft.
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr, n_fft=n_fft))
    chroma_cqt = np.mean(librosa.feature.chroma_cqt(y=y, sr=sr))
    chroma_cens = np.mean(librosa.feature.chroma_cens(y=y, sr=sr))

    # "Entropy": -sum(p * log p) over the strictly positive MFCC means only
    # (non-positive values are dropped so the log is defined).
    # NOTE(review): the MFCC means are not normalised to sum to 1, so this is
    # not a Shannon entropy of a probability distribution. The formula is kept
    # as-is to leave existing feature values unchanged -- confirm intent.
    positive_mfcc_mean = mfcc_mean[mfcc_mean > 0]
    entropy = -np.sum(np.log(positive_mfcc_mean) * positive_mfcc_mean)

    spectral_flatness = np.mean(librosa.feature.spectral_flatness(y=y, n_fft=n_fft))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=n_fft))
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr, n_fft=n_fft))

    # Features computed with librosa defaults.
    poly = np.mean(librosa.feature.poly_features(y=y, sr=sr))
    tempogram = np.mean(librosa.feature.tempogram(y=y, sr=sr))
    tonnetz = np.mean(librosa.feature.tonnetz(y=y, sr=sr))

    # Single source of truth for both the output vector and the debug dump,
    # so the two can never drift out of sync.
    parts = [
        mfcc_mean, delta1_mean, delta2_mean, zcr, spectral_rolloff,
        spectral_centroid, rms, chroma_stft, chroma_cqt, chroma_cens,
        entropy, spectral_flatness, spectral_bandwidth, spectral_contrast,
        poly, tempogram, tonnetz,
    ]
    features = np.hstack(parts)

    if verbose:
        for ft in parts:
            print(f'{ft = }')
        print("-" * 20)
        print(f"Length of Features extracted: {len(features)}")

    return features

In [11]:
# Extract the feature vector for real.wav; as the cell's last expression it is
# also displayed as the cell output.
extract_features("real.wav")

ft = array([-8.0206006e+02, -4.1369869e+01,  1.5642055e+01,  7.7201903e-01,
       -1.4108875e+01, -1.6313097e+00, -7.7375774e+00, -6.2318130e+00,
       -5.7948713e+00, -4.9274054e-01, -1.1952804e+01, -4.7312441e+00,
       -5.4782448e+00, -4.6726408e+00, -1.7094040e+00, -1.0462088e+01,
       -8.6670570e+00, -7.8843827e+00, -8.6436930e+00, -1.0897948e+01,
       -1.6363104e+01, -9.4146023e+00,  2.3177643e-01,  2.5614014e+01,
        3.7843651e+01,  4.5710751e+01,  2.2687120e+01, -7.7535543e+00,
       -1.6019367e+01, -6.3929186e+00,  7.6414051e+00,  1.1137823e+01,
       -7.5247059e+00, -1.2651187e+01, -8.1710205e+00,  6.3941069e+00,
        1.4830893e+00, -1.4505834e+01, -1.0562977e+01,  6.0277123e+00,
        4.6135182e+00, -2.3084652e+01, -2.7690603e+01,  2.0789633e+00,
       -7.1297917e+00, -2.5503143e+01, -1.1742363e+01,  2.0813429e+01,
        3.9397556e+01,  2.6184132e+01,  1.7992516e+01,  1.4421191e+01,
       -1.2575236e+01, -1.4499277e+01,  8.4370213e+00,  1.9496798e+01,
 

array([-8.02060059e+02, -4.13698692e+01,  1.56420546e+01,  7.72019029e-01,
       -1.41088753e+01, -1.63130975e+00, -7.73757744e+00, -6.23181295e+00,
       -5.79487133e+00, -4.92740542e-01, -1.19528036e+01, -4.73124409e+00,
       -5.47824478e+00, -4.67264080e+00, -1.70940399e+00, -1.04620876e+01,
       -8.66705704e+00, -7.88438272e+00, -8.64369297e+00, -1.08979483e+01,
       -1.63631039e+01, -9.41460228e+00,  2.31776431e-01,  2.56140137e+01,
        3.78436508e+01,  4.57107506e+01,  2.26871204e+01, -7.75355434e+00,
       -1.60193672e+01, -6.39291859e+00,  7.64140511e+00,  1.11378231e+01,
       -7.52470589e+00, -1.26511869e+01, -8.17102051e+00,  6.39410686e+00,
        1.48308933e+00, -1.45058336e+01, -1.05629768e+01,  6.02771235e+00,
        4.61351824e+00, -2.30846519e+01, -2.76906033e+01,  2.07896328e+00,
       -7.12979174e+00, -2.55031433e+01, -1.17423630e+01,  2.08134289e+01,
        3.93975563e+01,  2.61841316e+01,  1.79925156e+01,  1.44211912e+01,
       -1.25752363e+01, -