In [1]:
import os
import pandas as pd
import numpy as np
import librosa

In [2]:
# Function to extract features from audio file
def extract_features(file_path, verbose=True):
    """Compute a fixed-order list of scalar summary features for one audio file.

    The file is read with ``librosa.load(file_path, sr=None)`` and every
    feature is collapsed to a single number with ``np.mean`` (or ``np.std`` /
    ``np.var`` for the spread statistics).

    Parameters
    ----------
    file_path : str or path-like
        Audio file to analyse; handed unchanged to ``librosa.load``.
    verbose : bool, default True
        Print every computed feature as ``label value``. Pass ``False`` to
        extract silently, e.g. when looping over many files.

    Returns
    -------
    list
        Thirteen values, always in this order (callers build positional
        DataFrame columns from it):

        0.  ``f0_mean`` -- placeholder, always ``0``
        1.  the literal ``1`` (its meaning is not defined in this function)
        2.  mean of ``librosa.effects.harmonic(audio)``
        3.  mean of the raw samples
        4.  standard deviation of the raw samples
        5.  variance of the raw samples
        6.  coefficient of variation (std / mean), ``NaN`` when the mean is 0
        7.  duration from ``librosa.get_duration``
        8.  mean over the 13 per-coefficient MFCC means
        9.  mean spectral flatness (stored under the name "spectral envelope")
        10. mean onset strength (stored under the name "spectral flux")
        11. mean RMS (stored under the name "spectral energy")
        12. mean spectral centroid
    """
    audio, sr = librosa.load(file_path, sr=None)

    # Placeholder: no fundamental-frequency estimate is computed here.
    f0_mean = 0

    # Harmonics
    harmonics = np.mean(librosa.effects.harmonic(audio))

    # Basic sample statistics
    audio_mean = np.mean(audio)
    std_dev = np.std(audio)
    variance = np.var(audio)

    # Coefficient of Variance.
    # std / mean is undefined for a zero mean: dividing anyway produces inf
    # (or NaN for 0/0) together with a RuntimeWarning, and an inf would later
    # be serialised by pandas' Excel writer as its ``inf_rep`` text rather
    # than as a number. Report the undefined case as NaN explicitly.
    if audio_mean == 0:
        coeff_variance = np.nan
    else:
        coeff_variance = std_dev / audio_mean

    # Duration
    duration = librosa.get_duration(y=audio, sr=sr)

    # "Spectral Envelope" -- computed as the mean spectral flatness
    spectral_envelope = np.mean(librosa.feature.spectral_flatness(y=audio))

    # "Spectral Flux" -- computed as the mean onset strength
    spectral_flux = np.mean(librosa.onset.onset_strength(y=audio, sr=sr))

    # "Spectral Energy" -- computed as the mean RMS
    spectral_energy = np.mean(librosa.feature.rms(y=audio))

    # Spectral Centroids
    spectral_centroids = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr))

    # MFCCs: average each of the 13 coefficients over axis 1, then average
    # those 13 means into one number.
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    mfccs_mean = np.mean(mfccs, axis=1)
    avg_mfccs_mean = np.mean(mfccs_mean)

    if verbose:
        # Labels (including their trailing spaces) are kept exactly as they
        # were so the printed report is unchanged.
        report = (
            ('Harmonic_avg ', harmonics),
            ('Mean ', audio_mean),
            ('SD ', std_dev),
            ('Variance ', variance),
            ('Cov ', coeff_variance),
            ('Duration', duration),
            ('MFCC ', avg_mfccs_mean),
            ('Spectral Envelop', spectral_envelope),
            ('Spectral Flux ', spectral_flux),
            ('Spectral Energy ', spectral_energy),
            ('Spectral Centroids ', spectral_centroids),
        )
        for label, value in report:
            print(label, value)

    return [f0_mean, 1, harmonics, audio_mean, std_dev, variance, coeff_variance, duration,
            avg_mfccs_mean, spectral_envelope, spectral_flux, spectral_energy, spectral_centroids]

In [3]:
# Workbook that accumulates one row of features per processed recording.
output_file = "D:/Major Project/News_Dataset.xlsx"

# Feature vector for a single recording, wrapped as a one-row frame
# (columns are positional: 0..12, in the order extract_features returns them).
extracted_features = extract_features(
    'D:/Major Project/Autocorrelated Audio Files/News/sound6/H_ac_chain.wav'
)
df = pd.DataFrame([extracted_features])

# If the workbook already exists, keep its rows and add the new one at the
# end; otherwise the new row becomes the first entry.
# NOTE: re-running this cell appends the same row again.
if os.path.isfile(output_file):
    existing_df = pd.read_excel(output_file)
    df = pd.concat([existing_df, df], ignore_index=True)

df.to_excel(output_file, index=False)



Harmonic_avg  3.0908627e-08
Mean  6.241449e-07
SD  0.015955063
Variance  0.00025456402
Cov  25563.074
Duration 11.9059375
MFCC  -35.625557
Spectral Envelop 0.00014277724
Spectral Flux  0.80184066
Spectral Energy  0.010758973
Spectral Centroids  500.1117319503433
