In [4]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.fftpack import dct
import librosa.util
import soundfile as sf

In [5]:
def dc_removal(signal):
    mean = np.mean(signal)
    return signal - mean

def preemphasize(signal, alpha=0.97):
    return np.append(signal[0], signal[1:] - alpha * signal[:-1])

def frame_blocking(signal, frame_size=400, hop_size=160):
    frames = []
    for i in range(0, len(signal) - frame_size + 1, hop_size):
        frames.append(signal[i:i+frame_size])
    return np.array(frames)

def apply_window(frames, window_type='hamming'):
    window = getattr(np, window_type)(len(frames[0]))
    return frames * window

def compute_fft(frames):
    frames_fft = np.fft.fft(frames, axis=1)
    return frames_fft[:, :frames.shape[1] // 2 + 1]

def mel_frequency_warping(y, sr, n_mfcc=13):
    y = librosa.util.fix_length(y.astype(float), size=y.shape[0] + 1)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfccs

def discrete_cosine_transform(mel_spectrogram, n_mfcc=13):
    return dct(mel_spectrogram, type=2, axis=0, norm='ortho')[:n_mfcc]

def lifter(mfcc, L=22):
    n_coeffs = mfcc.shape[0]
    n = np.arange(n_coeffs)
    lifter = 1 + (L / 2) * np.sin(np.pi * n / L)
    return lifter * mfcc

def compute_mfccs(y, sr, n_mfcc=13, L=22):
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return lifter(mfccs, L)

In [None]:
directory_path = "D:\\Project\\Joki\\KNN-MFCC-REV\\Audio1"

all_files = os.listdir(directory_path)
wav_files = [file for file in all_files if file.endswith(".wav")]

df_result = pd.DataFrame()

for wav_file in wav_files:
    file_path = os.path.join(directory_path, wav_file)
    audio_data, sample_rate = sf.read(file_path, dtype='float32')

    print(f"\nProcessing file: {wav_file}")

    # DC-Removal
    audio_data_dc = dc_removal(audio_data)
    audio_data_dc_flat = audio_data_dc.flatten()
    df_dc = pd.DataFrame({"DC-Removal": audio_data_dc_flat})
    print("DC-Removal DataFrame:")
    print(df_dc)

    # Preemphasize
    audio_data_preemphasized = preemphasize(audio_data_dc)
    df_preemphasized = pd.DataFrame({"Preemphasize": audio_data_preemphasized})
    print("\nPreemphasize DataFrame:")
    print(df_preemphasized.head())  

    # Frame Blocking
    frames = frame_blocking(audio_data_preemphasized)
    df_frames = pd.DataFrame(frames, columns=[f"Sample_{i}" for i in range(frames.shape[1])])
    print("\nFrame Blocking DataFrame:")
    print(df_frames.head())  

    # Windowing
    frames_windowed = apply_window(frames)
    df_windowed = pd.DataFrame(frames_windowed, columns=[f"Sample_{i}_Windowed" for i in range(frames_windowed.shape[1])])
    print("\nWindowing DataFrame:")
    print(df_windowed.head())  

    # FFT
    frames_fft = compute_fft(frames_windowed)
    df_fft = pd.DataFrame(frames_fft, columns=[f"FFT_{i}" for i in range(frames_fft.shape[1])])
    print("\nFFT DataFrame:")
    print(df_fft.head())  
    
    # Define n_mels before this section
    n_mels = 20  

    # After the compute_fft function
    print(f"Frames Shape: {frames.shape}")
    print(f"Frames FFT Shape: {frames_fft.shape}")

    # Mel-frequency warping (MFW)
    mel_filterbanks = librosa.filters.mel(sr=sample_rate, n_fft=frames_fft.shape[1], n_mels=n_mels).T
    print(f"Mel Filterbanks Shape: {mel_filterbanks.shape}")

    # Mel-frequency warping (MFW)
    mel_spectrogram = mel_frequency_warping(frames_fft, sample_rate)
    flat_mel_spectrogram = mel_spectrogram.reshape(mel_spectrogram.shape[0], -1)
    df_mfw = pd.DataFrame(flat_mel_spectrogram.T, columns=[f"MFCC_{i+1}" for i in range(flat_mel_spectrogram.shape[0])])
    print("\nMel-frequency Warping DataFrame:")
    print(df_mfw.head())  

    # Discrete Cosine Transform (DCT)
    mfccs = discrete_cosine_transform(mel_spectrogram)
    
    # Apply Cepstral Liftering
    mfccs_lifted = lifter(mfccs)
    flat_mfccs = mfccs_lifted.reshape(mfccs_lifted.shape[0], -1)
    df_dct = pd.DataFrame(flat_mfccs.T, columns=[f"MFCC_{i+1}" for i in range(flat_mfccs.shape[0])])
    print("\nDiscrete Cosine Transform DataFrame (After Liftering):")
    print(df_dct.head())  

    # Mel-frequency cepstral coefficients (MFCCs)
    mfccs_librosa = compute_mfccs(audio_data, sample_rate)
    flat_mfccs_librosa = mfccs_librosa[:, :, 0].T
    df_mfccs_librosa = pd.DataFrame(flat_mfccs_librosa, columns=[f"MFCC_{i}" for i in range(flat_mfccs_librosa.shape[1])])
    print("\nMFCCs (Librosa) DataFrame:")
    print(df_mfccs_librosa.head())
    
    samples_to_display = 44100
    if len(audio_data_preemphasized) < samples_to_display:
        samples_to_display = len(audio_data_preemphasized)
    
    signal_samples = audio_data_preemphasized[:samples_to_display]
    

    df_signal = pd.DataFrame(signal_samples).T
    df_signal.insert(0, 'file', wav_file)

    df_result = pd.concat([df_result, df_signal], ignore_index=True)

columns = ['file'] + [str(i) for i in range(1, df_result.shape[1])]
df_result.columns = columns

df_result.to_csv('data_sinyal.csv', index=False)

print(df_result.head())


Processing file: 1lhong 1.wav
DC-Removal DataFrame:
        DC-Removal
0        -0.000017
1        -0.000017
2        -0.000017
3        -0.000017
4        -0.000017
...            ...
416115    0.000990
416116    0.001082
416117    0.001082
416118    0.000990
416119    0.000990

[416120 rows x 1 columns]

Preemphasize DataFrame:
   Preemphasize
0 -1.671910e-05
1 -1.671910e-05
2 -5.015718e-07
3 -5.015718e-07
4 -5.015718e-07

Frame Blocking DataFrame:
       Sample_0      Sample_1      Sample_2      Sample_3      Sample_4  \
0 -1.671910e-05 -1.671910e-05 -5.015718e-07 -5.015718e-07 -5.015718e-07   
1 -5.015718e-07 -5.015718e-07 -5.015718e-07 -5.015718e-07 -5.015718e-07   
2 -5.015718e-07 -5.015718e-07 -5.015718e-07 -5.015718e-07 -5.015718e-07   
3 -5.015718e-07 -5.015718e-07 -5.015718e-07 -5.015718e-07 -5.015718e-07   
4 -5.015718e-07 -5.015718e-07 -5.015718e-07 -5.015718e-07 -5.015718e-07   

       Sample_5      Sample_6      Sample_7      Sample_8      Sample_9  ...  \
0 -5.015718e-