# In [None]:
import os
import numpy as np
import librosa
import soundfile as sf


def preprocess_audio(audio_path, target_len=256, duration_sec=2, *,
                     sample_rate=16000, n_fft=512, hop_length=256):
    """Load an audio file and return its magnitude spectrogram, time-major.

    The clip is resampled to ``sample_rate`` while loading, truncated or
    zero-padded at the end to ``duration_sec`` seconds, transformed with a
    short-time Fourier transform, and reduced to its first ``target_len``
    frequency bins.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        target_len: Number of leading (lowest-frequency) STFT bins to keep.
            If the STFT yields fewer bins than this, all of them are
            returned and no padding is added.
        duration_sec: Length, in seconds, the clip is forced to before the
            transform.
        sample_rate: Rate passed to ``librosa.load`` as ``sr``.
        n_fft: FFT window size passed to ``librosa.stft``.
        hop_length: Hop between successive STFT frames, in samples.

    Returns:
        2-D array of STFT magnitudes with one row per STFT frame and one
        column per kept frequency bin.

    Raises:
        ValueError: If ``target_len`` or ``duration_sec`` is not positive,
            or if the requested duration is shorter than one sample.
    """
    # Reject degenerate requests before touching the file: they would
    # otherwise surface as an obscure failure inside the STFT or as an
    # empty / oddly sliced result.
    if target_len <= 0:
        raise ValueError(f"target_len must be positive, got {target_len!r}")
    if duration_sec <= 0:
        raise ValueError(f"duration_sec must be positive, got {duration_sec!r}")

    samples, loaded_rate = librosa.load(audio_path, sr=sample_rate)

    num_samples = int(duration_sec * loaded_rate)
    if num_samples <= 0:
        raise ValueError(
            f"duration_sec={duration_sec!r} is shorter than one sample "
            f"at {loaded_rate} Hz"
        )

    # Force a fixed length: drop the tail of long clips, zero-pad short ones.
    samples = samples[:num_samples]
    shortfall = num_samples - len(samples)
    if shortfall > 0:
        samples = np.pad(samples, (0, shortfall))

    # Magnitude STFT; librosa returns it frequency-major (bins x frames).
    magnitudes = np.abs(librosa.stft(samples, n_fft=n_fft, hop_length=hop_length))

    # Keep the lowest bins, then transpose so rows are time frames and
    # columns are frequency bins.
    # NOTE(review): the previous comment described a [target_len, 1] layout
    # for a [BATCH_SZ, FFT_LEN, N_AXES] model input; confirm the consumer
    # really expects (frames, bins).
    return magnitudes[:target_len].T

def process_and_save_audio(input_folder, output_folder, target_len=256, duration_sec=2):
    """Preprocess every ``.wav`` file in a folder and write the results.

    Each matching file in ``input_folder`` is run through
    ``preprocess_audio``; the returned array is flattened to one dimension
    and written with ``soundfile`` under the same file name in
    ``output_folder``. A line is printed for every file written.

    Args:
        input_folder: Directory scanned (non-recursively) for ``.wav`` files.
        output_folder: Directory that receives the outputs; created if it
            does not exist.
        target_len: Forwarded to ``preprocess_audio``.
        duration_sec: Forwarded to ``preprocess_audio``.
    """
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that runs are
    # reproducible and the log is easy to compare.
    for filename in sorted(os.listdir(input_folder)):
        input_path = os.path.join(input_folder, filename)

        # Accept any capitalisation of the extension and ignore directories
        # (or other non-files) that merely happen to be named "*.wav".
        if not filename.lower().endswith('.wav') or not os.path.isfile(input_path):
            continue

        output_path = os.path.join(output_folder, filename)
        preprocessed_audio = preprocess_audio(input_path, target_len, duration_sec)

        # The 2-D result is stored as a 1-D signal; 16000 matches the rate
        # preprocess_audio loads at by default.
        # NOTE(review): no subtype is given, so soundfile uses the default
        # for the .wav container (integer PCM per its docs); magnitudes
        # above 1.0 are presumably clipped on write. Confirm whether a float
        # subtype is wanted.
        sf.write(output_path, preprocessed_audio.flatten(), 16000)
        print(f"Processed and saved: {output_path}")


# Folder scanned for .wav files, and the folder that receives the outputs.
input_folder = '/home/masense/projects/ai8x-training/data/AudioAutoencoder/raw/fan/id_00/orig'
output_folder = '/home/masense/projects/ai8x-training/data/AudioAutoencoder/raw/fan/id_00/normal'

# Run the batch conversion with the default target_len and duration_sec.
process_and_save_audio(
    input_folder=input_folder,
    output_folder=output_folder,
)
