# In [None]:
import os
import numpy as np
import scipy.io.wavfile as wavfile


# Folder that is scanned for source .wav recordings.
input_folder = '/home/masense/projects/ai8x-training/data/AudioAutoencoder/raw/fan/id_00/normal'
# Folder that receives the '<name>_processed.wav' outputs.
output_folder = '/home/masense/projects/ai8x-training/data/AudioAutoencoder/raw/fan/id_chunked00'

# Create the destination up front; exist_ok=True makes re-running harmless.
os.makedirs(output_folder, exist_ok=True)

def _to_int16(samples):
    """Convert samples as returned by ``wavfile.read`` to 16-bit PCM.

    A plain ``astype(np.int16)`` truncates float samples (nominally in
    [-1.0, 1.0]) to almost all zeros and wraps 32-bit integers, so each
    supported sample format is rescaled explicitly instead.

    Args:
        samples: 1-D NumPy array of audio samples.

    Returns:
        An ``int16`` array of the same length. ``int16`` input is returned
        unchanged.
    """
    dtype = samples.dtype
    if dtype == np.int16:
        return samples
    if dtype == np.uint8:
        # 8-bit WAV data is unsigned with silence at 128: centre it, then
        # scale to the 16-bit range (result spans -32768..32512).
        return ((samples.astype(np.int16) - 128) * 256).astype(np.int16)
    if dtype == np.int32:
        # Keep the 16 most significant bits.
        return (samples >> 16).astype(np.int16)
    if np.issubdtype(dtype, np.floating):
        # Clip first so out-of-range values saturate instead of wrapping.
        return np.round(np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    # Unknown sample format: fall back to the plain cast.
    return samples.astype(np.int16)


def preprocess_audio(file_path, output_path):
    """Chunk one WAV file into 128x128 frames and write the result as a WAV.

    The first channel of the input is converted to 16-bit PCM, zero-padded
    to a multiple of 128 * 128 samples and cut into frames of shape
    (128, 128). Every frame is then repeated 128 times along a new axis, so
    the data has shape (num_chunks, 128, 128, 128) before it is flattened
    and written with the original sample rate.

    Args:
        file_path: Path of the WAV file to read.
        output_path: Path of the WAV file to write.
    """
    sample_rate, audio_data = wavfile.read(file_path)

    # Multi-channel input: keep only the first channel.
    if audio_data.ndim > 1:
        audio_data = audio_data[:, 0]

    # Convert once, up front, so padding and the final write operate on
    # correctly scaled 16-bit samples.
    audio_data = _to_int16(audio_data)

    side = 128
    chunk_size = side * side

    # Zero-pad the tail so the signal splits into whole chunks.
    remainder = len(audio_data) % chunk_size
    if remainder:
        audio_data = np.pad(audio_data, (0, chunk_size - remainder), 'constant')
    chunks = audio_data.reshape(len(audio_data) // chunk_size, side, side)

    # Add an axis after the chunk index and replicate each frame along it:
    # (num_chunks, 128, 128) -> (num_chunks, 128, 128, 128).
    chunks = np.repeat(chunks[:, np.newaxis], side, axis=1)

    # A WAV file holds a flat sample stream, so serialise in C order.
    wavfile.write(output_path, sample_rate, chunks.ravel())

def process_all_files(input_folder, output_folder):
    """Run ``preprocess_audio`` on every ``.wav`` file in a folder.

    Each ``<name>.wav`` directly inside ``input_folder`` is written to
    ``output_folder`` as ``<name>_processed.wav``. Entries whose names do
    not end in ``.wav`` are ignored, and sub-folders are not traversed.

    Args:
        input_folder: Directory containing the source WAV files.
        output_folder: Directory for the processed files; created if it
            does not exist yet.
    """
    # Do not rely on the caller having created the destination beforehand.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that runs are
    # reproducible and the progress output is easy to follow.
    for file_name in sorted(os.listdir(input_folder)):
        if not file_name.endswith('.wav'):
            continue
        file_path = os.path.join(input_folder, file_name)
        output_file_name = os.path.splitext(file_name)[0] + '_processed.wav'
        output_path = os.path.join(output_folder, output_file_name)
        preprocess_audio(file_path, output_path)
        print(f'Processed {file_name} and saved to {output_file_name}')

# Convert every .wav file in input_folder; this runs as soon as the cell/script executes.
process_all_files(input_folder, output_folder)