In [1]:
import os
import numpy as np
import librosa
import soundfile as sf

def _fit_axis(array, size, axis):
    """Crop or zero-pad the end of ``axis`` so that ``array.shape[axis] == size``."""
    current = array.shape[axis]
    if current > size:
        keep = [slice(None)] * array.ndim
        keep[axis] = slice(0, size)
        return array[tuple(keep)]
    pad_width = [(0, 0)] * array.ndim
    pad_width[axis] = (0, size - current)
    return np.pad(array, pad_width)


def preprocess_audio(audio_path, target_shape=(128, 128), duration_sec=2, target_sr=8000):
    """Load an audio file and return a fixed-size STFT magnitude matrix.

    The waveform is loaded with ``librosa.load`` at ``target_sr``, cropped or
    zero-padded to ``duration_sec`` seconds, transformed with a 256-point STFT,
    and the magnitude matrix is cropped or zero-padded to ``target_shape``.

    Args:
        audio_path: Path of the audio file to load.
        target_shape: ``(frequency_bins, frames)`` of the spectrogram before
            the final transpose.
        duration_sec: Length, in seconds, the waveform is forced to.
        target_sr: Sample rate passed to ``librosa.load``.

    Returns:
        A NumPy array of shape ``(target_shape[1], target_shape[0])``: the
        magnitude spectrogram transposed so that frames come first.
    """
    waveform, sample_rate = librosa.load(audio_path, sr=target_sr)
    num_samples = int(duration_sec * sample_rate)

    # Force the clip to exactly `num_samples` samples (truncate or zero-pad).
    waveform = _fit_axis(waveform, num_samples, axis=0)

    n_fft = 256
    # Choose the hop so the STFT yields roughly target_shape[1] frames. Clamp to
    # 1: a clip with fewer samples than requested frames would otherwise give a
    # hop of 0, which is not a valid STFT hop length.
    hop_length = max(1, len(waveform) // target_shape[1])

    # Magnitude of the short-time Fourier transform: (frequency bins, frames).
    magnitude = np.abs(librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length))

    # Force both axes to the requested size; any shortfall is filled with zeros.
    magnitude = _fit_axis(magnitude, target_shape[0], axis=0)
    magnitude = _fit_axis(magnitude, target_shape[1], axis=1)

    # Return with frames on the first axis.
    return magnitude.T

def process_and_save_audio(input_folder, output_folder, target_shape=(128, 128), duration_sec=2, target_sr=8000):
    """Preprocess every WAV file in ``input_folder`` and write the result to ``output_folder``.

    Each file is passed through ``preprocess_audio``; the returned matrix is
    flattened to one dimension and written with ``sf.write`` under the same
    file name in ``output_folder`` (created if missing).

    Args:
        input_folder: Directory scanned (non-recursively) for ``.wav`` files.
        output_folder: Directory receiving the processed files.
        target_shape: Forwarded to ``preprocess_audio``.
        duration_sec: Forwarded to ``preprocess_audio``.
        target_sr: Forwarded to ``preprocess_audio`` and used as the sample
            rate of the written file.
    """
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(input_folder)):
        # Case-insensitive match so recordings named '*.WAV' are not skipped.
        if not filename.lower().endswith('.wav'):
            continue

        input_path = os.path.join(input_folder, filename)
        if not os.path.isfile(input_path):
            # A directory whose name ends in '.wav' cannot be loaded as audio.
            continue

        output_path = os.path.join(output_folder, filename)
        preprocessed_audio = preprocess_audio(input_path, target_shape, duration_sec, target_sr)
        audio_flattened = preprocessed_audio.flatten()

        # NOTE(review): the data written here is a flattened STFT magnitude
        # matrix, not a waveform. Magnitudes are non-negative and may exceed
        # 1.0, which an integer PCM subtype would clip -- confirm the subtype
        # sf.write picks and what the downstream loader expects.
        sf.write(output_path, audio_flattened, target_sr)
        print(f"Processed and saved: {output_path}")

# Source recordings ('orig') and destination for the preprocessed copies ('normal').
# NOTE(review): these are machine-specific absolute paths, and the recorded cell
# output below shows relative '../data/...' paths with a Windows separator, so
# that output was not produced by this version of the cell -- re-run before
# relying on it.
input_folder = '/home/masense/projects/ai8x-training/data/AudioAutoencoder/raw/fan/id_00/orig'
output_folder = '/home/masense/projects/ai8x-training/data/AudioAutoencoder/raw/fan/id_00/normal'
process_and_save_audio(input_folder, output_folder)

Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000000.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000001.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000002.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000003.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000004.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000005.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000006.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000007.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000008.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000009.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000010.wav
Processed and saved: ../data/AudioAutoencoder/raw/fan/id_00/normal\00000011.wav
Processed and saved: ../data/AudioAutoen

In [3]:
import sys, os
from torch.utils import data

sys.path.append("..")

import torch
import ai8x
from distiller import apputils
from datasets import audioautoencoder

%matplotlib inline

In [4]:
# Working directory of the running kernel.
directory = os.getcwd()
# Absolute path of its parent directory; used as the base of `data_path` below.
training_dir = os.path.abspath(os.path.join(directory, os.pardir))

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Dataset root handed to the loader: <training_dir>/data.
data_path = os.path.join(training_dir, 'data')
# Single flag reused for Args.act_mode_8bit and ai8x.set_device(simulate=...).
simulate = False

class Args:
    """Minimal argument container passed to the dataset loader.

    An instance is handed to ``audioautoencoder.get_datasets`` as the second
    element of the ``(data_path, args)`` tuple.

    Args:
        act_mode_8bit: Stored as ``self.act_mode_8bit``; this notebook sets it
            from the ``simulate`` flag.
        truncate_testset: Stored as ``self.truncate_testset``. Defaults to
            ``False``, the value that was previously hard-coded.
    """

    def __init__(self, act_mode_8bit, truncate_testset=False):
        self.act_mode_8bit = act_mode_8bit
        self.truncate_testset = truncate_testset

# Loader arguments; act_mode_8bit follows the `simulate` flag.
args = Args(act_mode_8bit=simulate)

# device=85 is the target selected here; the recorded output of this cell
# reports it as "Configuring device: MAX78000".
ai8x.set_device(device=85, simulate=simulate, round_avg=True)

Configuring device: MAX78000, simulate=False.


In [6]:
# Load both the training and the test split. The recorded output shows them
# being read from cached pickles under data/AudioAutoencoder/processed.
# NOTE(review): that output lists Windows paths, while the preprocessing cell
# uses a Linux home directory -- confirm both ran against the same data.
train_set, test_set = audioautoencoder.get_datasets((data_path, args), load_train=True, load_test=True)


[1/1] c:\Users\MTinaco\Dev\Algorithm\ai8x-training\data\AudioAutoencoder\raw\fan\id_00
load_pickle <- c:\Users\MTinaco\Dev\Algorithm\ai8x-training\data\AudioAutoencoder\processed/train_fan_id_00_raw.pickle
load_pickle <- c:\Users\MTinaco\Dev\Algorithm\ai8x-training\data\AudioAutoencoder\processed/train_labels_fan_id_00_raw.pickle
<class 'numpy.ndarray'>

[1/1] c:\Users\MTinaco\Dev\Algorithm\ai8x-training\data\AudioAutoencoder\raw\fan\id_00
load_pickle <- c:\Users\MTinaco\Dev\Algorithm\ai8x-training\data\AudioAutoencoder\processed/eval_files_fan_id_00_raw.pickle
load_pickle <- c:\Users\MTinaco\Dev\Algorithm\ai8x-training\data\AudioAutoencoder\processed/eval_labels_fan_id_00_raw.pickle
<class 'list'>


In [8]:
# Iterator over the training set; re-running this cell restarts from the beginning.
dset_iter = iter(train_set)

In [10]:
# Fetch the next item from the training-set iterator. Each re-run of this cell
# advances the iterator, so `p` depends on how many times it has been executed.
p = next(dset_iter)