In [1]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from pathlib import Path

In [None]:
def create_output_directory(base_path, dir_name='spectrograms_S02_P05'):
    """Create the spectrogram output directory if needed and return its path.

    Parameters:
    base_path (str): Directory under which the output folder is created
    dir_name (str): Name of the output folder (default: 'spectrograms_S02_P05',
        the value that was previously hard-coded)

    Returns:
    str: os.path.join(base_path, dir_name)
    """
    output_dir = os.path.join(base_path, dir_name)
    # exist_ok=True makes repeated calls (e.g. re-running the cell) harmless
    os.makedirs(output_dir, exist_ok=True)
    return output_dir

def load_audio_chunk(file_path, start_time, chunk_duration):
    """Read one segment of an audio file with librosa.

    Parameters:
    file_path (str): Path to the audio file
    start_time (float): Offset of the segment from the start of the file, in seconds
    chunk_duration (float): Length of the segment to read, in seconds

    Returns:
    tuple: The (samples, sample_rate) pair produced by librosa.load
    """
    # No `sr` argument is given, so librosa.load picks the sample rate itself.
    samples, sample_rate = librosa.load(
        file_path,
        duration=chunk_duration,
        offset=start_time,
    )
    return samples, sample_rate

def create_spectrogram(y, sr, output_path):
    """Render a log-frequency dB spectrogram of an audio chunk and save it.

    Parameters:
    y: Audio samples of the chunk (passed straight to librosa.stft)
    sr: Sample rate of `y`, used to scale the plot axes
    output_path (str): Destination path handed to Figure.savefig

    Raises:
    Any exception raised while computing, drawing or saving is propagated to
    the caller. The figure is always closed first, so a failing chunk does not
    leave an open figure behind.
    """
    # Function-scope import: guarantees the display submodule is loaded even
    # when the notebook only executed `import librosa`.
    import librosa.display

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        # Magnitude STFT converted to dB, referenced to the largest magnitude
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        img = librosa.display.specshow(D, y_axis='log', x_axis='time', sr=sr, ax=ax)

        # Add colorbar and labels
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        fig.tight_layout()

        fig.savefig(output_path)
    finally:
        # Close this specific figure on success *and* on failure; callers that
        # catch the exception and keep looping would otherwise pile up figures.
        plt.close(fig)

def process_audio_file(input_file, chunk_duration=30):
    """
    Turn an audio file into a series of per-chunk spectrogram images.

    Parameters:
    input_file (str): Path to input WAV file
    chunk_duration (int): Duration of each chunk in seconds (default: 30)

    One PNG per chunk (spectrogram_chunk_NNNN.png) is written into the
    directory returned by create_output_directory for the input file's folder.
    A chunk that raises is reported on stdout and skipped.
    """
    # Spectrograms go into a folder next to the input file
    output_dir = create_output_directory(os.path.dirname(input_file))

    # Number of chunks needed to cover the whole recording
    duration = librosa.get_duration(path=input_file)
    total_chunks = int(np.ceil(duration / chunk_duration))

    print(f"Total duration: {duration:.2f} seconds")
    print(f"Number of chunks: {total_chunks}")

    for chunk_idx in range(total_chunks):
        # Chunk N starts N * chunk_duration seconds into the file
        start_time = chunk_idx * chunk_duration

        try:
            y, sr = load_audio_chunk(input_file, start_time, chunk_duration)

            image_name = f'spectrogram_chunk_{chunk_idx:04d}.png'
            create_spectrogram(y, sr, os.path.join(output_dir, image_name))

            print(f"Processed chunk {chunk_idx + 1}/{total_chunks}")
        except Exception as e:
            # Best effort: log the failure and carry on with the next chunk
            print(f"Error processing chunk {chunk_idx}: {str(e)}")

if __name__ == "__main__":
    # Example usage: split the S02_P05 recording into spectrograms.
    # NOTE(review): absolute, machine-specific path — consider reading it from
    # a configurable data directory instead.
    input_file = "/home/ahmed/Task-Aware-audio-coding-perceptual/Data/dataset/S02_P05.wav"
    # Overrides process_audio_file's default chunk_duration of 30 seconds.
    process_audio_file(input_file, chunk_duration=3)

In [None]:
def create_output_directory(base_path, dir_name='spectrograms_S02_P05'):
    """Create the spectrogram output directory if needed and return its path.

    Parameters:
    base_path (str): Directory under which the output folder is created
    dir_name (str): Name of the output folder (default: 'spectrograms_S02_P05',
        the value that was previously hard-coded)

    Returns:
    str: os.path.join(base_path, dir_name)
    """
    output_dir = os.path.join(base_path, dir_name)
    # exist_ok=True makes repeated calls (e.g. re-running the cell) harmless
    os.makedirs(output_dir, exist_ok=True)
    return output_dir

def load_audio_chunk(file_path, start_time, chunk_duration):
    """Fetch a time window of an audio file via librosa.

    Parameters:
    file_path (str): Path to the audio file
    start_time (float): Where the window begins, in seconds from the file start
    chunk_duration (float): How many seconds of audio to read

    Returns:
    tuple: The (samples, sample_rate) pair produced by librosa.load
    """
    # librosa.load is called without `sr`, so it chooses the sample rate.
    window = dict(offset=start_time, duration=chunk_duration)
    samples, sample_rate = librosa.load(file_path, **window)
    return samples, sample_rate

def create_spectrogram(y, sr, output_path):
    """Render a log-frequency dB spectrogram of an audio chunk and save it.

    Parameters:
    y: Audio samples of the chunk (passed straight to librosa.stft)
    sr: Sample rate of `y`, used to scale the plot axes
    output_path (str): Destination path handed to Figure.savefig

    Raises:
    Any exception raised while computing, drawing or saving is propagated to
    the caller. The figure is always closed first, so a failing chunk does not
    leave an open figure behind.
    """
    # Function-scope import: guarantees the display submodule is loaded even
    # when the notebook only executed `import librosa`.
    import librosa.display

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        # Magnitude STFT converted to dB, referenced to the largest magnitude
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        img = librosa.display.specshow(D, y_axis='log', x_axis='time', sr=sr, ax=ax)

        # Add colorbar and labels
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        fig.tight_layout()

        fig.savefig(output_path)
    finally:
        # Close this specific figure on success *and* on failure; callers that
        # catch the exception and keep looping would otherwise pile up figures.
        plt.close(fig)

def process_audio_file(input_file, max_duration=3600, chunk_duration=3):
    """
    Process a long audio file into spectrograms.

    Parameters:
    input_file (str): Path to input WAV file
    max_duration (int): Maximum duration to process in seconds (default: 3600 = 1 hour)
    chunk_duration (int): Duration of each chunk in seconds (default: 3)

    Raises:
    ValueError: If chunk_duration is not positive

    One PNG per chunk (spectrogram_chunk_NNNN.png) is written into the
    directory returned by create_output_directory for the input file's folder.
    A chunk that raises is reported on stdout and skipped.
    """
    # Reject a non-positive chunk length before touching the file system; it
    # would otherwise surface as a division error or a silent no-op.
    if chunk_duration <= 0:
        raise ValueError(f"chunk_duration must be positive, got {chunk_duration}")

    # Create output directory
    base_path = os.path.dirname(input_file)
    output_dir = create_output_directory(base_path)

    # Process the whole file, or only its first max_duration seconds
    total_duration = min(librosa.get_duration(path=input_file), max_duration)
    total_chunks = int(np.ceil(total_duration / chunk_duration))

    print(f"Processing duration: {total_duration:.2f} seconds")
    print(f"Number of chunks: {total_chunks}")

    # Process each chunk
    for chunk_idx in range(total_chunks):
        start_time = chunk_idx * chunk_duration

        # Stop if we've reached the max duration
        remaining = total_duration - start_time
        if remaining <= 0:
            break

        # The final chunk is shortened so it never reads past total_duration
        # (previously it always requested a full chunk_duration).
        current_duration = min(chunk_duration, remaining)

        try:
            y, sr = load_audio_chunk(input_file, start_time, current_duration)

            # Generate output filename
            output_path = os.path.join(
                output_dir,
                f'spectrogram_chunk_{chunk_idx:04d}.png'
            )

            # Create and save spectrogram
            create_spectrogram(y, sr, output_path)

            print(f"Processed chunk {chunk_idx + 1}/{total_chunks}")

        except Exception as e:
            # Best effort: log the failure and carry on with the next chunk
            print(f"Error processing chunk {chunk_idx}: {str(e)}")

if __name__ == "__main__":
    # Example usage: spectrograms for up to the first hour of S02_P05,
    # cut into 3-second chunks.
    # NOTE(review): absolute, machine-specific path — consider reading it from
    # a configurable data directory instead.
    input_file = "/home/ahmed/Task-Aware-audio-coding-perceptual/Data/dataset/S02_P05.wav"
    # Both keyword values equal the function's defaults; spelled out for clarity.
    process_audio_file(input_file, max_duration=3600, chunk_duration=3)

In [1]:
import os
import numpy as np
import torch
import torchaudio
import matplotlib.pyplot as plt
import librosa
import librosa.display

def create_output_directory(base_path, dir_name='spectrograms_S02_U03CH1'):
    """Create the spectrogram output directory if needed and return its path.

    Parameters:
    base_path (str): Directory under which the output folder is created
    dir_name (str): Name of the output folder (default: 'spectrograms_S02_U03CH1',
        the value that was previously hard-coded)

    Returns:
    str: os.path.join(base_path, dir_name)
    """
    output_dir = os.path.join(base_path, dir_name)
    # exist_ok=True makes repeated calls (e.g. re-running the cell) harmless
    os.makedirs(output_dir, exist_ok=True)
    return output_dir

def load_audio_chunk(file_path, start_time, chunk_duration, device='cuda'):
    """
    Read a time window of an audio file with torchaudio as a single-channel tensor.

    Parameters:
    file_path (str): Path to the audio file
    start_time (float): Start time of the chunk in seconds
    chunk_duration (float): Duration of the chunk in seconds
    device (str): Device the returned tensor is moved to (cuda or cpu)

    Returns:
    torch.Tensor: Waveform of the chunk on `device`; when the file has more
        than one channel the channels are averaged (dimension 0 is kept)
    int: Sample rate reported by torchaudio.info for the file
    """
    # The sample rate is needed up front to convert seconds into frame counts
    sample_rate = torchaudio.info(file_path).sample_rate

    # int() truncates fractional frames for both the offset and the length
    waveform, _ = torchaudio.load(
        file_path,
        frame_offset=int(start_time * sample_rate),
        num_frames=int(chunk_duration * sample_rate),
    )

    # Down-mix by averaging over dimension 0 when it holds several channels
    has_multiple_channels = waveform.size(0) > 1
    if has_multiple_channels:
        waveform = waveform.mean(dim=0, keepdim=True)

    return waveform.to(device), sample_rate

def create_spectrogram(y, sr, output_path, device='cuda'):
    """
    Create and save a spectrogram for the given audio chunk.

    Parameters:
    y (torch.Tensor): Audio chunk
    sr (int): Sample rate
    output_path (str): Path to save the spectrogram
    device (str): Unused. Accepted only so existing callers keep working; the
        STFT is computed on the CPU with librosa/numpy.

    Returns:
    bool: True if the image was written, False if computing, drawing or saving
        failed (the error is printed rather than raised). Earlier versions
        returned None in both cases.
    """
    # librosa works on numpy arrays, so copy the samples back to host memory.
    # detach() is a no-op for plain tensors and avoids an error should `y`
    # ever track gradients.
    y_np = y.detach().cpu().numpy().squeeze()

    # Use fixed STFT parameters
    n_fft = 2048
    hop_length = 512

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        # Compute spectrogram using librosa
        D = librosa.stft(y_np, n_fft=n_fft, hop_length=hop_length)
        D_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

        # Display spectrogram
        img = librosa.display.specshow(D_db, sr=sr, hop_length=hop_length,
                                       x_axis='time', y_axis='log', ax=ax)

        # Add colorbar and labels
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        fig.tight_layout()

        fig.savefig(output_path)
        return True

    except Exception as e:
        # Best effort: report the problem and tell the caller via the return value
        print(f"Error creating spectrogram: {e}")
        return False

    finally:
        # Single cleanup path for success, failure and interruption:
        # close this figure and release cached GPU memory.
        plt.close(fig)
        torch.cuda.empty_cache()

def process_audio_file(input_file, max_duration=3600, chunk_duration=3, device='cuda'):
    """
    Process a long audio file into spectrograms, loading chunks onto `device`.

    Parameters:
    input_file (str): Path to input WAV file
    max_duration (int): Maximum duration to process in seconds (default: 3600 = 1 hour)
    chunk_duration (int): Duration of each chunk in seconds (default: 3)
    device (str): Device to use for processing (cuda or cpu); 'cuda' falls
        back to 'cpu' when CUDA is not available

    Raises:
    ValueError: If chunk_duration is not positive

    One PNG per chunk (spectrogram_chunk_NNNN.png) is written into the
    directory returned by create_output_directory for the input file's folder.
    A chunk that raises is reported on stdout and skipped.
    """
    # Reject a non-positive chunk length before touching the file system; it
    # would otherwise surface as a division error or a silent no-op.
    if chunk_duration <= 0:
        raise ValueError(f"chunk_duration must be positive, got {chunk_duration}")

    # Set device
    if device == 'cuda' and not torch.cuda.is_available():
        print("CUDA not available. Falling back to CPU.")
        device = 'cpu'

    # Create output directory
    base_path = os.path.dirname(input_file)
    output_dir = create_output_directory(base_path)

    # Process the whole file, or only its first max_duration seconds
    total_duration = min(librosa.get_duration(path=input_file), max_duration)
    total_chunks = int(np.ceil(total_duration / chunk_duration))

    print(f"Processing duration: {total_duration:.2f} seconds")
    print(f"Number of chunks: {total_chunks}")
    print(f"Using device: {device}")

    # Process each chunk
    for chunk_idx in range(total_chunks):
        start_time = chunk_idx * chunk_duration

        # Stop if we've reached the max duration
        remaining = total_duration - start_time
        if remaining <= 0:
            break

        # The final chunk is shortened so it never reads past total_duration
        # (previously it always requested a full chunk_duration).
        current_duration = min(chunk_duration, remaining)

        try:
            y, sr = load_audio_chunk(input_file, start_time, current_duration, device)

            # Generate output filename
            output_path = os.path.join(
                output_dir,
                f'spectrogram_chunk_{chunk_idx:04d}.png'
            )

            # Create and save spectrogram
            create_spectrogram(y, sr, output_path, device)

            print(f"Processed chunk {chunk_idx + 1}/{total_chunks}")

            # Explicit memory cleanup
            del y
            torch.cuda.empty_cache()

        except Exception as e:
            # Best effort: log the failure and carry on with the next chunk
            print(f"Error processing chunk {chunk_idx}: {str(e)}")

if __name__ == "__main__":
    # Example usage: spectrograms for up to the first hour of the S02_U03.CH1
    # recording, cut into 3-second chunks.
    # NOTE(review): absolute, machine-specific path — consider reading it from
    # a configurable data directory instead.
    input_file = "/home/ahmed/Task-Aware-audio-coding-perceptual/Data/dataset/S02_U03.CH1.wav"
    # `device` is left at its default ('cuda'); process_audio_file switches to
    # 'cpu' by itself when CUDA is not available.
    process_audio_file(input_file, max_duration=3600, chunk_duration=3)

Processing duration: 3600.00 seconds
Number of chunks: 1200
Using device: cuda
Processed chunk 1/1200
Processed chunk 2/1200
Processed chunk 3/1200
Processed chunk 4/1200
Processed chunk 5/1200
Processed chunk 6/1200
Processed chunk 7/1200
Processed chunk 8/1200
Processed chunk 9/1200
Processed chunk 10/1200
Processed chunk 11/1200
Processed chunk 12/1200
Processed chunk 13/1200
Processed chunk 14/1200
Processed chunk 15/1200
Processed chunk 16/1200
Processed chunk 17/1200
Processed chunk 18/1200
Processed chunk 19/1200
Processed chunk 20/1200
Processed chunk 21/1200
Processed chunk 22/1200
Processed chunk 23/1200
Processed chunk 24/1200
Processed chunk 25/1200
Processed chunk 26/1200
Processed chunk 27/1200
Processed chunk 28/1200
Processed chunk 29/1200
Processed chunk 30/1200
Processed chunk 31/1200
Processed chunk 32/1200
Processed chunk 33/1200
Processed chunk 34/1200
Processed chunk 35/1200
Processed chunk 36/1200
Processed chunk 37/1200
Processed chunk 38/1200
Processed chunk 39