## Imports

In [1]:
import numpy as np
import torch
import torchaudio as ta
import sounddevice as sd
import IPython
from IPython.display import display
import ipywidgets as widgets
import matplotlib.pyplot as plt

## Audio Settings

In [2]:
audio_file_path = "../../Data/sounds/Night_and_Day_by_Virginia_Woolf_48khz.wav"
audio_sample_rate = 48000
audio_range_sec = [ 10.0, 14.0 ]
audio_output_device = 8
audio_buffer_size = 4096

audio_file_gui = widgets.Text(value=audio_file_path, description="Audio File:") 
audio_sample_rate_gui = widgets.IntText(value=audio_sample_rate, description="Audio Sample Rate:")
audio_range_sec_gui_1 = widgets.IntText(value=audio_range_sec[0], description="Audio Range Start (sec):", style={'description_width': 'initial'})
audio_range_sec_gui_2 = widgets.IntText(value=audio_range_sec[1], description="Audio Range End (sec):", style={'description_width': 'initial'})
audio_output_device_gui = widgets.IntText(value=audio_output_device, description="Audio Output Device:", style={'description_width': 'initial'})
audio_buffer_size_gui = widgets.IntText(value=audio_buffer_size, description="Audio Buffer Size:", style={'description_width': 'initial'})

display(audio_file_gui)
display(audio_sample_rate_gui)
display(audio_range_sec_gui_1)
display(audio_range_sec_gui_2)
print(sd.query_devices())
display(audio_output_device_gui)
display(audio_buffer_size_gui)

Text(value='../../Data/sounds/Night_and_Day_by_Virginia_Woolf_48khz.wav', description='Audio File:')

IntText(value=48000, description='Audio Sample Rate:')

IntText(value=10, description='Audio Range Start (sec):', style=DescriptionStyle(description_width='initial'))

IntText(value=14, description='Audio Range End (sec):', style=DescriptionStyle(description_width='initial'))

   0 Microsoft Sound Mapper - Input, MME (2 in, 0 out)
>  1 Microphone Array (Intel® Smart , MME (4 in, 0 out)
   2 Webcam 4 (NDI Webcam Audio), MME (2 in, 0 out)
   3 Webcam 1 (NDI Webcam Audio), MME (2 in, 0 out)
   4 Microphone (Realtek(R) Audio), MME (4 in, 0 out)
   5 Webcam 3 (NDI Webcam Audio), MME (2 in, 0 out)
   6 Webcam 2 (NDI Webcam Audio), MME (2 in, 0 out)
   7 Microsoft Sound Mapper - Output, MME (0 in, 2 out)
<  8 Headphones (Realtek(R) Audio), MME (0 in, 8 out)
   9 Speakers (Realtek(R) Audio), MME (0 in, 8 out)
  10 Primary Sound Capture Driver, Windows DirectSound (2 in, 0 out)
  11 Microphone Array (Intel® Smart Sound Technology for Digital Microphones), Windows DirectSound (4 in, 0 out)
  12 Webcam 4 (NDI Webcam Audio), Windows DirectSound (2 in, 0 out)
  13 Webcam 1 (NDI Webcam Audio), Windows DirectSound (2 in, 0 out)
  14 Microphone (Realtek(R) Audio), Windows DirectSound (4 in, 0 out)
  15 Webcam 3 (NDI Webcam Audio), Windows DirectSound (2 in, 0 out)
  16 Webc

IntText(value=8, description='Audio Output Device:', style=DescriptionStyle(description_width='initial'))

IntText(value=4096, description='Audio Buffer Size:', style=DescriptionStyle(description_width='initial'))

In [3]:
audio_file_path = audio_file_gui.value
audio_sample_rate = audio_sample_rate_gui.value
audio_range_sec[0] = audio_range_sec_gui_1.value
audio_range_sec[1] = audio_range_sec_gui_2.value
audio_output_device = audio_output_device_gui.value
audio_buffer_size = audio_buffer_size_gui.value

## Load Audio File

In [4]:
audio_waveform, _ = ta.load(audio_file_path)
audio_waveform = audio_waveform[:, int(audio_range_sec[0] * audio_sample_rate) : int(audio_range_sec[1] * audio_sample_rate) ]

audio_channel_count = audio_waveform.shape[0]
audio_sample_count = audio_waveform.shape[1]

## Simple Sine Wave for Audio Waveform Generation

In [5]:
sine_phase = 0
sine_amplitude = 1.0
sine_frequency = 440

def gen_sine():
    global sine_phase
    
    # Increment phase for continuous oscillation
    t = torch.arange(sine_phase, sine_phase + audio_buffer_size) / audio_sample_rate
    audio_buffer = torch.sin(2 * torch.pi * sine_frequency * t)
    sine_phase += audio_buffer_size

    return audio_buffer

## Create Audio Playback Buffers

In [6]:
audio_buffer = torch.zeros((audio_buffer_size), dtype=torch.float32)
audio_window = torch.from_numpy(np.hanning(audio_buffer_size))
audio_ring_buffer = torch.zeros((audio_buffer_size * 2), dtype=torch.float32)

## Sample Rate Conversion

In [7]:
def resample(waveform, orig_sr, target_sr):
    transform_resample = ta.transforms.Resample(orig_sr, target_sr)
    waveform_resampled = transform_resample(waveform)
    return waveform_resampled

## Quantization Function

In [8]:
def quantize(waveform, bit_count):
    # Assume float32 in [-1,1]; map to quantized range
    levels = 2 ** bit_count
    waveform_quantized = np.round(waveform * (levels / 2 - 1)) / (levels / 2 - 1)
    return waveform_quantized

## Real-Time Audio

In [9]:
audio_sample_index = 0
audio_resample_rate = audio_sample_rate
audio_bit_count = 32

"""
def audio_callback(out_data, frame_count, time_info, status):

    global audio_ring_buffer
    global audio_sample_index
    
    for i in range(2):
        
        audio_buffer = audio_waveform[0, audio_sample_index:audio_sample_index + audio_buffer_size]

        audio_buffer_proc1 = resample(audio_buffer, audio_sample_rate, audio_resample_rate)
        audio_buffer_proc2 = resample(audio_buffer_proc1, audio_resample_rate, audio_sample_rate)

        #audio_buffer_proc = quantize(audio_buffer_proc, audio_bit_count)
        
        audio_ring_buffer = torch.roll(audio_ring_buffer, -audio_buffer_size // 2)
        audio_ring_buffer[-audio_buffer_size//2:] = 0.0
        audio_ring_buffer[-audio_buffer_size:] += audio_buffer_proc2 * audio_window
        
        audio_sample_index += audio_buffer_size // 2
        
        # loop
        if audio_sample_index >= audio_sample_count - audio_buffer_size:
            audio_sample_index = 0
            
    out_data[:, 0] = audio_ring_buffer[:audio_buffer_size]
"""


def audio_callback(out_data, frame_count, time_info, status):

    global audio_ring_buffer
    global audio_sample_index
    
    for i in range(2):
        
        audio_buffer = gen_sine()
        audio_buffer = resample(audio_buffer, audio_sample_rate, audio_resample_rate)
        audio_buffer = resample(audio_buffer, audio_resample_rate, audio_sample_rate)
        audio_buffer = audio_buffer[:audio_buffer_size]
        audio_buffer = quantize(audio_buffer, audio_bit_count)

        audio_ring_buffer = torch.roll(audio_ring_buffer, -audio_buffer_size // 2)
        audio_ring_buffer[-audio_buffer_size//2:] = 0.0
        audio_ring_buffer[-audio_buffer_size:] += audio_buffer * audio_window
        
        audio_sample_index += audio_buffer_size // 2
        
        # loop
        if audio_sample_index >= audio_sample_count - audio_buffer_size:
            audio_sample_index = 0
            
    out_data[:, 0] = audio_ring_buffer[:audio_buffer_size]


"""
def audio_callback(out_data, frame_count, time_info, status):

    global audio_buffer
    global audio_ring_buffer
    
    audio_buffer[:] = gen_sine()
    audio_buffer = resample(audio_buffer, audio_sample_rate, audio_resample_rate)
    audio_buffer = resample(audio_buffer, audio_resample_rate, audio_sample_rate)
    audio_buffer = audio_buffer[:audio_buffer_size]
    audio_buffer = quantize(audio_buffer, audio_bit_count)

    audio_ring_buffer = torch.roll(audio_ring_buffer, -audio_buffer_size // 2)
    audio_ring_buffer[-audio_buffer_size//2:] = 0.0
    audio_ring_buffer[-audio_buffer_size:] += audio_buffer * audio_window

    out_data[:, 0] = audio_ring_buffer[:audio_buffer_size]
"""

"""
def audio_callback(out_data, frame_count, time_info, status):

    audio_buffer = gen_sine()
    audio_buffer = resample(audio_buffer, audio_sample_rate, audio_resample_rate)
    audio_buffer = resample(audio_buffer, audio_resample_rate, audio_sample_rate)
    audio_buffer = audio_buffer[:audio_buffer_size]
    audio_buffer = quantize(audio_buffer, audio_bit_count)

    out_data[:, 0] = audio_buffer
"""

audio_stream = sd.OutputStream(
    samplerate=audio_sample_rate, blocksize=audio_buffer_size, device=audio_output_device, channels=audio_channel_count,
    callback=audio_callback)

In [10]:
audio_stream.start()

In [11]:
audio_sine_amplitude_gui = widgets.FloatText(value=sine_amplitude, description="Audio Sine Amplitude:", style={'description_width': 'initial'})
audio_sine_frequency_gui = widgets.IntText(value=sine_frequency, description="Audio Sine Frequency:", style={'description_width': 'initial'})
audio_resample_rate_gui = widgets.IntText(value=audio_resample_rate, description="Audio Resample Rate:", style={'description_width': 'initial'})
audio_bit_count_gui = widgets.IntText(value=audio_bit_count, description="Audio Bit Count:", style={'description_width': 'initial'})

display(audio_sine_amplitude_gui)
display(audio_sine_frequency_gui)
display(audio_resample_rate_gui)
display(audio_bit_count_gui)

def on_sine_amplitude_change(value):
    global sine_amplitude
    sine_amplitude = value['new']

def on_sine_frequency_change(value):
    global sine_frequency
    sine_frequency = value['new']

def on_audio_resample_rate_change(value):
    global audio_resample_rate
    audio_resample_rate = value['new']

def on_audio_bit_count_change(value):
    global audio_bit_count
    audio_bit_count = value['new']

audio_sine_amplitude_gui.observe(on_sine_amplitude_change, names='value')
audio_sine_frequency_gui.observe(on_sine_frequency_change, names='value')
audio_resample_rate_gui.observe(on_audio_resample_rate_change, names='value')
audio_bit_count_gui.observe(on_audio_bit_count_change, names='value')


FloatText(value=1.0, description='Audio Sine Amplitude:', style=DescriptionStyle(description_width='initial'))

IntText(value=440, description='Audio Sine Frequency:', style=DescriptionStyle(description_width='initial'))

IntText(value=48000, description='Audio Resample Rate:', style=DescriptionStyle(description_width='initial'))

IntText(value=32, description='Audio Bit Count:', style=DescriptionStyle(description_width='initial'))

In [12]:
#audio_stream.stop()