In [1]:
import pyaudio
import numpy as np
import librosa
import scipy.fft
from scipy.interpolate import interp1d


In [1]:
import pyaudio
import numpy as np

def capture_audio():
    """
    Read mono 32-bit float audio from a PyAudio input stream until interrupted.

    Opens an input stream (1 channel, 44100 Hz, 1024-sample buffers) and reads
    it chunk by chunk until a KeyboardInterrupt (Ctrl+C) arrives. Each chunk is
    decoded into a float32 NumPy array; the per-chunk processing itself is
    still a placeholder.

    The stream and the PyAudio instance are released on every exit path,
    including when opening or reading the stream raises something other than
    KeyboardInterrupt (such errors still propagate to the caller).

    Returns None.
    """
    CHUNK = 1024  # Number of audio samples per buffer
    FORMAT = pyaudio.paFloat32  # Audio format (32-bit float)
    CHANNELS = 1  # Mono audio
    RATE = 44100  # Sampling rate

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("Capturing audio. Press Ctrl+C to stop.")
            try:
                while True:
                    data = stream.read(CHUNK)
                    audio_data = np.frombuffer(data, dtype=np.float32)
                    # Process the audio_data here
                    # ...
            except KeyboardInterrupt:
                print("\nStopping audio capture.")
        finally:
            # Runs for Ctrl+C and for unexpected read errors alike, so the
            # audio device is never left open.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

# Start capturing immediately; the call blocks in its read loop until a
# KeyboardInterrupt (Ctrl+C) stops it.
capture_audio()


Capturing audio. Press Ctrl+C to stop.

Stopping audio capture.


In [2]:
import librosa

def pitch_scale(audio, sr, n_steps):
    """
    Shift the pitch of an audio signal by a number of semitones.

    audio: numpy array representing the audio signal
    sr: int, the sampling rate of the audio
    n_steps: int, number of semitones to shift the pitch. Range [-11, 11]
        (the range is a usage guideline; it is not enforced here)

    Returns whatever librosa.effects.pitch_shift returns for these inputs.
    """
    # sr and n_steps are keyword-only in librosa >= 0.10; passing them by
    # keyword also works on older releases, so this call is portable.
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)


def frequency_warp(audio, warping_type, param):
    """
    Apply the named warping to an audio signal.

    audio: numpy array representing the audio signal
    warping_type: str, name of the warping to apply. Only 'simple_example'
        is implemented.
    param: float, strength of the warping. For 'simple_example' the signal is
        multiplied by exp(param * t) with t running linearly from -1 to 1
        over the signal, so param = 0 leaves the signal unchanged.

    Returns the warped signal as a numpy array of the same length as audio.
    Raises ValueError if warping_type is not a known warping.
    """
    if warping_type == 'simple_example':
        # This is a placeholder for an actual warping algorithm
        return audio * np.exp(param * np.linspace(-1, 1, len(audio)))

    # Fail loudly instead of falling through to an unbound local variable.
    raise ValueError(
        f"Unknown warping_type {warping_type!r}; expected 'simple_example'"
    )

def frequency_domain_transformation(audio, sr, transform_type, param):
    """
    Round-trip a signal through the frequency domain.

    The signal is taken to the frequency domain with an FFT, a transform
    selected by transform_type is meant to be applied there, and the result is
    brought back with an inverse FFT. All transforms are still placeholders,
    so the spectrum is currently returned to the time domain unmodified;
    sr and param are accepted but not used yet.

    Returns the output of scipy.fft.ifft (a complex-valued array).
    """
    spectrum = scipy.fft.fft(audio)

    # Spectral edits go here. 'bilinear' and 'quadratic' are stubs, and
    # 'power' / 'piecewise-linear' still need their own cases.
    if transform_type in ('bilinear', 'quadratic'):
        pass

    restored = scipy.fft.ifft(spectrum)
    return restored

def apply_warping_function(frames, warp_function, alpha):
    """
    Resample every frame along a warped copy of its own sample axis.

    For a frame of length m, sample positions 1..m are mapped onto omega in
    (0, pi], omega is warped by the selected function, scaled back to the
    1..m range, and the frame is linearly interpolated (extrapolating outside
    1..m) at the warped positions. NaNs in the result are replaced by 0.

    frames: A list (or array) of frame data. Each frame should be a NumPy
        array representing the audio signal.
    warp_function: str, the warping function to use: 'asymmetric',
        'symmetric', 'power', 'quadratic', or 'bilinear'. Any other value
        leaves the axis unwarped and triggers a warning.
    alpha: The parameter for the warping function.

    Returns a list with one warped array per input frame, each as long as its
    input. Frames with fewer than two samples cannot be interpolated and are
    copied through unchanged.
    """
    import warnings  # function-scope: not among the notebook's top-level imports

    supported = ('asymmetric', 'symmetric', 'power', 'quadratic', 'bilinear')
    if warp_function not in supported:
        # Keep the historical pass-through behaviour, but stop hiding typos.
        warnings.warn(
            f"Unknown warp_function {warp_function!r}; frames are returned "
            f"unwarped. Supported values: {', '.join(supported)}.",
            stacklevel=2,
        )

    warped_freqs = []
    for frame in frames:
        m = len(frame)
        if m < 2:
            # interp1d needs at least two sample points; nothing to warp here.
            warped_freqs.append(np.array(frame))
            continue

        omega = np.arange(1, m + 1) / m * np.pi
        omega_warped = np.copy(omega)

        if warp_function in ['asymmetric', 'symmetric']:
            # Two linear segments joined at omega0; the second one is chosen
            # so that omega = pi maps to pi.
            omega0 = 7/8 * np.pi
            if warp_function == 'symmetric' and alpha > 1:
                omega0 = 7 / (8 * alpha) * np.pi

            mask = omega <= omega0
            omega_warped[mask] = alpha * omega[mask]
            omega_warped[~mask] = alpha * omega0 + ((np.pi - alpha * omega0) / (np.pi - omega0)) * (omega[~mask] - omega0)

            # Compress anything at or beyond pi into a narrow band next to pi.
            overshoot = omega_warped >= np.pi
            omega_warped[overshoot] = np.pi - 0.00001 + 0.00001 * omega_warped[overshoot]

        elif warp_function == 'power':
            omega_warped = np.pi * (omega / np.pi) ** alpha

        elif warp_function == 'quadratic':
            omega_warped = omega + alpha * (omega / np.pi - (omega / np.pi) ** 2)

        elif warp_function == 'bilinear':
            z = np.exp(omega * 1j)
            omega_warped = np.abs(-1j * np.log((z - alpha) / (1 - alpha * z)))

        # Back from (0, pi] to sample positions, then resample the frame there.
        omega_warped_scaled = omega_warped / np.pi * m
        interp_func = interp1d(np.arange(1, m + 1), frame, kind='linear', fill_value='extrapolate')
        warped_frame = interp_func(omega_warped_scaled)

        if np.isreal(frame[-1]):
            warped_frame[-1] = np.real(warped_frame[-1])

        warped_frame[np.isnan(warped_frame)] = 0
        warped_freqs.append(warped_frame)

    return warped_freqs

def piecewise_linear_transformation(frame, alpha, breakpoint=0.5, upper_scale=1.5):
    """
    Apply a piecewise-linear transformation to an audio frame.

    The frame is treated as samples on a normalized axis running from 0 to 1.
    Positions below the breakpoint are scaled by alpha, the remaining
    positions by upper_scale; the warped positions are clipped to [0, 1] and
    the frame is linearly interpolated at them.

    :param frame: Numpy array representing the audio frame.
    :param alpha: Scaling factor for positions below the breakpoint.
    :param breakpoint: Point in the normalized frequency range [0, 1] where the
                    piecewise transformation changes.
    :param upper_scale: Scaling factor for positions at or above the
                    breakpoint. Defaults to 1.5, the value that used to be
                    hard-coded.
    :return: Numpy array with the warped frame, the same length as frame.
             Frames with fewer than two samples are copied through unchanged.
    """
    m = len(frame)
    if m < 2:
        # interp1d needs at least two sample points; nothing to warp here.
        return np.array(frame)

    omega = np.linspace(0, 1, m)  # Normalized frequency range from 0 to 1

    # Piecewise-linear transformation: one scale on each side of the breakpoint
    omega_warped = np.where(omega < breakpoint, omega * alpha, omega * upper_scale)

    # Ensure omega_warped stays within [0, 1]
    omega_warped = np.clip(omega_warped, 0, 1)

    # Resample the frame at the warped positions
    interp_func = interp1d(omega, frame, kind='linear', fill_value="extrapolate")
    return interp_func(omega_warped)

def process_audio_data(audio_data, sampling_rate, pitch_shift_steps, warping_type, warping_param, frame_size=1024):
    """
    Pitch-shift an audio signal and then warp it frame by frame.

    audio_data: numpy array representing the audio signal
    sampling_rate: int, the sampling rate of the audio
    pitch_shift_steps: number of semitones passed to pitch_scale
    warping_type: str, one of 'asymmetric', 'symmetric', 'power', 'quadratic',
        'bilinear' (handled by apply_warping_function) or 'piecewise-linear'
        (handled by piecewise_linear_transformation). Any other value skips
        the warping step.
    warping_param: the parameter handed to the selected warping function
    frame_size: int, approximate number of samples per frame (default 1024)

    Returns the transformed signal as a numpy array.

    NOTE(review): the frames are slices of the pitch-shifted signal itself; no
    FFT is taken before the warping functions are applied - confirm this is
    the intended domain.
    """
    # Apply pitch scaling
    transformed_audio = pitch_scale(audio_data, sampling_rate, pitch_shift_steps)

    # Apply frequency warping
    frame_warpers = ('asymmetric', 'symmetric', 'power', 'quadratic', 'bilinear')
    if warping_type not in frame_warpers and warping_type != 'piecewise-linear':
        return transformed_audio
    if len(transformed_audio) < 2:
        # Too short to interpolate; there is nothing to warp.
        return transformed_audio

    # Always produce at least one frame: array_split rejects 0 sections, which
    # is what the plain integer division yields for short signals.
    n_frames = max(1, len(transformed_audio) // frame_size)
    frames = np.array_split(transformed_audio, n_frames)  # Splitting into frames

    if warping_type == 'piecewise-linear':
        warped_frames = [piecewise_linear_transformation(frame, warping_param) for frame in frames]
    else:
        warped_frames = apply_warping_function(frames, warping_type, warping_param)

    return np.concatenate(warped_frames)  # Concatenating the frames back

In [3]:
import soundfile as sf

def save_transformed_audio(transformed_audio, filename="output.wav", sampling_rate=44100):
    """
    Write an audio signal to disk as a 24-bit PCM WAV file.

    transformed_audio: the audio data handed to soundfile's write
    filename: str, destination path (default "output.wav")
    sampling_rate: int, sample rate recorded in the file. Defaults to 44100,
        the value that used to be hard-coded; pass the real rate when the
        audio was captured or resampled at a different one.
    """
    sf.write(filename, transformed_audio, sampling_rate, format='WAV', subtype='PCM_24')
