In [11]:
import pyaudio
import numpy as np
import librosa
import scipy.fft
from scipy.interpolate import interp1d


In [12]:
import pyaudio
import numpy as np

def capture_audio(duration=5, rate=44100, channels=1, chunk=1024):
    """
    Captures audio from the microphone for a given duration.

    The stream and the PyAudio instance are always released, even when opening
    the stream or reading from it raises. Exactly ``int(rate * duration)``
    frames are requested: full buffers of ``chunk`` frames followed by one
    shorter read for the remainder, so recordings shorter than one buffer
    still return data instead of failing on an empty concatenate.

    :param duration: Duration to record in seconds
    :param rate: Sampling rate
    :param channels: Number of audio channels
    :param chunk: Number of frames per buffer
    :return: Captured audio data as a one-dimensional float32 numpy array
             (empty when ``rate * duration`` is below one frame). For
             ``channels > 1`` the samples are presumably interleaved --
             TODO confirm against the PyAudio documentation.
    """
    total_frames = int(rate * duration)
    frames = []

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paFloat32, channels=channels, rate=rate, input=True, frames_per_buffer=chunk)
        try:
            print("Capturing audio. Please speak into the microphone.")
            remaining = total_frames
            while remaining > 0:
                # The last read may be shorter than a full buffer.
                n_frames = min(chunk, remaining)
                data = stream.read(n_frames)
                frames.append(np.frombuffer(data, dtype=np.float32))
                remaining -= n_frames
            print("Audio capture finished.")
        finally:
            # Release the input device even if a read failed part-way.
            try:
                stream.stop_stream()
            finally:
                stream.close()
    finally:
        p.terminate()

    if not frames:
        # np.concatenate refuses an empty list; return an empty signal instead.
        return np.empty(0, dtype=np.float32)
    return np.concatenate(frames)


In [13]:
import librosa

def pitch_scale(audio, sr, n_steps):
    """
    Shift the pitch of an audio signal by a number of semitones.

    audio: numpy array holding the audio samples
    sr: int, sampling rate of `audio`
    n_steps: int, semitones to shift by. Intended range [-11, 11]
             (the range is not checked here)
    returns: the result of librosa.effects.pitch_shift for these arguments
    """
    shifted_audio = librosa.effects.pitch_shift(
        audio,
        sr=sr,
        n_steps=n_steps,
    )
    return shifted_audio

def _warped_sample_positions(m, warp_function, alpha):
    # Build the warped axis for an m-sample frame, expressed as 1-based sample positions.
    omega = np.arange(1, m + 1) / m * np.pi
    omega_warped = np.copy(omega)

    if warp_function in ('asymmetric', 'symmetric'):
        omega0 = 7 / 8 * np.pi
        if warp_function == 'symmetric' and alpha > 1:
            omega0 = 7 / (8 * alpha) * np.pi

        below = omega <= omega0
        omega_warped[below] = alpha * omega[below]
        omega_warped[~below] = alpha * omega0 + ((np.pi - alpha * omega0) / (np.pi - omega0)) * (omega[~below] - omega0)

        # Values that reached or passed pi are squeezed to just around pi.
        overshoot = omega_warped >= np.pi
        omega_warped[overshoot] = np.pi - 0.00001 + 0.00001 * omega_warped[overshoot]

    elif warp_function == 'power':
        omega_warped = np.pi * (omega / np.pi) ** alpha

    elif warp_function == 'quadratic':
        omega_warped = omega + alpha * (omega / np.pi - (omega / np.pi) ** 2)

    elif warp_function == 'bilinear':
        z = np.exp(omega * 1j)
        omega_warped = np.abs(-1j * np.log((z - alpha) / (1 - alpha * z)))

    # Any other name falls through with the unwarped axis.
    return omega_warped / np.pi * m


def apply_warping_function(frames, warp_function, alpha):
    """
    Resample every frame along a warped axis using linear interpolation.

    For a frame of ``m`` samples the axis ``omega = (1..m) / m * pi`` is
    warped according to ``warp_function``, rescaled to sample positions and
    the frame is read at those positions. Positions outside ``[1, m]`` are
    linearly extrapolated (``fill_value='extrapolate'``); NaNs in the result
    are replaced by 0.

    :param frames: A list (or array) of frame data. Each frame should be a
                   NumPy array representing the audio signal.
    :param warp_function: A string specifying the warping function to use
                          ('asymmetric', 'symmetric', 'power', 'quadratic',
                          or 'bilinear'). Any other value leaves the axis
                          unwarped, so frames come back effectively unchanged.
    :param alpha: The parameter for the warping function.
    :return: List with one warped NumPy array per input frame, in order.
             Frames with fewer than two samples cannot be interpolated and
             are returned as unmodified copies.
    """
    # The warped axis depends only on the frame length, so build it once per length.
    positions_by_length = {}
    warped_freqs = []

    for frame in frames:
        m = len(frame)
        if m < 2:
            # Nothing to interpolate between; pass the frame through.
            warped_freqs.append(np.array(frame))
            continue

        if m not in positions_by_length:
            positions_by_length[m] = _warped_sample_positions(m, warp_function, alpha)
        omega_warped_scaled = positions_by_length[m]

        interp_func = interp1d(np.arange(1, m + 1), frame, kind='linear', fill_value='extrapolate')
        warped_frame = interp_func(omega_warped_scaled)

        # Keep the last sample real when the input's last sample is real.
        if np.isreal(frame[-1]):
            warped_frame[-1] = np.real(warped_frame[-1])

        warped_frame[np.isnan(warped_frame)] = 0
        warped_freqs.append(warped_frame)

    return warped_freqs

def piecewise_linear_transformation(frame, alpha, breakpoint=0.5, upper_scale=1.5):
    """
    Apply a piecewise-linear transformation to an audio frame.

    The frame is laid out on a normalized axis from 0 to 1. Positions below
    ``breakpoint`` are multiplied by ``alpha``, the remaining positions by
    ``upper_scale``; the result is clipped to [0, 1] and the frame is read at
    those positions with linear interpolation.

    :param frame: Numpy array representing the audio frame.
    :param alpha: Scaling factor for the transformation (applied below the
                  breakpoint).
    :param breakpoint: Point in the normalized frequency range [0, 1] where the
                    piecewise transformation changes.
    :param upper_scale: Scaling factor applied at and above the breakpoint.
                    Defaults to 1.5, the value that used to be hard-coded.
    :return: Numpy array with the same number of samples as ``frame``. Frames
             with fewer than two samples cannot be interpolated and are
             returned as unmodified copies.
    """
    m = len(frame)
    if m < 2:
        # interp1d needs at least two support points.
        return np.array(frame)

    omega = np.linspace(0, 1, m)  # Normalized frequency range from 0 to 1

    # Two linear segments: alpha below the breakpoint, upper_scale from it upwards.
    omega_warped = np.where(omega < breakpoint, omega * alpha, omega * upper_scale)

    # Ensure omega_warped stays within [0, 1]
    omega_warped = np.clip(omega_warped, 0, 1)

    # Read the frame at the warped positions.
    interp_func = interp1d(omega, frame, kind='linear', fill_value="extrapolate")
    return interp_func(omega_warped)

def process_audio_data(audio_data, sampling_rate, pitch_shift_steps, warping_type, warping_param, frame_size=1024):
    """
    Pitch-shift a signal and then warp it frame by frame.

    :param audio_data: Numpy array with the audio samples.
    :param sampling_rate: Sampling rate of ``audio_data``.
    :param pitch_shift_steps: Number of semitones passed to ``pitch_scale``.
    :param warping_type: 'bilinear', 'quadratic' or 'power' (handled by
                         ``apply_warping_function``) or 'piecewise-linear'
                         (handled by ``piecewise_linear_transformation``).
                         Any other value skips the warping step.
    :param warping_param: Parameter (alpha) of the selected warping.
    :param frame_size: Approximate frame length in samples; the signal is split
                       into ``len // frame_size`` nearly equal frames (at least
                       one). Must be a positive integer.
    :return: Numpy array with the transformed audio.
    """
    # Apply pitch scaling
    transformed_audio = pitch_scale(audio_data, sampling_rate, pitch_shift_steps)

    # Apply frequency warping
    # NOTE(review): 'asymmetric' and 'symmetric' are implemented by
    # apply_warping_function but are not accepted here -- confirm whether that
    # omission is intentional.
    if warping_type in ('bilinear', 'quadratic', 'power', 'piecewise-linear'):
        # Signals shorter than one frame would give zero sections, which
        # np.array_split rejects; always keep at least one frame.
        n_frames = max(1, len(transformed_audio) // frame_size)
        frames = np.array_split(transformed_audio, n_frames)

        if warping_type == 'piecewise-linear':
            # apply_warping_function has no piecewise-linear branch, so route
            # this type to the dedicated transformation.
            warped_frames = [piecewise_linear_transformation(frame, warping_param) for frame in frames]
        else:
            warped_frames = apply_warping_function(frames, warping_type, warping_param)

        transformed_audio = np.concatenate(warped_frames)  # Concatenating the frames back

    return transformed_audio

In [14]:
import soundfile as sf

def save_transformed_audio(transformed_audio, filename="output.wav", sampling_rate=44100):
    """
    Write an audio signal to disk as a 24-bit PCM WAV file.

    :param transformed_audio: Audio samples to write.
    :param filename: Destination path.
    :param sampling_rate: Sampling rate stored in the file. Defaults to 44100,
                          the value that used to be hard-coded; pass the real
                          rate when the audio was captured at another one.
    """
    sf.write(filename, transformed_audio, sampling_rate, format='WAV', subtype='PCM_24')


In [15]:
def main():
    """
    Record five seconds of audio, apply each transformation and save the results.

    Writes ``original_audio.wav`` plus one WAV file per transformation into the
    current working directory. Stops before writing anything if the captured
    data does not pass the format checks.
    """
    # Single source of truth for the rate used for capture, processing and saving.
    sampling_rate = 44100

    # Capture audio
    audio_data = capture_audio(duration=5, rate=sampling_rate)  # 5 seconds of audio

    # Check if audio_data is a one-dimensional numpy array
    audio_ok = isinstance(audio_data, np.ndarray) and audio_data.ndim == 1
    if audio_ok:
        print("audio_data is correctly formatted.")
    else:
        print("audio_data is not correctly formatted. It should be a one-dimensional numpy array.")

    # Check if sampling_rate is an integer
    rate_ok = isinstance(sampling_rate, int)
    if rate_ok:
        print("sampling_rate is correctly formatted.")
    else:
        print("sampling_rate is not correctly formatted. It should be an integer.")

    if not (audio_ok and rate_ok):
        # The transformations below assume valid input; do not continue with bad data.
        return

    # Save original audio
    sf.write("original_audio.wav", audio_data, sampling_rate, format='WAV', subtype='PCM_24')

    # Apply transformations (dict order is also the order the files are written in)
    pitch_scaled_audio = pitch_scale(audio_data, sampling_rate, n_steps=4)  # Example pitch scaling
    outputs = {
        "pitch_scaled_audio.wav": pitch_scaled_audio,
        "bilinear_warped_audio.wav": apply_warping_function([pitch_scaled_audio], 'bilinear', 0.2)[0],
        "quadratic_warped_audio.wav": apply_warping_function([pitch_scaled_audio], 'quadratic', 0.5)[0],
        # NOTE(review): with a negative exponent (omega / pi) ** alpha is >= 1
        # for every sample, so every lookup position lies at or beyond the last
        # sample -- confirm that alpha=-0.5 is intended.
        "power_warped_audio.wav": apply_warping_function([pitch_scaled_audio], 'power', -0.5)[0],
        "piecewise_linear_warped_audio.wav": piecewise_linear_transformation(pitch_scaled_audio, alpha=1.0, breakpoint=0.5),
    }

    # Save transformed audio
    for filename, audio in outputs.items():
        sf.write(filename, audio, sampling_rate, format='WAV', subtype='PCM_24')

if __name__ == "__main__":
    main()

Capturing audio. Please speak into the microphone.
Audio capture finished.
audio_data is correctly formatted.
sampling_rate is correctly formatted.
