In [1]:
import librosa #Import the librosa library, which is used for audio and music analysis.
import librosa.display # Import the librosa.display submodule for displaying audio-related visualizations.
import matplotlib.pyplot as plt # Import the matplotlib.pyplot module for creating plots and visualizations.
import numpy as np # Import numpy as np, Numpy is used for numerical computations.
import scipy as sp # Scipy is used for scientific and technical computing.
from scipy import signal # Import the signal module from scipy for signal processing functions.
import random #provides various random number generators.
from scipy.signal import lfilter, freqz #Imports functions of linear filter and frequency Response of a filter.
from IPython.display import display, Audio, HTML # Import the display, Audio, and HTML classes/functions from the IPython.display module.

In [2]:
def generate_excitation(fs, dur, f0, b, a):
    """Build a bipolar impulse train and run it through a linear filter.

    Every period contributes a +1.0 impulse at its start and a -1.0 impulse
    half a period later. The resulting train is filtered with
    ``lfilter(b, a, ...)``.

    Parameters
    ----------
    fs : sampling rate; the output holds ``int(fs * dur)`` samples.
    dur : duration, in the time unit that ``fs`` and ``f0`` are expressed per.
    f0 : pulse repetition rate; ``int(f0 * dur)`` periods are placed.
    b, a : numerator / denominator coefficients handed to ``lfilter``.

    Returns
    -------
    numpy.ndarray
        The filtered excitation, ``int(fs * dur)`` samples long.
    """
    samples = int(fs * dur)
    periods = int(f0 * dur)

    excitation = np.zeros(samples)  # silent buffer that receives the impulses

    for i in range(periods):
        start = int(i * fs / f0)          # sample index of the positive impulse
        end = int((i + 0.5) * fs / f0)    # sample index of the negative impulse

        # When the period is shorter than two samples and fs * dur is not an
        # integer, the last impulses can land at or beyond the end of the
        # buffer. Skip those instead of raising IndexError.
        if start < samples:
            excitation[start] = 1.0
        if end < samples:
            excitation[end] = -1.0

    return lfilter(b, a, excitation)

In [3]:
def sound(f0,f1,b1):
    """Return an exponentially decaying sine sweep sampled on the global ``t``.

    The time axis is NOT a parameter: the function reads the notebook-level
    variable ``t``, which must be defined before this is called.

    Parameters
    ----------
    f0 : frequency term used at ``t = 0``.
    f1 : frequency term the sweep expression ``f0 + (f1 - f0) * t`` reaches
        at ``t = 1``.
    b1 : decay rate of the ``exp(-b1 * t)`` amplitude envelope.

    NOTE(review): the phase is ``2*pi*(f0 + (f1 - f0)*t)*t``, so the
    instantaneous frequency (phase derivative / 2*pi) is
    ``f0 + 2*(f1 - f0)*t`` -- it moves twice as fast as the sweep expression
    itself. Confirm whether that is intended.
    """
    sweep = f0 + (f1 - f0) * t
    carrier = np.sin(2 * np.pi * sweep * t)
    decay = np.exp(-b1 * t)
    return decay * carrier

In [4]:
def glottal_pulse(f0, t):
    """Return the raised-cosine waveform ``0.5 * (1 - cos(2*pi*f0*t))``.

    The value is 0 wherever ``f0 * t`` is an integer and peaks at 1 halfway
    between those points.

    Parameters
    ----------
    f0 : repetition frequency of the pulse.
    t : time value(s); anything ``np.cos`` accepts after multiplication.
    """
    phase = 2 * np.pi * f0 * t
    return 0.5 * (1 - np.cos(phase))

def formant_filter(source, freq, t):
    """Run ``source`` through a second-order recursive filter tuned by ``freq``.

    The coefficients depend on two notebook-level variables that are NOT
    parameters and must exist before this is called: ``b`` (used in the decay
    term ``2*b*pi/fs``) and ``fs`` (the sampling rate).

    Parameters
    ----------
    source : input samples; the output has the same shape and dtype
        (it is allocated with ``np.zeros_like``).
    freq : frequency that sets the angle ``2*pi*freq/fs``.
    t : only its length is used; samples ``2 .. len(t) - 1`` are computed.

    Returns
    -------
    numpy.ndarray
        Filtered signal; the first two samples are always left at zero.

    NOTE(review): the recursion uses ``radius`` for both feedback taps and the
    ``2*radius*cos(angle)`` term as the input gain. That differs from the usual
    two-pole resonator form -- confirm this is the intended difference equation.
    """
    damping = 2*b*np.pi/fs
    angle = 2*np.pi*freq/fs
    radius = np.exp(-damping)
    in_gain = 2*radius*np.cos(angle)

    filtered = np.zeros_like(source)
    stop = len(t)
    for n in range(2, stop):
        # The current output depends on the previous input sample and on the
        # two previous outputs.
        filtered[n] = in_gain*source[n-1] - radius*filtered[n-1] - radius*filtered[n-2]
    return filtered

In [None]:
def synthesize_signal(F0, F1, F2, F3, t):
    """Return a sine at ``F0`` multiplied by three complex exponentials.

    The result is ``sin(2*pi*F0*t)`` times ``exp(-2j*pi*F*t)`` for each of
    ``F1``, ``F2`` and ``F3``, so the output is complex-valued.

    Parameters
    ----------
    F0 : frequency of the real sine factor.
    F1, F2, F3 : frequencies of the three complex-exponential factors.
    t : time value(s) at which to evaluate the signal.
    """
    result = np.sin(2 * np.pi * F0 * t)
    for formant in (F1, F2, F3):
        result = result * np.exp(-2j * np.pi * formant * t)
    return result

def extract_segment(signal, sample_rate, segment_duration=0.001):
    """Return a slice of ``signal`` centred on its midpoint.

    Parameters
    ----------
    signal : any sliceable sequence with a length (list, numpy array, ...).
    sample_rate : samples per unit of ``segment_duration``.
    segment_duration : length of the segment; the default 0.001 gives
        ``int(sample_rate * 0.001)`` samples.

    Returns
    -------
    ``signal[start:start + segment_length]``. When the requested segment is
    longer than the signal, the whole signal is returned.
    """
    segment_length = int(sample_rate * segment_duration)
    # Clamp at 0: a segment longer than the signal would otherwise give a
    # negative start, which Python slicing reads as "count from the end" and
    # silently returns only the tail of the signal.
    start_index = max(0, len(signal) // 2 - segment_length // 2)
    end_index = start_index + segment_length
    return signal[start_index:end_index]

def compute_magnitude_spectrum(signal, window_length, sample_rate):
    """Return the DFT magnitude of ``signal`` in dB and its frequency axis.

    Parameters
    ----------
    signal : samples to transform.
    window_length : window duration; the DFT size is
        ``int(window_length * sample_rate)``. ``np.fft.fft`` truncates or
        zero-pads ``signal`` to that size.
    sample_rate : sampling rate, also used to scale the frequency axis.

    Returns
    -------
    (magnitude_dB, freq_axis)
        ``20 * log10(|DFT|)`` and the matching ``np.fft.fftfreq`` bins (in FFT
        order, negative frequencies last). Bins whose magnitude is exactly
        zero come out as ``-inf``.
    """
    n_fft = int(window_length * sample_rate)
    spectrum = np.fft.fft(signal, n=n_fft)
    bin_frequencies = np.fft.fftfreq(n_fft, 1 / sample_rate)
    level_db = 20 * np.log10(np.abs(spectrum))
    return level_db, bin_frequencies

Use glottal pulse shaping and lip radiation filtering. Add a small amount of aspiration noise
and pitch jitter to enhance naturalness.

In [5]:
def glottal_pulse_shaping(f0, t, decay=10):
    """Return a raised-cosine glottal pulse train under an exponential decay.

    Computes ``0.5 * (1 - cos(2*pi*f0*t)) * exp(-decay * t)``.

    Parameters
    ----------
    f0 : repetition frequency of the pulse.
    t : time value(s) at which to evaluate the waveform.
    decay : rate of the exponential envelope. The default of 10 reproduces
        the previously hard-coded value; 0 disables the decay.
    """
    pulse = 0.5 * (1 - np.cos(2 * np.pi * f0 * t))
    return pulse * np.exp(-decay * t)
def lip_radiation_filter(source, t):
    """Apply the first-order filter ``y[n] = (1 - c) * x[n] + c * x[n-1]``.

    ``c`` is the fixed lip coefficient 0.01 and the sample before the start of
    the signal is taken to be zero, so ``y[0] = (1 - c) * x[0]``.

    Parameters
    ----------
    source : input samples (array-like).
    t : only its length is used; samples beyond ``len(t)`` are left at zero.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``source``
        (allocated with ``np.zeros_like``).
    """
    lip_coeff = 0.01  # weight of the previous input sample
    src = np.asarray(source)
    lip_out = np.zeros_like(src)

    # Process only samples present in both the signal and the time axis.
    n = min(len(t), len(src))
    if n == 0:
        return lip_out

    # The filter needs a single past sample, so every sample from index 0 on
    # can be computed (a loop starting at index 2 would zero samples 0 and 1).
    lip_out[0] = (1 - lip_coeff) * src[0]
    lip_out[1:n] = (1 - lip_coeff) * src[1:n] + lip_coeff * src[:n - 1]
    return lip_out

def lip_formant_filter(source,freq,t):
    """Filter ``source`` with the formant filter tuned to ``freq``.

    This is a thin wrapper around ``formant_filter(source, freq, t)``.
    ``formant_filter`` takes the source signal as its first argument and
    returns the filtered signal (not a coefficient pair), and it applies the
    same second-order recursion this function used to spell out by hand.

    Parameters
    ----------
    source : input samples.
    freq : formant frequency passed through to ``formant_filter``.
    t : time axis passed through to ``formant_filter``.

    Returns
    -------
    Whatever ``formant_filter`` returns for these arguments.
    """
    # Calling formant_filter(freq, t) and unpacking the result as (a, b)
    # raised TypeError on every call; delegate with the full argument list.
    return formant_filter(source, freq, t)

def aspiration_noise(signal, intensity):
    """Return ``signal`` with zero-mean Gaussian noise added to every sample.

    The noise is drawn from NumPy's global random state (``np.random.normal``),
    so results are reproducible only if that state is seeded beforehand.

    Parameters
    ----------
    signal : input samples; one noise value is drawn per element of
        ``len(signal)``.
    intensity : standard deviation of the noise.
    """
    n_samples = len(signal)
    hiss = np.random.normal(loc=0, scale=intensity, size=n_samples)
    return signal + hiss

def pitch_jitter(f0, jitter_factor):
    """Return ``f0`` perturbed by zero-mean Gaussian jitter, one value per time step.

    The number of values comes from the notebook-level variable ``t``
    (``len(t)``), which is NOT a parameter and must be defined before this is
    called. Noise is drawn from NumPy's global random state.

    Parameters
    ----------
    f0 : base pitch; a scalar or anything that broadcasts against ``len(t)``
        values.
    jitter_factor : standard deviation of the jitter.
    """
    n_steps = len(t)
    wobble = np.random.normal(loc=0, scale=jitter_factor, size=n_steps)
    return f0 + wobble