In [None]:
import os

import librosa
import librosa.util as lib_util
import matplotlib.pyplot as plt
import numpy as np
import parselmouth
import soundfile as sf
from parselmouth.praat import call

### Pitch and time shift

In [None]:
# Pitch/time augmentation: for every doctor/patient recording that can be
# loaded, write one modified copy per (pitch shift in steps, stretch rate) pair.
pitch_time_variants = [(2, 0.8), (-2, 1.2)]

# sf.write does not create missing directories, so make sure the target exists
# before the first write instead of failing midway through the run.
os.makedirs("modified_audio", exist_ok=True)

for doctor_index in range(1, 11):
    for patient_index in range(1, 11):
        print(f"Processing doctor: {doctor_index}, patient: {patient_index}.")
        audiofile = f"Arst_{doctor_index:03}/Patsient_{patient_index:03}/toorfailid/arsti_salvestus_orig_{doctor_index:02}_{patient_index:02}.WAV"
        try:
            # Load as mono, resampled to 16 kHz.
            sound, sampling_rate = librosa.load(audiofile, mono=True, sr=16000)
        except FileNotFoundError:
            print(f"File not found: {audiofile}. Skipping.")
            continue
        except Exception as e:
            # Best effort: one unreadable recording must not abort the whole batch.
            print(f"Error loading {audiofile}: {e}. Skipping.")
            continue
        for n_steps, rate in pitch_time_variants:
            print(f"Creating modified audio; pitch shift: {n_steps}, speed: {rate}.")
            # Pitch is shifted first; the shifted signal is then time-stretched.
            y_shifted = librosa.effects.pitch_shift(sound, sr=sampling_rate, n_steps=n_steps)
            y_stretched = librosa.effects.time_stretch(y_shifted, rate=rate)
            sf.write(f'modified_audio/arsti_salvestus_orig_{doctor_index:02}_{patient_index:02}-step={n_steps}-rate={rate}.WAV', y_stretched, sampling_rate)

### Formant shift

In [None]:
def formant_shift_praat(input_audio_path, output_audio_path, formant_shift_ratio=1.1):
    """
    Shift formants using Praat's "Change gender" command via Parselmouth.

    The shifted signal is peak-normalised so that its largest absolute sample
    equals 1 before it is saved. A result with no non-zero samples (silent or
    empty) is saved unscaled, because normalising it would divide by zero and
    fill the output with NaN.

    input_audio_path: Path to the input audio file.
    output_audio_path: Path to save the formant-shifted audio (saved as WAV).
    formant_shift_ratio: Ratio to shift formants. >1 raises formants, <1 lowers formants.
    Returns nothing; the result is written to output_audio_path.
    """
    sound = parselmouth.Sound(input_audio_path)

    # Praat "Change gender" arguments, in order: pitch floor, pitch ceiling,
    # formant shift ratio, new pitch median, pitch range factor, duration factor.
    # NOTE(review): a pitch floor of 1 and ceiling of 70 differ from the standard
    # values in the Praat manual (75 and 600 Hz), and the new pitch median is 1
    # rather than 0 ("same as original") - confirm these values are intended.
    shifted_sound = call(sound, "Change gender", 1, 70, formant_shift_ratio, 1, 1, 1)

    # Peak-normalise so the saved samples stay within [-1, 1] and are not clipped.
    samples = shifted_sound.values
    peak = float(np.max(np.abs(samples))) if samples.size else 0.0
    if peak > 0:
        shifted_sound = shifted_sound * (1.0 / peak)

    shifted_sound.save(output_audio_path, 'WAV')

In [None]:
# Formant-shift augmentation: for every doctor/patient recording that can be
# loaded, write one formant-shifted copy per ratio below.
formant_ratios = [0.8, 1.2]

# The output files are saved into this directory; create it up front so the
# first save does not fail on a fresh checkout.
os.makedirs("modified_audio", exist_ok=True)

# NOTE(review): only doctors 1-4 are processed here, whereas the pitch/time
# shift cell iterates over doctors 1-10 - confirm the narrower range is intended.
for doctor_index in range(1, 5):
    for patient_index in range(1, 11):
        print(f"Processing doctor: {doctor_index}, patient: {patient_index}")
        audiofile = f"Arst_{doctor_index:03}/Patsient_{patient_index:03}/toorfailid/arsti_salvestus_orig_{doctor_index:02}_{patient_index:02}.WAV"
        try:
            # Load as mono, resampled to 16 kHz.
            sound, sampling_rate = librosa.load(audiofile, mono=True, sr=16000)
        except FileNotFoundError:
            print(f"File not found: {audiofile}. Skipping.")
            continue
        except Exception as e:
            # Best effort: one unreadable recording must not abort the whole batch.
            print(f"Error loading file {audiofile}: {e}. Skipping.")
            continue
        # formant_shift_praat takes a file path, so the mono 16 kHz signal is
        # first written next to the original recording with a "_mono" suffix.
        audiofile_mono = f'Arst_{doctor_index:03}/Patsient_{patient_index:03}/toorfailid/arsti_salvestus_orig_{doctor_index:02}_{patient_index:02}_mono.WAV'
        sf.write(audiofile_mono, sound, sampling_rate)
        for formant in formant_ratios:
            print(f"Applying formant shift with ratio: {formant}.")
            formant_shift_praat(audiofile_mono, f'modified_audio/arsti_salvestus_orig_{doctor_index:02}_{patient_index:02}-formant-shift-ratio={formant}.WAV', formant)