In [1]:
# Mount Google Drive at /content/drive so the project files referenced by the
# absolute paths in later cells are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Work from the project folder on Drive. Written as the explicit %cd magic so
# the cell does not depend on IPython's automagic setting (or break if a
# variable named `cd` is ever defined).
%cd /content/drive/MyDrive/졸업프로젝트

/content/drive/MyDrive/졸업프로젝트


In [None]:
# Install Parselmouth (Python interface to Praat). %pip installs into the
# environment of the running kernel, whereas !pip runs whichever pip is first
# on the shell PATH.
%pip install praat-parselmouth

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# manipulate pitch #
from parselmouth.praat import call

def pitch_bounds(sound):
  """Pick a pitch floor/ceiling pair for `sound` from its mean F0.

  A first autocorrelation pitch analysis is run with a wide 50-500 search
  range (the positional arguments follow parselmouth's `to_pitch_ac`
  parameter order), and the mean of that contour selects the range to use
  for later analyses.

  Args:
    sound: object exposing `to_pitch_ac` (a parselmouth Sound in this
      notebook).

  Returns:
    Tuple `(pitch_floor, pitch_ceiling)`:
      * (100, 500) when the mean F0 is above 170,
      * (50, 300) when it is below 170,
      * (50, 500) otherwise, i.e. exactly 170 or a value that compares
        neither greater nor smaller (such as NaN).
  """
  wide_range_pitch = sound.to_pitch_ac(
      None, 50, 15, True, 0.03, 0.45, 0.01, 0.35, 0.14, 500
  )
  mean_f0: float = call(wide_range_pitch, "Get mean", 0, 0, "hertz")

  # Higher-pitched voice: raise the floor, keep the wide ceiling.
  if mean_f0 > 170:
    return 100, 500
  # Lower-pitched voice: keep the low floor, narrow the ceiling.
  if mean_f0 < 170:
    return 50, 300
  # Fallback: keep the full search range.
  return 50, 500

In [None]:
def pitch_floor(sound):
  """Return only the lower pitch bound that `pitch_bounds` selects for `sound`."""
  floor, _ceiling = pitch_bounds(sound)
  return floor

In [None]:
def pitch_ceiling(sound):
  """Return only the upper pitch bound that `pitch_bounds` selects for `sound`."""
  _floor, ceiling = pitch_bounds(sound)
  return ceiling

In [5]:
# Install pydub (used for mp3 <-> wav conversion). %pip installs into the
# environment of the running kernel, whereas !pip runs whichever pip is first
# on the shell PATH.
%pip install pydub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [7]:
from pydub import AudioSegment
# Convert the echo recording from WAV to MP3 next to the original file.
sound = AudioSegment.from_wav("/content/drive/MyDrive/졸업프로젝트/v1_에코.wav")
# export() hands back the open output file object; close it explicitly so the
# handle is not leaked (and is not echoed as the cell's output).
sound.export("/content/drive/MyDrive/졸업프로젝트/v1_에코.mp3", format="mp3").close()

<_io.BufferedRandom name='/content/drive/MyDrive/졸업프로젝트/v1_에코.mp3'>

In [None]:
# initialisation #
from pydub import AudioSegment
import parselmouth

# Settings for the pitch manipulation. "unit" and "method" are stored as
# (selected value, [available choices]); element 0 is the active choice.
# "amount" is not given a default here - it is read from the user below.
default_arg = {
    "unit": ("ERB", ["ERB", "Hertz", "mel", "logHertz", "semitones"]),
    "method": ("Shift frequencies", ["Shift frequencies", "Multiply frequencies"]),
    "time_step": 0.001,
    "normalize amplitude": True
}

# Size of the pitch change, typed in by the user (e.g. 5 or -3).
default_arg['amount'] = float(input())

# Convert the mp3 recording to WAV and load it with Parselmouth. The WAV is
# written to the same absolute path it is read back from, so this cell no
# longer depends on the notebook's current working directory.
filename = "/content/drive/MyDrive/졸업프로젝트/test.wav"
audSeg = AudioSegment.from_mp3("/content/drive/MyDrive/졸업프로젝트/recordings/afrikaans1.mp3")
# export() returns the open output file object; close it to avoid leaking it.
audSeg.export(filename, format="wav").close()

sound = parselmouth.Sound(filename)

# Element 0 is the whole name when the setting is a tuple, or a single letter
# when the setting is a plain string; expand the single-letter form and leave
# anything else untouched.
unit = default_arg["unit"][0]
unit = {
    'E': "ERB",
    'H': "Hertz",
    'm': 'mel',
    'l': 'logHertz',
    's': 'semitones',
}.get(unit, unit)

method: str = default_arg["method"][0]
method = {
    "S": "Shift frequencies",
    "M": "Multiply frequencies",
}.get(method, method)

time_step = default_arg["time_step"]
# Pitch search range chosen from the recording's mean F0.
f0min, f0max = pitch_bounds(sound)
default_arg['f0min'], default_arg['f0max'] = f0min, f0max


-3


In [None]:
# Build a Praat Manipulation object for the loaded sound and pull out its
# pitch tier so the contour can be edited.
manipulation = call(sound, "To Manipulation", time_step, f0min, f0max)
pitch_tier = call(manipulation, "Extract pitch tier")

# Apply the requested change over the whole duration of the sound.
amount = default_arg['amount']

if method[0] != "S":
  # "Multiply frequencies": a non-positive factor has its sign flipped before
  # use, and the command takes no unit argument.
  if amount <= 0:
    amount = -amount
  call(pitch_tier, method, sound.xmin, sound.xmax, amount)
else:
  # "Shift frequencies": the amount is interpreted in the selected unit.
  call(pitch_tier, method, sound.xmin, sound.xmax, amount, unit)

# Put the edited tier back into the Manipulation object.
call([pitch_tier, manipulation], "Replace pitch tier")

In [None]:
# Resynthesize the voice from the Manipulation object (which now carries the
# edited pitch tier) using Praat's overlap-add resynthesis.
manipulated_sound = call(manipulation, "Get resynthesis (overlap-add)")
# Optionally normalise loudness: scale the result's intensity to 70.
# NOTE(review): the return value is ignored, so this presumably modifies
# manipulated_sound in place - confirm against the parselmouth docs.
if default_arg["normalize amplitude"]:
  manipulated_sound.scale_intensity(70)

In [None]:
# Save the loaded (unmanipulated) recording as the reference file for the
# pitch experiment.
# NOTE(review): `Audio` is imported here but not used in this cell.
from IPython.display import Audio
sound.save("pitch_original.wav", "WAV")

In [None]:
# Save the raised-pitch result (intended for amount = 5).
# NOTE(review): this writes whatever `manipulated_sound` currently holds, so
# the pitch cells above must have been run with amount = 5 immediately before
# this cell; otherwise the file will not match its name.
from IPython.display import Audio
manipulated_sound.save("pitch_raised.wav", "WAV")

In [None]:
# Save the lowered-pitch result (intended for amount = -3).
# NOTE(review): this writes whatever `manipulated_sound` currently holds, so
# the pitch cells above must have been re-run with amount = -3 immediately
# before this cell; otherwise it saves the same audio as the previous cell.
from IPython.display import Audio
manipulated_sound.save("pitch_lowered.wav", "WAV")

In [None]:
# manipulate formants #
# Settings passed to Praat's "Change gender" command below. "unit" is kept for
# reference only; it is not read anywhere in this cell.
formant_args = {
    "unit": ("percent", ["percent"]),
    "formant_shift_ratio": 0.5,  # value to edit between runs (e.g. 1.5 to raise)
    "new_pitch_median": 0,
    "pitch_range_factor": 1,
    "duration_factor": 1,
    "normalize amplitude": True,
}

# Convert the mp3 recording to WAV and load it with Parselmouth. The WAV is
# written to the same absolute path it is read back from, so this cell does
# not depend on the notebook's current working directory.
filename = "/content/drive/MyDrive/졸업프로젝트/test.wav"
audSeg = AudioSegment.from_mp3("/content/drive/MyDrive/졸업프로젝트/recordings/afrikaans1.mp3")
# export() returns the open output file object; close it to avoid leaking it.
audSeg.export(filename, format="wav").close()

sound = parselmouth.Sound(filename)

formant_shift_ratio = formant_args["formant_shift_ratio"]
new_pitch_median = formant_args["new_pitch_median"]
pitch_range_factor = formant_args["pitch_range_factor"]
duration_factor = formant_args["duration_factor"]
# Pitch search range chosen from the recording's mean F0.
f0min, f0max = pitch_bounds(sound)

# Down-mix any multi-channel recording (not only stereo) before the
# manipulation.
number_of_channels = call(sound, 'Get number of channels')
if number_of_channels > 1:
  sound = call(sound, 'Convert to mono')

formant_manipulated_sound = call(
    sound,
    "Change gender",
    f0min,
    f0max,
    formant_shift_ratio,
    new_pitch_median,
    pitch_range_factor,
    duration_factor,
)

# Optionally normalise loudness by scaling the result's intensity to 70.
if formant_args["normalize amplitude"]:
  formant_manipulated_sound.scale_intensity(70)

In [None]:
# Save the reference recording for the formant experiment. `sound` here is the
# object left by the formant cell above, i.e. after any mono conversion done
# there.
sound.save("formant_original.wav", "WAV")

In [None]:
# Save the raised-formant result (intended for formant_shift_ratio = 1.5).
# NOTE(review): this writes whatever `formant_manipulated_sound` currently
# holds. The formant cell above sets formant_shift_ratio to 0.5, so it must be
# edited to 1.5 and re-run before this cell for the file to match its name.
formant_manipulated_sound.save("formant_raised.wav", "WAV")

In [None]:
# Save the lowered-formant result (formant_shift_ratio = 0.5).
# NOTE(review): this writes whatever `formant_manipulated_sound` currently
# holds; run the formant cell above with ratio 0.5 immediately before this.
formant_manipulated_sound.save("formant_lowered.wav", "WAV")

In [None]:
# manipulate gender and age

# Convert the mp3 recording to WAV. The WAV is written to the same absolute
# path it is read back from, so this cell does not depend on the notebook's
# current working directory.
filename = "/content/drive/MyDrive/졸업프로젝트/test.wav"
audSeg = AudioSegment.from_mp3("/content/drive/MyDrive/졸업프로젝트/recordings/afrikaans1.mp3")
# export() returns the open output file object; close it to avoid leaking it.
audSeg.export(filename, format="wav").close()

# Load the freshly written file. Previously this cell reused whatever `sound`
# an earlier cell had left behind, so it only worked when the formant cell had
# been run first and silently ignored `filename`.
sound = parselmouth.Sound(filename)
# Down-mix multi-channel input, matching the state in which the formant cell
# used to hand `sound` over to this cell.
if call(sound, 'Get number of channels') > 1:
  sound = call(sound, 'Convert to mono')

# Normalise loudness, then estimate the speaker's mean F0 over a 60-500 range.
call(sound, "Scale intensity", 70)
pitch = call(sound, "To Pitch", 0.0, 60, 500)
meanF0 = call(pitch, "Get mean", 0, 0, "Hertz")

# Classify the source voice by mean F0; anything not above 159 (including an
# undefined mean) is treated as male.
gender = "female" if meanF0 > 159 else "male"

# Formant shift ratios for the (male, female, child) targets, keyed by the
# detected source gender.
formant_ratios = {
    "female": (0.8, 1, 1.5),
    "male": (1, 1.2, 1.6),
}
male_ratio, female_ratio, child_ratio = formant_ratios[gender]

# Each target uses a fixed new pitch median: 100 (male), 220 (female),
# 350 (child); pitch range and duration factors stay at 1.
male = call(sound, "Change gender", 60, 500, male_ratio, 100, 1, 1)
female = call(sound, "Change gender", 60, 500, female_ratio, 220, 1, 1)
child = call(sound, "Change gender", 60, 500, child_ratio, 350, 1, 1)

In [None]:
# Save the reference recording for the gender/age experiment.
# NOTE(review): `sound` was passed to "Scale intensity" (70) in the cell
# above, so this is presumably the intensity-scaled version - confirm.
sound.save("gender_original.wav", "WAV")

In [None]:
# Save the male-target conversion produced by the gender/age cell above.
male.save("gender_male.wav", "WAV")

In [None]:
# Save the female-target conversion produced by the gender/age cell above.
female.save("gender_female.wav", "WAV")

In [None]:
# Save the child-target conversion produced by the gender/age cell above.
child.save("gender_child.wav", "WAV")