In [None]:
import wave
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the audio file
audio_file_path = "abc.wav"
with wave.open(audio_file_path, 'rb') as wave_file:
    sample_rate = wave_file.getframerate()
    num_channels = wave_file.getnchannels()
    num_frames = wave_file.getnframes()
    audio_signal = np.frombuffer(wave_file.readframes(num_frames), dtype=np.int16)





In [None]:
# Normalize the audio signal to [-1, 1]
audio_signal = audio_signal / (2.0 ** 15)

# Compute the Pitch (F0) using autocorrelation
autocorr = np.correlate(audio_signal, audio_signal, mode='full')
autocorr = autocorr[autocorr.size // 2:]
f0_index = np.argmax(autocorr[sample_rate // 500:sample_rate // 75]) + sample_rate // 500
f0 = sample_rate / f0_index

In [None]:
# Compute the Formants (F1, F2, F3) using LPC analysis
ncoeffs = 2 + sample_rate // 1000
A = np.zeros((ncoeffs, ncoeffs))
for i in range(ncoeffs):
    for j in range(ncoeffs):
        A[i, j] = autocorr[np.abs(i - j)]
coeffs = np.linalg.solve(A[1:], -A[0])
roots = np.roots(np.concatenate(([1], coeffs)))
roots = roots[np.imag(roots) >= 0]
angz = np.arctan2(np.imag(roots), np.real(roots))
formants = sorted(angz * sample_rate / (2 * np.pi))

# Plot the results
times = np.arange(0, num_frames / sample_rate, 1 / sample_rate)
plt.figure(figsize=(12, 8))
plt.subplot(3, 1, 1)
plt.plot(times, audio_signal, label='Speech Signal')
plt.xlabel('Time (sec)')
plt.ylabel('Amplitude')
plt.legend(loc='best')

plt.subplot(3, 1, 2)
plt.plot(times[:len(autocorr)], autocorr, label='Autocorrelation')
plt.axvline(x=f0_index / sample_rate, linestyle='--', color='red', label='Pitch (F0)')
plt.xlabel('Time (sec)')
plt.ylabel('Amplitude')
plt.legend(loc='best')

plt.subplot(3, 1, 3)
plt.plot(times[:len(angz)], formants[:3], label='Formants (F1-F3)')
plt.xlabel('Time (sec)')
plt.ylabel('Frequency (Hz)')
plt.ylim(0, 5000)
plt.legend(loc='best')

plt.tight_layout()
plt.show()

In [2]:
import parselmouth
import scipy.io.wavfile as wavfile
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Load the audio signal from a .wav file
filename = "abc.wav"
sampling_freq, signal_data = wavfile.read(filename)

# Convert the signal to float data type
signal_data = signal_data.astype(np.float64)

# Create a Parselmouth Sound object
sound = parselmouth.Sound(signal_data, sampling_freq)

# Extract the fundamental frequency (F0) using the "To Pitch" function
#pitch = sound.to_pitch()
from pydub import AudioSegment
from pydub.effects import pitch_shift

# Load audio file
audio = AudioSegment.from_file("abc.wav", format="wav")
# Shift the pitch up by 12 semitones (1 octave)
pitched_audio = audio.apply_effect(pitch_shift, 12)

# Get the pitch information
pitch = pitched_audio.dBFS


# Extract the formants (F1, F2, and F3) using the "To Formant" function
formants = sound.to_formant()

# Convert the Parselmouth objects to numpy arrays
f0 = pitch.selected_array['frequency']
time_axis = pitch.xs()
formant1 = formants['frequency'][0]
formant2 = formants['frequency'][1]
formant3 = formants['frequency'][2]

# Plot the Pitch (F0) and formants (F1, F2, and F3)
time_vector = np.arange(len(signal_data)) / sampling_freq
plt.plot(time_vector, f0, label="Pitch (F0)")
plt.plot(time_vector, formant1, label="Formant F1")
plt.plot(time_vector, formant2, label="Formant F2")
plt.plot(time_vector, formant3, label="Formant F3")
plt.xlabel("Time (s)")
plt.ylabel("Frequency (Hz)")
plt.title("Pitch and Formants")
plt.legend()

# Show the plot
plt.show()



ImportError: cannot import name 'pitch_shift' from 'pydub.effects' (C:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\pydub\effects.py)