In [8]:
import wave
import speech_recognition as sr
import warnings
warnings.filterwarnings('ignore')

### 1)

In [6]:
with wave.open("exemple_1.wav", 'rb') as wav_file:
    num_channels = wav_file.getnchannels()
    sample_width = wav_file.getsampwidth()
    frame_rate = wav_file.getframerate()
    num_frames = wav_file.getnframes()

    print(f"Number of channels: {num_channels}")
    print(f"Sample width (in bytes): {sample_width}")
    print(f"Frame rate (samples per second): {frame_rate}")
    print(f"Number of frames: {num_frames}")

    audio_data = wav_file.readframes(num_frames)

Number of channels: 1
Sample width (in bytes): 2
Frame rate (samples per second): 16000
Number of frames: 46400


### 2)

In [7]:
recognizer = sr.Recognizer()
with sr.AudioFile("exemple_1.wav") as audio_file:

    audio_data = recognizer.record(audio_file)

    try:
        text_result = recognizer.recognize_google(audio_data)
        print("Text from speech: ", text_result)
    except sr.UnknownValueError:
        print("Google Web Speech API could not understand the audio")
    except sr.RequestError as e:
        print(f"Error with the Google Web Speech API request; {e}")

Text from speech:  I believe you are just talking nonsense


### 3)

In [10]:
import speech_recognition as sr #reconnaissance vocale
import os
from pydub import AudioSegment #manipulation d'audio
from pydub.silence import split_on_silence #manipulation d'audio

r = sr.Recognizer() #Initialisation du Recognizer

def transcription_grand_audio(path): #prend en paramètre le chemin du fichier audio
    sound = AudioSegment.from_wav(path)#load the audio file

    #split the audio on silences
    chunks = split_on_silence(sound,
                              min_silence_len=500,
                              silence_thresh=sound.dBFS-14,
                              keep_silence=500)

    #create a folder "chunks" to store the audio chunks
    folder_name = "chunks"
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)

    #initialize variable 'whole_text' to store the transcription
    whole_text = ""

    #boucle sur chaque morceau pour la transcription
    for i, audio_chunk in enumerate(chunks, start=1):
        #export the audio chunk as a vaw file into 'chunks' folder
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")

        #recognize speech in the audio chunk using google web speech API
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)

        try:
            #attempt to recognize the speech in the audio chunk
            text = r.recognize_google(audio_listened)
        except sr.UnknownValueError as e:
            #handle cases where speech cannot be recognized
            print("Error:", str(e))
        else:
            #capitalize the recognized text and print the result
            text = f"{text.capitalize()}."
            print(chunk_filename, ":", text)
            #append the recognized text to the complete transcription
            whole_text += text

    #return the complete transcription
    return whole_text


In [12]:
transcription_grand_audio("grand_audio.wav")

chunks/chunk1.wav : Here's a bird which he had fixed in a bowery or a country seat.
chunks/chunk2.wav : Add a short distance from the city.
chunks/chunk3.wav : Just that what is now called dutch street.
chunks/chunk4.wav : Soon abounded with proofs of his ingenuity.
chunks/chunk5.wav : Patent smoke.
chunks/chunk6.wav : It required a horse to work some.
chunks/chunk7.wav : Dutch ovens that roasted meat without fire.
chunks/chunk8.wav : Carts that went before the horses.
chunks/chunk9.wav : Weather cox that turned against the wind and other wrong-headed contrivances.
chunks/chunk10.wav : Set astonished and confounded all beholders.


"Here's a bird which he had fixed in a bowery or a country seat.Add a short distance from the city.Just that what is now called dutch street.Soon abounded with proofs of his ingenuity.Patent smoke.It required a horse to work some.Dutch ovens that roasted meat without fire.Carts that went before the horses.Weather cox that turned against the wind and other wrong-headed contrivances.Set astonished and confounded all beholders."

### 4)

In [18]:
def capture_speech(duration=5):
    recognizer = sr.Recognizer()

    with sr.Microphone() as source:
        print(f"Listening for {duration} seconds...")

        recognizer.adjust_for_ambient_noise(source)

        audio_data = recognizer.listen(source, timeout=duration)

        print("Recording complete.")

    try:
        text_result = recognizer.recognize_google(audio_data)
        print("Speech captured:", text_result)
    except sr.UnknownValueError:
        print("Google Web Speech API could not understand the audio")
    except sr.RequestError as e:
        print(f"Error with the Google Web Speech API request; {e}")


### 5)

In [20]:
capture_speech(duration=5)

OSError: No Default Input Device Available