# Convert Videos to Text

https://www.thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python

In [1]:
# pip install ffmpeg moviepy 
# pip install SpeechRecognition pydub


In [2]:
# Imports
import moviepy.editor as mp
import speech_recognition as sr
import os

In [3]:
def convert_mp4_to_wav(file_name, mp4_path, wav_path):
    '''
    Loads mp4 file and write to .wav file
    
    Inputs:
    - file_name: name of file (without extension)
    - mp4_path: path to folder for mp4 files
    - wav_path: path to folder for wav files
    
    Outputs:
    - Saves .wav file to wav_path
    '''
    # Load mp4 file
    orig_video = mp.VideoFileClip(mp4_path + file_name + ".mp4")
    # Write to wav file
    orig_video.audio.write_audiofile(wav_path + file_name + ".wav")

    return

In [8]:
def transcribe_video(file_name, mp4_path, wav_path, text_path):
    '''
    Use Speech Recongizer library to transcribe speech
    
    Inputs:
    - file_name: name of the file (without extension)
    - wav_path: path to folder for wav files
    - mp4_path: path to folder for mp4 files
    - text_path: path to folder for transcribed text files
    
    Outputs:
    - Saves transcribed text to text_path
    '''
    print(f'Working on {file_name}...')
    
    # Convert mp4 file to wav file
    print('Converting mp4 to wav...')
    convert_mp4_to_wav(file_name, mp4_path, wav_path)
    
    # Initialize the recognizer
    r = sr.Recognizer()
    
    # Open the audio file
    print('Transcribing...')
    
    with sr.AudioFile(wav_path + file_name + ".wav") as source:
        # Load audio data
        audio_data = r.record(source)
        # Convert from speech to text
        text = r.recognize_google(audio_data)

    # Write to text file
    with open(text_path + file_name + ".txt", "w") as text_file:
        text_file.write(text)
        
    print()
    return

In [5]:
# Paths to directories
mp4_path = 'MP4_Files/'
wav_path = 'WAV_Files/'
text_path = 'Text_Files_v1/'

# Create empty directories for wav and text files (if they don't exist)
if not os.path.exists(wav_path):
    os.makedirs(wav_path)
    
if not os.path.exists(text_path):
    os.makedirs(text_path)

In [6]:
# List of file names
file_list = os.listdir(mp4_path) # List all files in mp4 directory
# Updated list of files names
# remove extension and skip files that start with '.' (e.g. ipynb checkpoints)
file_list = [x.replace('.mp4', '')for x in file_list if x[0] != '.'] # Remove extension

In [9]:
# Transcribe all files in list
for file in file_list:
    transcribe_video(file, mp4_path, wav_path, text_path)

Working on 223_2.26.20_S_SC...
Converting mp4 to wav...


chunk:   2%|▏         | 174/7047 [00:00<00:03, 1735.42it/s, now=None]

MoviePy - Writing audio in WAV_Files/223_2.26.20_S_SC.wav


                                                                      

MoviePy - Done.
Transcribing...
result2:
{   'alternative': [   {   'confidence': 0.9549064,
                           'transcript': 'and start simulation hi class how '
                                         'are you guys doing today how are you '
                                         "I'm good thank you we're going to "
                                         'talk about some Behavior '
                                         'expectations for our classroom today '
                                         'does anyone have one off the top of '
                                         'their head that we should include in '
                                         'our expectations for class are you '
                                         'going to show up on time and like be '
                                         "ready to work yeah thank you that's "
                                         'a great one does everyone agree with '
                                      

chunk:   4%|▎         | 253/6877 [00:00<00:02, 2455.92it/s, now=None]

MoviePy - Writing audio in WAV_Files/210_1.31.20_S_SC.wav


                                                                      

MoviePy - Done.
Transcribing...
result2:
{   'alternative': [   {   'confidence': 0.9429642,
                           'transcript': 'begin simulation good afternoon '
                                         "class how are we doing today I'm "
                                         "fine thank you it's great to have "
                                         'all of you with me today we are a '
                                         'couple weeks into the semester but I '
                                         'thought it would be a good idea for '
                                         'us to review some of our classroom '
                                         'Norms so I want to get right into it '
                                         'so I want to remind you guys that '
                                         'one of our first classrooms or '
                                         'something to contribute why do you '
                                         'thi

chunk:   3%|▎         | 226/6924 [00:00<00:02, 2255.17it/s, now=None]

MoviePy - Writing audio in WAV_Files/228_3.4.20_S_SC.wav


                                                                      

MoviePy - Done.
Transcribing...
result2:
{   'alternative': [   {   'confidence': 0.94297504,
                           'transcript': 'start simulation hi everybody I Miss '
                                         'Murphy hi there so I just wanted to '
                                         'review some of our expectations at '
                                         'our classroom today so the first one '
                                         "I'm thinking of is raising your hand "
                                         "that's a pretty big one when she say "
                                         'why is it important I think '
                                         'everybody yeah totally raise your '
                                         "hand more demons something you're "
                                         "saying over there oh sorry no you're "
                                         "fine so something that's also "
                                        

chunk:   1%|▏         | 102/6898 [00:00<00:07, 940.12it/s, now=None]

MoviePy - Writing audio in WAV_Files/226_2.26.20_S_SC.wav


                                                                      

MoviePy - Done.
Transcribing...
result2:
{   'alternative': [   {   'confidence': 0.94154656,
                           'transcript': 'hello class hello hi my name is Miss '
                                         'golf how are you okay I am doing '
                                         "well so today we're going to talk "
                                         'about setting some classroom '
                                         'expectations so the first one is to '
                                         'be respectful so raise your hand if '
                                         'you think you know what that means '
                                         "where's my pen you took it I didn't "
                                         'picture picture Meena do you know '
                                         'what that means to be respectful '
                                         'like be nice to other people who '
                                         "don'

### Test 2: Google Cloud Speech API

This requires credentials.

In [12]:
temp_file = wav_path + file_list[0] + '.wav'

In [13]:
# use the audio file as the audio source
r = sr.Recognizer()
with sr.AudioFile(temp_file) as source:
    audio = r.record(source)  # read the entire audio file

In [19]:
# recognize speech using Google Cloud Speech
GOOGLE_CLOUD_SPEECH_CREDENTIALS = 'Archive/google_secret_key.json'
try:
    print("Google Cloud Speech thinks you said " + r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS))
except sr.UnknownValueError:
    print("Google Cloud Speech could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Google Cloud Speech service; {0}".format(e))

Could not request results from Google Cloud Speech service; 400 Sync input too long. For audio longer than 1 min use LongRunningRecognize with a 'uri' parameter.


Does not work on audio longer than 1 minute. Would need to upload files to google cloud storage to transcribe them. Found this article that seems helful. This option would allow us to use the speaker diarization functionality. There is a cost associated with the storage and using the API, but it is free to start.

https://towardsdatascience.com/how-to-use-google-speech-to-text-api-to-transcribe-long-audio-files-1c886f4eb3e9