### Extract text from TikTok videos
First, manually download the audio of the TikTok videos whose speech you want to transcribe, using the website https://ssstik.io/download-tiktok-mp3, and place the downloaded mp3 files in the source directory (`video_data/source`).
Then run the notebook: it first converts each mp3 file to wav (saved in `video_data/output`) and then writes the speech recognized in each file to a text file in the text directory (`video_data/text`).

In [40]:
import os
import subprocess

import speech_recognition as sr

In [41]:
# Convert every audio file in the source directory to wav with ffmpeg.
# ffmpeg must be installed and on PATH (on linux: "sudo apt install ffmpeg").

# Human-readable command template. It is only used to log what is about to be
# run; the real invocation below passes an argument list so that filenames
# containing spaces or shell metacharacters are handled safely.
command2wav = "ffmpeg -i {input_filename} {output_filename} -y"

source_directory = 'video_data/source'
output_directory = "video_data/output"
text_directory = "video_data/text"


def wav_output_path(input_path: str, output_dir: str) -> str:
    """Return the path of the wav file that corresponds to ``input_path``.

    The result keeps the base name of ``input_path``, replaces its last
    extension with ``.wav`` and places it inside ``output_dir``.
    """
    stem = os.path.splitext(os.path.basename(input_path))[0]
    return os.path.join(output_dir, stem + ".wav")


# Create the working directories up front so a fresh checkout can run the
# notebook without manual setup (an empty source directory converts nothing).
for directory in (source_directory, output_directory, text_directory):
    os.makedirs(directory, exist_ok=True)

# Iterate over the files in the source directory (sorted for a reproducible
# order) and convert each of them into wav.
for filename in sorted(os.listdir(source_directory)):
    i_f = os.path.join(source_directory, filename)
    if not os.path.isfile(i_f):
        continue  # skip sub-directories and other non-file entries
    print("input:", i_f)
    o_f = wav_output_path(i_f, output_directory)
    print("output:", o_f)
    actual_command2wav = command2wav.format(input_filename=i_f, output_filename=o_f)
    print("command to be run:", actual_command2wav)
    try:
        # "-y" overwrites an existing output file without prompting.
        completed = subprocess.run(["ffmpeg", "-i", i_f, o_f, "-y"], check=False)
    except FileNotFoundError:
        # Raised when the ffmpeg executable itself cannot be found.
        print("ffmpeg executable not found: install it and re-run this cell")
        break
    if completed.returncode != 0:
        print("ffmpeg failed with exit code", completed.returncode, "for", i_f)

input: video_data/source/video_1.mp3
output: video_data/output/video_1.wav
command to be run: ffmpeg -i video_data/source/video_1.mp3 video_data/output/video_1.wav -y
input: video_data/source/tik_tok_meloni.mp3
output: video_data/output/tik_tok_meloni.wav
command to be run: ffmpeg -i video_data/source/tik_tok_meloni.mp3 video_data/output/tik_tok_meloni.wav -y


ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --e

In [42]:
# Recognize the speech in each converted audio file with Google's speech
# recognition service (speech-to-text) and save the transcript as a text file.
# Possible input formats: .aiff .flac .wav
# Original author's note: max 50 requests a day; ask for an API key if more
# are needed.

recognizer_instance = sr.Recognizer()

# Make sure the transcript directory exists before writing into it.
os.makedirs(text_directory, exist_ok=True)

# Iterate over each file in the output directory (sorted for a reproducible order).
for filename in sorted(os.listdir(output_directory)):
    i_f = os.path.join(output_directory, filename)
    if not os.path.isfile(i_f):
        continue  # skip sub-directories and other non-file entries

    print("#"*40)
    print("\nElaborating message in file '" + i_f + "'...")

    # Output text filename: same base name as the audio file, ".txt" extension.
    o_f = os.path.join(text_directory, os.path.splitext(filename)[0] + ".txt")
    print("output text filename:", o_f)

    try:
        # record() reads the whole file; listen() would return only the first
        # phrase and stop at the first sufficiently long pause.
        with sr.AudioFile(i_f) as source:
            audio = recognizer_instance.record(source)

        # Recognize audio (Italian); show_all=False returns only the best transcript.
        text = recognizer_instance.recognize_google(audio, language="it-IT", show_all=False)
        print("Result: \n", text)

        # Write the transcript as UTF-8 so accented characters are stored the
        # same way on every platform.
        with open(o_f, 'w', encoding="utf-8") as f:
            f.write(text)

    except sr.UnknownValueError:
        # Raised when the service could not understand the audio; its message is empty.
        print("Speech could not be understood in '" + i_f + "'")
    except sr.RequestError as e:
        # Raised when the recognition request itself failed.
        print("Speech recognition request failed for '" + i_f + "':", e)
    except Exception as e:
        # Best effort: report the problem (e.g. unreadable audio file) and
        # continue with the next file.
        print("Could not process '" + i_f + "':", e)

########################################

Elaborating message in file 'video_data/output/tik_tok_meloni.wav'...
output text filename: video_data/text/tik_tok_meloni.txt
result2:
{   'alternative': [   {   'confidence': 0.96274823,
                           'transcript': 'Siamo in piena crisi energetica con '
                                         'bollette che sono ormai '
                                         'insostenibili per le famiglie per le '
                                         'imprese il nostro intero tessuto '
                                         'produttivo è a rischio con migliaia '
                                         'di aziende molte migliaia di posti '
                                         'di lavoro che rischiano di non '
                                         'esserci più bisogna intervenire '
                                         "immediatamente l'Unione Europea si è "
                                         'scoperta di colpo stazo dei ri

result2:
{   'alternative': [   {   'confidence': 0.95422369,
                           'transcript': 'fa Powerpoint There is no power '
                                         'point point presentation in '
                                         'PowerPoint Power to Express yourself '
                                         'Please Powerpoint Excel spreadsheet '
                                         'software in PowerPoint Powerpoint '
                                         'Excel'},
                       {   'transcript': 'fa Powerpoint There is no power '
                                         'point point presentation in '
                                         'PowerPoint da Power to Express '
                                         'yourself Please Powerpoint Excel '
                                         'spreadsheet software in PowerPoint '
                                         'Powerpoint Excel'},
                       {   'transcript': 'fa Powerpoint The