In [12]:
import speech_recognition as sr
from moviepy.editor import VideoFileClip
import os
from icecream import ic
import pandas as pd

def transcribe_video(video_path:str):
    # Extract the filename without extension and directory from the video_path
    video_filename = os.path.splitext(os.path.basename(video_path))[0]
    audio_path = f"zackdfilms/{video_filename}_audio.wav"  # Name the audio file after the video
    ic('Saving audio: ',audio_path)

    # Use MoviePy to extract the audio from the video
    video_clip = VideoFileClip(video_path)
    video_clip.audio.write_audiofile(audio_path)

    # Initialize the recognizer
    r = sr.Recognizer()

    # Use the recognizer to transcribe the audio
    with sr.AudioFile(audio_path) as source:
        audio_data = r.record(source)
        # Recognize speech using Google Web Speech API
        try:
            ic(audio_data)
            text = r.recognize_google(audio_data)
            # print("Transcription: ", text)
            df = pd.read_csv('zackdfilms/english_captions.csv')
            df = pd.concat([df,pd.DataFrame({'filename':[video_filename],'captions':[text]})])
            df.to_csv('zackdfilms/english_captions.csv',index=False)
        except sr.UnknownValueError:
            print("Google Web Speech API could not understand audio")
        except sr.RequestError as e:
            print(f"Could not request results from Google Web Speech API; {e}")
    
    # The audio file is not removed in this version so that it's saved with the video's name followed by "_audio"

# Example usage
transcribe_video("zackdfilms/raw/"+'8_How A Shotgun Shell Works 🤔.mp4')


ic| 'Saving audio: ':

 'Saving audio: '
    audio_path: 'zackdfilms/8_How A Shotgun Shell Works 🤔_audio.wav'


MoviePy - Writing audio in zackdfilms/8_How A Shotgun Shell Works 🤔_audio.wav


                                                                    

MoviePy - Done.


ic| df:                              filename  \
        0                                test   
        1           1_What Is A Guinea Worm 😨   
        2       8_How A Shotgun Shell Works 🤔   
        3  7_How Toe-To-Thumb Surgery Works 😱   
        4                9_What Are Scabies 😨   
        5           1_What Is A Guinea Worm 😨   
        0       8_How A Shotgun Shell Works 🤔   
        
                                                    captions  
        0  living room mein sofa Ke Niche kachra Jama ho ...  
        1  this is a Guinea worm it's a very parasite tha...  
        2  if you looked inside of a shotgun shell you se...  
        3  if you lose your son and it's not able to be r...  
        4  these are scabies and were actually caused by ...  
        5  this is a Guinea worm it's a very parasite tha...  
        0  if you looked inside of a shotgun shell you se...  


In [6]:
filenames = next(os.walk('zackdfilms/raw'), (None, None, []))[2]  
filenames_s = sorted(filenames, key=lambda x: int(x.split('_')[0]))
filenames_s

['0_How A Tranquilizer Works 💉.mp4',
 '1_What Is A Guinea Worm 😨.mp4',
 '2_How A Taser Works 😱.mp4',
 '3_How A Hand Transplant Works ✋.mp4',
 '4_How A Grenade Works 💣.mp4',
 '5_How A Keys Unlocks 🔑.mp4',
 '6_What Is A Tapeworm 😨.mp4',
 '7_How Toe-To-Thumb Surgery Works 😱.mp4',
 '8_How A Shotgun Shell Works 🤔.mp4',
 '9_What Are Scabies 😨.mp4',
 '10_How Do Magicians Swallow Swords🗡️.mp4',
 '11_Why Straight Razors Are Better 🤔.mp4',
 '12_Removing Blood Clots with Vacuum 😨.mp4',
 '13_How A Derma Roller Works 🤔.mp4',
 '14_Bullets Colliding Mid-Air 🤔 (explained).mp4',
 '15_Can You Choke on Your Tongue 😨.mp4',
 '16_How Gas Pumps Know When To Stop⛽🤔.mp4',
 '17_Why You Snore 😴.mp4',
 '18_How A Mouse Trap Works 🐭.mp4',
 '19_What Is Dandruff Really 🤔.mp4',
 '20_How A Bulletproof Vest Works 😱.mp4',
 '21_Sinus Lift Surgery Explained 🤔.mp4',
 '22_What Is A Skin Tag 😨.mp4',
 '23_Why You Should Floss 😨.mp4',
 '24_Why Pore Strips Can Be Bad 🤔.mp4',
 '25_What Is An Ingrown Toenail 😨.mp4',
 '26_How A Bal

In [9]:
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor() as executor:
    executor.map(transcribe_video, list(map(lambda x:'zackdfilms/raw/'+x,filenames_s)))

icicicic|ic |

|ic'  |||Saving audio: '   ''Saving audio: ic'icic'Saving audio: ':'|Saving audio: ||Saving audio: Saving audio: ' : '  ''ic :':'':': Saving audio: |' Saving audio:  Saving audio: 'Saving audio: ' Saving audio:  ''''Saving audio: 
'''':Saving audio: 'Saving audio: :    :Saving audio: 
Saving audio: '
'  audio_path    '     '

:''audio_pathaudio_path
'    :     Saving audio: Saving audio: ::    Saving audio: audio_path 'audio_path''  'audio_path':zackdfilms/1_What Is A Guinea Worm 😨_audio.wav
'
:':
'Saving audio:      zackdfilms/2_How A Taser Works 😱_audio.wavzackdfilms/0_How A Tranquilizer Works 💉_audio.wav          
'''audio_path'audio_path''zackdfilms/3_How A Hand Transplant Works ✋_audio.wavaudio_path

:
zackdfilms/4_How A Grenade Works 💣_audio.wav:zackdfilms/5_How A Keys Unlocks 🔑_audio.wav': '    ' 
' 
audio_path'
zackdfilms/9_What Are Scabies 😨_audio.wav':'zackdfilms/7_How Toe-To-Thumb Surgery Works 😱_audio.wav 
zackdfilms/8_How A Shotgun Shell Works 🤔_audio.wav''zackdfilms/6_Wha

MoviePy - Writing audio in zackdfilms/1_What Is A Guinea Worm 😨_audio.wav


                                                        

MoviePy - Writing audio in zackdfilms/0_How A Tranquilizer Works 💉_audio.wav


                                                                

MoviePy - Writing audio in zackdfilms/3_How A Hand Transplant Works ✋_audio.wav


                                                                

MoviePy - Writing audio in zackdfilms/2_How A Taser Works 😱_audio.wav


                                                                

MoviePy - Writing audio in zackdfilms/9_What Are Scabies 😨_audio.wav


                                                                

MoviePy - Writing audio in zackdfilms/5_How A Keys Unlocks 🔑_audio.wav


                                                                

MoviePy - Writing audio in zackdfilms/6_What Is A Tapeworm 😨_audio.wav


                                                                

MoviePy - Writing audio in zackdfilms/7_How Toe-To-Thumb Surgery Works 😱_audio.wav


                                                                

MoviePy - Writing audio in zackdfilms/4_How A Grenade Works 💣_audio.wav


                                                                

MoviePy - Writing audio in zackdfilms/8_How A Shotgun Shell Works 🤔_audio.wav


chunk:   1%|          | 6/867 [00:00<01:09, 12.45it/s, now=None]
chunk:   0%|          | 0/633 [00:00<?, ?it/s, now=None]

chunk:   0%|          | 0/591 [00:00<?, ?it/s, now=None]



chunk:   0%|          | 0/713 [00:00<?, ?it/s, now=None]


chunk:   0%|          | 0/653 [00:00<?, ?it/s, now=None]




chunk:   0%|          | 0/684 [00:00<?, ?it/s, now=None]





chunk:   0%|          | 0/542 [00:00<?, ?it/s, now=None]







chunk:   0%|          | 0/739 [00:00<?, ?it/s, now=None]






chunk:   3%|▎         | 30/867 [00:00<00:11, 71.10it/s, now=None]
chunk:   6%|▋         | 40/633 [00:00<00:01, 377.24it/s, now=None]

chunk:   6%|▋         | 37/591 [00:00<00:01, 367.54it/s, now=None]


chunk:   5%|▌         | 34/653 [00:00<00:01, 337.97it/s, now=None]



chunk:   5%|▌         | 36/713 [00:00<00:02, 252.76it/s, now=None]




chunk:   5%|▌         | 36/684 [00:00<00:01, 351.44it/s, now=None]








chunk:   0%|          | 0/652 [00:00<?, ?it/s, now=None]





chunk:   5%|▌         | 45/

MoviePy - Done.







chunk:  82%|████████▏ | 561/684 [00:12<00:02, 41.49it/s, now=None]







chunk:  49%|████▉     | 429/867 [00:12<00:13, 32.81it/s, now=None]

chunk:  96%|█████████▌| 566/591 [00:12<00:00, 33.76it/s, now=None]
chunk:  95%|█████████▌| 603/633 [00:12<00:00, 31.65it/s, now=None]


chunk:  90%|████████▉ | 586/653 [00:12<00:01, 37.94it/s, now=None]



chunk:  95%|█████████▌| 680/713 [00:12<00:00, 45.24it/s, now=None]






chunk: 100%|██████████| 572/572 [00:12<00:00, 31.99it/s, now=None]








chunk:  93%|█████████▎| 608/652 [00:12<00:01, 24.82it/s, now=None]




chunk:  82%|████████▏ | 561/684 [00:12<00:02, 41.49it/s, now=None]

chunk:  96%|█████████▌| 566/591 [00:12<00:00, 33.76it/s, now=None]


chunk:  90%|████████▉ | 586/653 [00:12<00:01, 37.94it/s, now=None]







chunk:  95%|█████████▍| 702/739 [00:12<00:00, 68.09it/s, now=None]








chunk:  93%|█████████▎| 608/652 [00:12<00:01, 24.82it/s, now=None]






chunk:  49%|████▉     | 429/867 [00:13<00:13, 32.81it/s, now=None]




MoviePy - Done.







chunk:  86%|████████▌ | 588/684 [00:13<00:02, 33.30it/s, now=None]







chunk:  51%|█████     | 442/867 [00:13<00:19, 22.26it/s, now=None]

chunk:  98%|█████████▊| 582/591 [00:13<00:00, 21.37it/s, now=None]
chunk:  98%|█████████▊| 618/633 [00:13<00:00, 21.02it/s, now=None]


chunk:  92%|█████████▏| 604/653 [00:13<00:02, 23.94it/s, now=None]



chunk:  97%|█████████▋| 694/713 [00:13<00:00, 24.82it/s, now=None]








chunk:  51%|█████     | 442/867 [00:13<00:19, 22.26it/s, now=None]








chunk:  95%|█████████▌| 621/652 [00:13<00:01, 18.80it/s, now=None]



chunk:  97%|█████████▋| 694/713 [00:13<00:00, 24.82it/s, now=None]
chunk:  98%|█████████▊| 618/633 [00:13<00:00, 21.02it/s, now=None]




chunk:  86%|████████▌ | 588/684 [00:13<00:02, 33.30it/s, now=None]

chunk:  98%|█████████▊| 582/591 [00:13<00:00, 21.37it/s, now=None]


chunk:  92%|█████████▏| 604/653 [00:13<00:02, 23.94it/s, now=None]







chunk:  51%|█████▏    | 446/867 [00:14<00:26, 16.06it/s, now=None]








chu

MoviePy - Done.







chunk:  53%|█████▎    | 459/867 [00:14<00:18, 22.01it/s, now=None]
chunk: 100%|█████████▉| 632/633 [00:14<00:00, 19.76it/s, now=None]


chunk:  94%|█████████▍| 617/653 [00:14<00:01, 20.35it/s, now=None]



chunk: 100%|██████████| 713/713 [00:14<00:00, 26.39it/s, now=None]








chunk:  53%|█████▎    | 459/867 [00:14<00:18, 22.01it/s, now=None]


chunk:  94%|█████████▍| 617/653 [00:14<00:01, 20.35it/s, now=None]








chunk:  98%|█████████▊| 638/652 [00:13<00:00, 22.27it/s, now=None]



chunk: 100%|██████████| 713/713 [00:14<00:00, 26.39it/s, now=None]
chunk: 100%|█████████▉| 632/633 [00:14<00:00, 19.76it/s, now=None]

                                                                  




chunk:  53%|█████▎    | 462/867 [00:14<00:25, 15.94it/s, now=None]


chunk:  95%|█████████▍| 620/653 [00:14<00:02, 15.15it/s, now=None]








chunk:  98%|█████████▊| 641/652 [00:14<00:00, 15.21it/s, now=None]




                                                                  


          

MoviePy - Done.







chunk:  54%|█████▍    | 468/867 [00:15<00:21, 18.68it/s, now=None]


chunk:  96%|█████████▌| 624/653 [00:14<00:01, 15.88it/s, now=None]








chunk:  99%|█████████▉| 646/652 [00:14<00:00, 18.08it/s, now=None]




chunk:  54%|█████▍    | 468/867 [00:15<00:21, 18.68it/s, now=None]


chunk:  96%|█████████▌| 624/653 [00:14<00:01, 15.88it/s, now=None]



                                                                  
                                                                  








chunk:  99%|█████████▉| 646/652 [00:14<00:00, 18.08it/s, now=None]




chunk:  55%|█████▍    | 473/867 [00:15<00:19, 20.39it/s, now=None]


chunk:  96%|█████████▌| 626/653 [00:14<00:01, 14.30it/s, now=None]




                                                                  


                                                                  








                                                                  

MoviePy - Done.







chunk:  55%|█████▍    | 476/867 [00:15<00:18, 21.06it/s, now=None]


chunk:  96%|█████████▋| 629/653 [00:14<00:01, 16.17it/s, now=None]








chunk: 100%|█████████▉| 649/652 [00:14<00:00, 15.82it/s, now=None]




                                                                  


                                                                  








                                                                  

MoviePy - Done.







chunk:  55%|█████▍    | 476/867 [00:15<00:18, 21.06it/s, now=None]


chunk:  96%|█████████▋| 629/653 [00:15<00:01, 16.17it/s, now=None]








chunk: 100%|█████████▉| 649/652 [00:14<00:00, 15.82it/s, now=None]








chunk: 100%|█████████▉| 649/652 [00:14<00:00, 15.82it/s, now=None]




chunk:  55%|█████▍    | 476/867 [00:15<00:18, 21.06it/s, now=None]


chunk:  96%|█████████▋| 629/653 [00:15<00:01, 16.17it/s, now=None]








chunk:  55%|█████▌    | 479/867 [00:15<00:28, 13.79it/s, now=None]




chunk:  89%|████████▉ | 612/684 [00:15<00:07, 10.28it/s, now=None]


chunk:  57%|█████▋    | 495/867 [00:15<00:11, 33.39it/s, now=None]




chunk:  90%|█████████ | 618/684 [00:15<00:03, 16.58it/s, now=None]








                                                                  


chunk:  98%|█████████▊| 639/653 [00:15<00:00, 18.94it/s, now=None]




                                                                  


                                                                  

MoviePy - Done.







chunk:  60%|██████    | 521/867 [00:16<00:04, 69.52it/s, now=None]


chunk:  60%|██████    | 521/867 [00:16<00:04, 69.52it/s, now=None]




chunk:  94%|█████████▍| 645/684 [00:15<00:00, 55.25it/s, now=None]


chunk:  61%|██████▏   | 533/867 [00:16<00:04, 68.26it/s, now=None]




chunk:  96%|█████████▌| 656/684 [00:15<00:00, 55.92it/s, now=None]


                                                                  




                                                                  

MoviePy - Done.







chunk:  64%|██████▍   | 558/867 [00:16<00:03, 100.94it/s, now=None]




chunk:  67%|██████▋   | 577/867 [00:16<00:02, 105.97it/s, now=None]




chunk: 100%|█████████▉| 683/684 [00:15<00:00, 74.06it/s, now=None]




                                                                   

MoviePy - Done.


                                                                   

MoviePy - Done.
Google Web Speech API could not understand audio
Google Web Speech API could not understand audio
Google Web Speech API could not understand audio
Google Web Speech API could not understand audio


icic| | dfdf:                         :filename                       filename\  
\        
0                                   0test                          
test           1
              11_What   1Is_What  AIs  GuineaA  Worm Guinea 😨Worm   
         😨0     8
_How         0     A 6Shotgun_What  ShellIs  WorksA  🤔Tapeworm    
😨           

                                                            captions
                                                      
        captions0    
living         0room   livingmein  roomsofa  meinKe  sofaNiche  kachraKe  JamaNiche  hokachra  .Jama. .ho   
.        .1.    this
 is         1a   thisGuinea  isworm  ait 'Guineas a very parasite tha...  
         worm0   itif' s a very parasite tha...  you
         looked0                   insideif  ofyou  aand  shotgunjust shell  food youthat se .contain.  .
  
ic| df:                              filename  \
        0                                test   
        1           1_What Is A Guinea Worm 