<a href="https://colab.research.google.com/github/bhagesh-codebeast/VideoTranscribeTranslate/blob/main/VideoTranscribeandTranslate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Convert video to text

In [3]:
# Print the version of whichever `python` executable the runtime's shell resolves.
!python --version

Python 3.10.12


In [None]:
!pipx ensurepath
!apt install pipx
!pip install pytube
!pip install moviepy
!pip install -U deep-translator
!pipx install insanely-fast-whisper

In [None]:
from deep_translator import GoogleTranslator
# Show the languages GoogleTranslator can translate to, requested as a dict
# (presumably language name -> language code; confirm against the
# deep-translator documentation). Handy for choosing a target language.
GoogleTranslator().get_supported_languages(as_dict=True)

In [12]:
import os
import re
import json
import subprocess
from pytube import YouTube
from moviepy.editor import VideoFileClip
from deep_translator import GoogleTranslator

class videoTranscribe:
  """Turn a video into text and, optionally, a sentence-by-sentence translation.

  The pipeline is: getAudio() -> transcribeAudio() -> getText() -> translateText().
  Each stage calls the previous one, so calling translateText() runs everything.

  Args:
    input_video_path: An http(s) URL (downloaded with pytube) or a local video
      file path (audio extracted with moviepy).
    ifw_path: Path to the insanely-fast-whisper executable.
    output_audio_path: Where the extracted/downloaded audio is written.
    ifw_transcript: Where the transcription CLI writes its JSON transcript.
    translate: When False, translateText() does nothing and returns None.
    source: Source language passed to GoogleTranslator ('auto' by default).
    language: Target language passed to GoogleTranslator ('hi' by default).

  Note: the path defaults are computed with os.getcwd() once, when the class is
  defined, not on every instantiation.
  """

  def __init__(self, input_video_path,  ifw_path, output_audio_path=os.path.join(os.getcwd(),'output_audio.mp3'), ifw_transcript=os.path.join(os.getcwd(),'output.json'),translate=True,source='auto',language='hi'):
    self.input_video_path = input_video_path
    self.output_audio_path = output_audio_path
    self.ifw_transcript = ifw_transcript
    self.ifw_path = ifw_path
    self.translate = translate
    self.source = source
    self.language = language

  def getAudio(self):
    """Write the video's audio track to self.output_audio_path and return that path.

    Raises:
      ValueError: If input_video_path is empty, the URL exposes no matching
        audio-only stream, or the local video has no audio track.
    """
    if not self.input_video_path:
      raise ValueError("input_video_path must be a non-empty URL or file path")
    if self.input_video_path.startswith('http'):
      # The stream is filtered to file_extension='mp4' but is saved under
      # output_audio_path as-is, whatever extension that path carries.
      stream = YouTube(self.input_video_path).streams.filter(only_audio=True, file_extension='mp4').first()
      if stream is None:
        raise ValueError(f"No audio-only mp4 stream found for {self.input_video_path}")
      stream.download(filename=self.output_audio_path)
    else:
      video_clip = VideoFileClip(self.input_video_path)
      try:
        audio_clip = video_clip.audio
        if audio_clip is None:
          raise ValueError(f"{self.input_video_path} has no audio track")
        audio_clip.write_audiofile(self.output_audio_path, codec='mp3')
      finally:
        # Release the reader resources held by the clip even if export fails.
        video_clip.close()
    return self.output_audio_path

  def transcribeAudio(self):
    """Run the transcription CLI on the extracted audio.

    Returns:
      self.ifw_transcript (the path of the JSON transcript) on success, or
      None when the audio file reported by getAudio() does not exist.

    Raises:
      RuntimeError: If the CLI exits with a non-zero status or exits cleanly
        without producing the transcript file.
    """
    if not os.path.exists(self.getAudio()):
      return None
    command = [self.ifw_path, "--transcript-path", self.ifw_transcript, "--file-name", self.output_audio_path]
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
      raise RuntimeError(
        f"Transcription command failed with exit code {result.returncode}: {result.stderr.strip()}"
      )
    # Report the path that was actually requested instead of searching stdout
    # for a hard-coded file name, which broke for custom transcript paths.
    if not os.path.exists(self.ifw_transcript):
      raise RuntimeError(f"Transcription finished but {self.ifw_transcript} was not created")
    return self.ifw_transcript

  def getText(self):
    """Transcribe the audio and return the "text" field of the JSON transcript.

    Returns an empty string when the transcript has no "text" key.
    """
    self.transcribeAudio()
    # Context manager guarantees the transcript file handle is closed.
    with open(self.ifw_transcript, "r", encoding="utf-8") as transcript_file:
      return json.load(transcript_file).get("text", "")

  def translateText(self):
    """Translate the transcript one '.'-separated segment at a time.

    Returns:
      A dict mapping each original segment to a one-element set holding its
      translation (the set wrapper is kept for backward compatibility), or
      None when self.translate is False.
    """
    if not self.translate:
      return None
    text_data = self.getText()
    # Build the translator once; its configuration does not change per segment.
    translator = GoogleTranslator(source=self.source, target=self.language)
    translated = {}
    for text in text_data.split('.'):
      # Skip empty and whitespace-only fragments (e.g. after the final period).
      if not text.strip():
        continue
      translated[text] = {translator.translate(text=str(text))}
    return translated


In [16]:
# Video to process; because it starts with 'http', getAudio() takes the YouTube download branch.
input_video_path = 'https://www.youtube.com/watch?v=p8QOnty6rSU'
# Absolute path to the insanely-fast-whisper executable.
# NOTE(review): assumes pipx ran as root and keeps its venvs under /root/.local/pipx — confirm on other hosts.
ifw_path = '/root/.local/pipx/venvs/insanely-fast-whisper/bin/insanely-fast-whisper'

# All other constructor arguments keep their defaults (translate=True, source='auto', language='hi').
instance = videoTranscribe(input_video_path, ifw_path)

In [17]:
%%time
# Runs the whole chain: translateText() -> getText() -> transcribeAudio() -> getAudio().
text = instance.translateText()

CPU times: user 11.5 s, sys: 183 ms, total: 11.7 s
Wall time: 1min 35s


In [18]:
# Display the mapping of original sentence -> translated sentence.
text

{' Hello again everyone and welcome back to LearnLinuxTV': {'सभी को फिर से नमस्कार और LearnLinuxTV में आपका पुनः स्वागत है'},
 " In today's episode of Linux Essentials, it's all about the usermod command": {'लिनक्स एसेंशियल के आज के एपिसोड में, यह सब यूजरमॉड कमांड के बारे में है'},
 ' The usermod command helps you, well, modify users, which is where the name comes from': {'यूजरमॉड कमांड आपको उपयोगकर्ताओं को संशोधित करने में मदद करता है, जहां से नाम आता है'},
 " And this isn't the first time that I've gone over user mod on this channel": {'और यह पहली बार नहीं है कि मैं इस चैनल पर उपयोगकर्ता मॉड पर गया हूं'},
 " In fact, it's one of those commands that I use all the time": {'वास्तव में, यह उन आदेशों में से एक है जिनका मैं हर समय उपयोग करता हूं'},
 " So I'm sure if you've caught other videos on my channel, you've probably seen me use the user mod command here and there": {'इसलिए मुझे यकीन है कि अगर आपने मेरे चैनल पर अन्य वीडियो देखे हैं, तो आपने शायद मुझे यहां-वहां यूजर मॉड कमांड का उपयोग

----------------------------------------------------

# References
## 1. [insanely-fast-whisper](https://github.com/Vaibhavs10/insanely-fast-whisper)
## 2. [Youtube Summariser](https://github.com/jxcinta/youtube_summariser/blob/main/youtube_summariser.py)