# Download video
Download the audio stream of the [video](https://youtu.be/UTOQdej0Mus?si=aOmOqedANCravVA2).

In [None]:
from pytube import YouTube
# Chinese video
link = "https://youtu.be/UTOQdej0Mus?si=aOmOqedANCravVA2"

yt = YouTube(link)
# Print each audio-only stream so an itag can be chosen for the download.
audio_only_streams = yt.streams.filter(only_audio=True)
for audio_stream in audio_only_streams:
    print(audio_stream)


In [None]:
# itag of the audio stream to fetch — presumably picked from the audio-only
# stream listing; TODO confirm it is still offered for this video.
AUDIO_ITAG = 139

stream = yt.streams.get_by_itag(AUDIO_ITAG)
if stream is None:
    # get_by_itag returns None for an unknown itag; fail with an actionable
    # message instead of an AttributeError on the download call below.
    raise ValueError(
        f"No stream with itag {AUDIO_ITAG} is available; "
        "list the audio-only streams again and pick one of their itags."
    )
# download() returns the path of the saved file.
filename = stream.download("./downloads/", filename="sermon.mp4")

## Speech Detection
We use Google's speech-to-text API (via `recognize_google` from the `speech_recognition` package) with the generic key and the "zh-sg" language code.

In [7]:
# importing libraries 
import speech_recognition as sr 
import os 
from pydub import AudioSegment
from pydub.silence import split_on_silence

# create a speech recognition object
# (module-level: transcribe_audio() uses this single instance for every file)
r = sr.Recognizer()

# a function to recognize speech in the audio file
# so that we don't repeat ourselves in other functions
def transcribe_audio(path, language="zh-sg"):
    """Transcribe a single audio file with ``r.recognize_google``.

    Args:
        path: Audio file to read; passed to ``sr.AudioFile``.
        language: Language code forwarded to ``recognize_google``.
            Defaults to ``"zh-sg"``, the value that used to be hard-coded.

    Returns:
        Whatever ``r.recognize_google`` returns for the recording.

    Exceptions raised by ``recognize_google`` are not caught here; the
    chunking function in this notebook handles ``sr.UnknownValueError``.
    """
    # use the audio file as the audio source
    with sr.AudioFile(path) as source:
        # read the entire file into an in-memory recording
        audio_listened = r.record(source)
    # the recording is complete, so the file is closed before recognition runs
    return r.recognize_google(audio_listened, language=language)

# a function that splits the audio file into chunks on silence
# and applies speech recognition
def get_large_audio_transcription_on_silence(
    path,
    min_silence_len=500,
    silence_thresh_offset=14,
    keep_silence=500,
    folder_name="audio-chunks",
):
    """Split a large audio file into wav chunks on silence and transcribe each chunk.

    Args:
        path: Audio file to open with ``AudioSegment.from_file``.
        min_silence_len: Minimum silence length (milliseconds) at which
            ``split_on_silence`` cuts the audio. Experiment with this value
            for your target audio file.
        silence_thresh_offset: Amount subtracted from ``sound.dBFS`` to get
            the ``silence_thresh`` handed to ``split_on_silence``.
        keep_silence: Forwarded unchanged to ``split_on_silence``.
            NOTE(review): the previous inline comment described the default
            of 500 as "1 second" — presumably 500 ms kept on each side of a
            chunk; verify against the pydub documentation.
        folder_name: Directory the chunk wav files are exported to; created
            if it does not exist.

    Returns:
        str: The text of every recognized chunk in order. Each chunk's text
        is capitalized, followed by ". " and terminated with a newline.
        Chunks for which ``sr.UnknownValueError`` is raised are skipped.

    Side effects:
        Writes ``chunk{i}.wav`` files into ``folder_name`` and prints one
        line per chunk (the transcript, or ``Error: ...`` when skipped).
    """
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split the audio wherever silence lasts at least `min_silence_len`
    chunks = split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        # threshold is relative to the loudness of this particular file
        silence_thresh=sound.dBFS - silence_thresh_offset,
        keep_silence=keep_silence,
    )
    # create the directory that stores the audio chunks (no-op if present)
    os.makedirs(folder_name, exist_ok=True)
    # collect per-chunk text and join once instead of repeated string +=
    transcribed = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the audio chunk into the `folder_name` directory
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            # skip this chunk but keep going with the rest
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            transcribed.append(text + "\n")
    # return the text for all chunks detected
    return "".join(transcribed)

In [8]:
from pathlib import Path

Try speech detection on the whole 1-hour file.

In [None]:
# NOTE(review): machine-specific absolute path — presumably the same file the
# download cell saves under ./downloads/; confirm before making it relative.
filename = r"C:\Users\joshu\Documents\GitHub\subsy\downloads\sermon.mp4"
filepath = Path(filename)

# Transcribe BEFORE opening the output file: opening it in "w" mode first
# would truncate any earlier transcript and leave an empty file behind if the
# transcription fails part-way.
subs = get_large_audio_transcription_on_silence(filepath.resolve())

# Make sure the output directory exists so the finished transcript is not
# lost to a FileNotFoundError at write time.
subs_path = Path("subs") / f"{filepath.name}.txt"
subs_path.parent.mkdir(parents=True, exist_ok=True)
with open(subs_path, encoding="utf-8", mode="w") as f:
    f.write(subs)

Speech detection is accurate for a 10-second clip.

In [45]:
# Transcribe the short clip directly, without splitting it on silence.
clip_text = transcribe_audio(r"C:\Users\joshu\Documents\GitHub\subsy\downloads\audio.wav")
print(clip_text)

非常欢迎大家来上这个课程这个课程前后一共有四本花的时间要用一年可是学校假期的时候我们也跟着假期换句话说我们注


Speech detection makes minor errors on a 1-minute clip.

In [46]:
# Run the silence-splitting transcription on the clip and show the combined text.
one_minute_text = get_large_audio_transcription_on_silence(
    r"C:\Users\joshu\Documents\GitHub\subsy\downloads\sermon - 1m.mp4"
)
print(one_minute_text)

audio-chunks\chunk1.wav : 非常欢迎大家来上这个课程这个课程前后一共有四本. 
audio-chunks\chunk2.wav : 花的时间要用一年可是学校假期的时候我们也跟着假期. 
audio-chunks\chunk3.wav : 换句话说我们注重你在教会的生活也看重你的家庭生活. 
audio-chunks\chunk4.wav : 希望你跟我们建立美好的关系也希望你的家庭能够和谐建立. 
audio-chunks\chunk5.wav : 亲密的关系. 
audio-chunks\chunk6.wav : 今年的课程. 
audio-chunks\chunk7.wav : 似乎很长但是当你上完了以后你会觉得. 
audio-chunks\chunk8.wav : 并不长. 
audio-chunks\chunk9.wav : 因为. 
audio-chunks\chunk10.wav : 时间过得真快. 
audio-chunks\chunk11.wav : 你学习的真多. 
audio-chunks\chunk12.wav : 在这课程中间. 
audio-chunks\chunk13.wav : 前后一共分成四本. 
Error: 
audio-chunks\chunk15.wav : 是星星的起点. 
audio-chunks\chunk16.wav : 从关系入门和大家一起分享. 
audio-chunks\chunk17.wav : 因为人是一个关系体. 
audio-chunks\chunk18.wav : 从关系入门如何我们中华文化. 
audio-chunks\chunk19.wav : 我们中华文化非常强调. 
audio-chunks\chunk20.wav : 人伦关系. 
audio-chunks\chunk21.wav : 在儒家的传统里头谈到的五轮基本上就是关西. 
audio-chunks\chunk22.wav : 君臣. 
audio-chunks\chunk23.wav : 铺子. 
非常欢迎大家来上这个课程这个课程前后一共有四本. 花的时间要用一年可是学校假期的时候我们也跟着假期. 换句话说我们注重你在教会的生活也看重你的家庭生活. 希望你跟我们建立美好的关系也希望你的家庭能够和谐建立. 亲密的

In [14]:
# Copy subs/backup.txt to subs/final.txt, reducing every labelled line
# ("<something with 'audio'> : <text>") to just its text.
# NOTE(review): backup.txt is presumably a saved copy of the per-chunk lines
# printed during transcription — confirm.
with open("subs/backup.txt", encoding="utf-8", mode="r") as backup, \
        open("subs/final.txt", encoding="utf-8", mode="w") as final:
    for line in backup:
        # Split on the FIRST colon only, so colons inside the transcript are
        # kept; `sep` is empty when the line has no colon at all.
        _label, sep, text = line.partition(":")
        if "audio" in line and sep:
            line = text.strip() + "\n"
        # Lines without a label (or without a colon) are copied unchanged.
        final.write(line)
   