A short step-by-step guide to using the subtitle generator:

1) In the Runtime menu, choose "Change runtime type" and select the T4 GPU

2) Run the first cell (note: it might fail the first time; if so, run it a second time and it should work)

3) Upload the video file under the name video.mp4 (the code reads it from /content/video.mp4)

4) Run the remaining code cells one by one, in order

5) The subtitles will be saved as subtitles.srt in the Files panel

In [None]:
!pip install git+https://github.com/openai/whisper.git

In [None]:
from moviepy.editor import VideoFileClip
# Extract the video's audio track to a WAV file for transcription.
# The clip is opened as a context manager so its underlying reader
# processes are released once the audio has been written.
# The output path is absolute so it matches the path the transcription
# step reads ("/content/audio.wav") regardless of the working directory.
with VideoFileClip("/content/video.mp4") as clip:
    if clip.audio is None:
        raise ValueError("/content/video.mp4 has no audio track to transcribe")
    clip.audio.write_audiofile("/content/audio.wav")

In [None]:
import whisper
from datetime import timedelta

# Load the Whisper "large" model once; the transcription step reuses this
# `model` object.
# NOTE(review): presumably the size of this model is why the guide asks for
# a GPU runtime — confirm before switching to a CPU runtime.
model = whisper.load_model("large")

In [None]:
!pip install srt

In [None]:
import srt
# Transcribe the extracted audio file, passing language='he' (Hebrew) to
# Whisper. The 'segments' entry of the returned result is what the SRT
# generation code iterates over.
result = model.transcribe("/content/audio.wav", language='he')
def split_text_by_timing(text, num_blocks):
    """Split *text* into at most ``num_blocks`` blocks of whole words.

    Each block except the last holds ``len(words) // num_blocks`` words
    (at least one); the last block also absorbs any remainder.

    Args:
        text: The text to split; words are separated by whitespace.
        num_blocks: Desired number of blocks. Values below 1 are treated
            as 1, and the result never has more blocks than words.

    Returns:
        A list of space-joined word blocks, empty when *text* has no words.
    """
    words = text.split()
    if not words:
        return []

    num_blocks = max(1, num_blocks)
    # Never let the block size drop to zero: with more requested blocks than
    # words the modulo below would otherwise raise ZeroDivisionError.
    words_per_block = max(1, len(words) // num_blocks)

    blocks = []
    current_block = []
    for position, word in enumerate(words, start=1):
        current_block.append(word)
        # Close a block every `words_per_block` words, but keep the final
        # slot open so it can collect all remaining words.
        if position % words_per_block == 0 and len(blocks) < num_blocks - 1:
            blocks.append(" ".join(current_block))
            current_block = []

    # Whatever is left over forms the last block.
    if current_block:
        blocks.append(" ".join(current_block))

    return blocks

def split_into_subtitles(text, min_chars_per_line=16, max_chars_per_line=24, max_lines_per_block=2):
    """Wrap *text* into subtitle blocks of short lines.

    Words are packed greedily into lines of at most ``max_chars_per_line``
    characters. A line that is still shorter than ``min_chars_per_line``
    takes the next word even if that pushes it past the maximum, so lines
    are never left very short. Completed lines are grouped into blocks of
    ``max_lines_per_block`` lines joined by newlines.

    Args:
        text: The text to wrap; words are separated by whitespace.
        min_chars_per_line: Minimum line length before a line may be closed.
        max_chars_per_line: Preferred maximum line length (inclusive).
        max_lines_per_block: Number of lines per subtitle block.

    Returns:
        A list of subtitle blocks, each a string of newline-joined lines;
        empty when *text* has no words.
    """
    subtitle_blocks = []
    current_block = []
    line_words = []
    line_length = 0  # length of " ".join(line_words)

    for word in text.split():
        # Only count a separating space when the line already has a word, so
        # a line of exactly max_chars_per_line characters is accepted.
        separator = 1 if line_words else 0
        candidate_length = line_length + separator + len(word)

        fits = candidate_length <= max_chars_per_line
        too_short = line_length < min_chars_per_line
        # An empty line always takes the word; otherwise an over-long word
        # would produce an empty subtitle line.
        if not line_words or fits or too_short:
            line_words.append(word)
            line_length = candidate_length
            continue

        # The line is complete: store it and start a new one with this word.
        current_block.append(" ".join(line_words))
        line_words = [word]
        line_length = len(word)

        if len(current_block) >= max_lines_per_block:
            subtitle_blocks.append("\n".join(current_block))
            current_block = []

    # Flush the unfinished line and block.
    if line_words:
        current_block.append(" ".join(line_words))
    if current_block:
        subtitle_blocks.append("\n".join(current_block))

    return subtitle_blocks

# Create SRT subtitles: each transcription segment is first divided into
# evenly timed text blocks, then every block is wrapped into lines that
# respect the character constraints.
subtitles = []
index = 1  # Running subtitle index, starting at 1

for segment in result['segments']:
    start_time = timedelta(seconds=segment['start'])
    end_time = timedelta(seconds=segment['end'])
    segment_text = segment['text']

    segment_duration = end_time - start_time

    # Roughly one block per 24 characters, but never more blocks than words
    # so that every block contains at least one word.
    word_count = len(segment_text.split())
    num_blocks = max(1, min(len(segment_text) // 24, word_count))

    text_blocks = split_text_by_timing(segment_text, num_blocks)
    if not text_blocks:
        continue  # Segment contains no words, so there is nothing to show

    # Every block of a segment gets an equal share of the segment's duration.
    time_per_block = segment_duration / len(text_blocks)

    for block in text_blocks:
        block_end_time = start_time + time_per_block
        subtitle_lines = split_into_subtitles(block)

        if subtitle_lines:
            # A block may wrap into several subtitles; give each its own
            # consecutive slice of the block's time instead of showing them
            # all at once with identical timestamps.
            time_per_subtitle = time_per_block / len(subtitle_lines)
            last_position = len(subtitle_lines) - 1

            for position, subtitle in enumerate(subtitle_lines):
                cue_start = start_time + time_per_subtitle * position
                # Snap the last cue to the block end to avoid rounding drift.
                if position == last_position:
                    cue_end = block_end_time
                else:
                    cue_end = cue_start + time_per_subtitle
                subtitles.append(srt.Subtitle(index=index, start=cue_start, end=cue_end, content=subtitle))
                index += 1

        start_time = block_end_time

# Compose SRT content
srt_content = srt.compose(subtitles)

# Write the SRT file as UTF-8 explicitly: the subtitles are Hebrew text and
# must not depend on the platform's default encoding.
with open("subtitles.srt", "w", encoding="utf-8") as f:
    f.write(srt_content)

print("Subtitles created and saved as subtitles.srt")