In [1]:
import whisper
import moviepy as mp
import eyed3

In [2]:

def generate_subtitles(video_path, audio_path, output_srt_path, model_name="turbo", device="cuda"):
    """Extract a video's audio, transcribe it with Whisper, and write an SRT file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Path the extracted audio track is written to; this file
            is what gets transcribed.
        output_srt_path: Path of the SRT file to write (UTF-8).
        model_name: Whisper model to load. Names listed at
            https://github.com/openai/whisper:
                "tiny.en" | "tiny"
                "base.en" | "base"
                "small.en" | "small"
                "medium.en" | "medium"
                "large-v1"
                "large-v2"
                "large-v3"
                "large"
                "large-v3-turbo"
                "turbo"
        device: Device passed to ``whisper.load_model`` (e.g. "cuda" or "cpu").

    Raises:
        ValueError: If the video has no audio track.
    """

    # Extract audio from the video. Always close the clip afterwards so the
    # underlying reader handles are released, even if the export fails.
    video = mp.VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(audio_path)
    finally:
        video.close()

    # Load the Whisper model
    model = whisper.load_model(model_name, device=device)

    # Transcribe the audio
    result = model.transcribe(audio_path, verbose=False, condition_on_previous_text=False)

    # Generate SRT file: numbered cue, time range, text, blank separator line
    with open(output_srt_path, "w", encoding="utf-8") as f:
        for index, segment in enumerate(result["segments"], start=1):
            # Strip surrounding whitespace so cue text does not start with a
            # stray space (presumably Whisper emits one — TODO confirm).
            text = segment["text"].strip()

            f.write(f"{index}\n")
            f.write(f"{format_time(segment['start'])} --> {format_time(segment['end'])}\n")
            f.write(f"{text}\n\n")

def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds once, then split with integer
    arithmetic. Doing the split on floats loses a millisecond on inputs such
    as 1.001 (``1.001 * 1000 % 1000`` evaluates to 0.999...).

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for 3661.5.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, total_ms = divmod(total_ms, 3_600_000)
    minutes, total_ms = divmod(total_ms, 60_000)
    secs, millis = divmod(total_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


In [3]:
def add_tag(audio_file: str, artist: str, album: str):
    """Set the artist and album tags on an audio file and save them.

    Args:
        audio_file: Path of the audio file to tag.
        artist: Value written to the tag's ``artist`` field.
        album: Value written to the tag's ``album`` field.

    Raises:
        ValueError: If eyed3 cannot load ``audio_file`` as an audio file.
    """
    audio = eyed3.load(audio_file)
    if audio is None:
        # eyed3.load returns None instead of raising for files it cannot handle.
        raise ValueError(f"eyed3 could not load {audio_file!r} as an audio file")
    if audio.tag is None:
        # A file with no existing tag has tag=None; create an empty one first.
        audio.initTag()
    audio.tag.artist = artist
    audio.tag.album = album
    audio.tag.save()

In [48]:
# %%timeit
if __name__ == "__main__":
    artist = "T. Kingfisher"
    album_name = "Nettle and Bone"
    for i in range(1, 23):
        # All files for one chapter share the same path stem.
        chapter_stem = f"videos/{album_name}/{i:02d} {album_name}"
        video_path = f"{chapter_stem}.mp4"
        audio_path = f"{chapter_stem}.mp3"
        # One SRT per chapter; a single shared path would be overwritten on
        # every iteration, leaving only the last chapter's transcript.
        output_srt_path = f"{chapter_stem}_gemini.srt"
        generate_subtitles(video_path, audio_path, output_srt_path)
        add_tag(audio_path, artist, album_name)

# results (kept as comments so the cell does not echo a string as its output)
# 1g= base nocuda:9m 38s base cuda:4m 27s turbo cuda: 9m 36s
# 10g= turbo cuda: 4m 17s
# 369k=
# sample_4g=turbo cuda: 11s

MoviePy - Writing audio in videos/Nettle and Bone/01 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/02 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/03 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/04 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/05 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/06 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/07 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/08 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/09 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/10 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/11 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/12 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/13 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/14 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/15 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/16 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/17 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/18 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/19 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/20 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/21 Nettle and Bone.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in videos/Nettle and Bone/22 Nettle and Bone.mp3


                                                                        

MoviePy - Done.


' results\n1g= base nocuda:9m 38s base cuda:4m 27s turbo cuda: 9m 36s\n10g= turbo cuda: 4m 17s\n369k=\nsample_4g=turbo cuda: 11s\n\n'

In [None]:

# NOTE(review): rewritten to use SubtitlesClip (assumes MoviePy 2.x API);
# not verified end-to-end — confirm against the installed MoviePy version.
def embed_subtitles(video_input, subtitle_path, video_output,
                    font=r'fonts\swansea-font\SwanseaItalic-AwqD.ttf'):
    """Burn the subtitles from an SRT file into a video using MoviePy.

    The cues in ``subtitle_path`` are parsed and rendered as timed text
    clips. Passing the path as the ``text`` of a single TextClip would draw
    the path string itself over the whole video.

    Args:
        video_input: Path of the source video.
        subtitle_path: Path of the SRT file (read as UTF-8).
        video_output: Path of the video file to write; should differ from
            ``video_input`` because the source is read while writing.
        font: Path of the font file used to render the subtitle text.
    """
    # Local import: SubtitlesClip lives in a submodule of the already-imported
    # moviepy package.
    from moviepy.video.tools.subtitles import SubtitlesClip

    def make_textclip(text):
        # Render one subtitle cue with the styling used for the overlay.
        return mp.TextClip(font=font, text=text, font_size=24, color='white')

    video = mp.VideoFileClip(video_input)
    try:
        subtitles = SubtitlesClip(subtitle_path, make_textclip=make_textclip, encoding="utf-8")
        subtitles = subtitles.with_position(("center", "bottom"))

        # Composite video and subtitles; pin the result to the video's length
        # so the subtitle track cannot extend or shorten the output.
        final_video = mp.CompositeVideoClip([video, subtitles]).with_duration(video.duration)

        # Write the final video to a file
        print(f'{video.duration=}')
        final_video.write_videofile(video_output, codec='libx264')
    finally:
        # Release the reader handles held by the source clip.
        video.close()


if __name__ == "__main__":
    file_name = "sample_1"
    video_input = f"videos/{file_name}.mkv"
    subtitle_path = f"videos/{file_name}_gemini.srt"
    video_output = f"videos/{file_name}_embedded.mkv"

    # Write to video_output, not video_input: writing to the input path would
    # overwrite the source video while it is still being read.
    embed_subtitles(video_input, subtitle_path, video_output)


In [4]:
# %%timeit
if __name__ == "__main__":
    # Transcribe a single video: the audio and SRT are written next to it
    # under videos/ using the same base name.
    file_name = "fileName"
    video_path = f"videos/{file_name}.mkv"
    audio_path = f"videos/{file_name}.mp3"
    output_srt_path = f"videos/{file_name}_gemini.srt"
    generate_subtitles(video_path, audio_path, output_srt_path)

# results (kept as comments so the cell does not echo a string as its output)
# 1g= base nocuda:9m 38s base cuda:4m 27s turbo cuda: 9m 36s
# 10g= turbo cuda: 4m 17s
# 369k=
# sample_4g=turbo cuda: 11s

MoviePy - Writing audio in videos/Kraven.The.Hunter.2024.1080p.WEBRip.DDP5.1.x265.10bit-LAMA.mp3


                                                                          

MoviePy - Done.


  checkpoint = torch.load(fp, map_location=device)


Detected language: English


100%|██████████| 760809/760809 [03:14<00:00, 3916.28frames/s] 


' results\n1g= base nocuda:9m 38s base cuda:4m 27s turbo cuda: 9m 36s\n10g= turbo cuda: 4m 17s\n369k=\nsample_4g=turbo cuda: 11s\n\n'