In [None]:
import json
import os
import re
import uuid
from datetime import datetime

# The star import is kept for backward compatibility. It stays ahead of the
# explicit third-party imports so their bindings take precedence.
from moviepy.editor import *
from moviepy.editor import VideoFileClip
import cv2
import numpy as np
import openai
import srt
import whisper_timestamped as whisper
from PIL import ImageFont, ImageDraw, Image

In [None]:
def extract_audio(input_video):
    """Extract the audio track of a video into a 16-bit PCM WAV file.

    The WAV file is written next to the video, with the video's extension
    replaced by ``.wav``.

    Args:
        input_video: Path of the source video file.

    Returns:
        Path of the WAV file that was written.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Swap only the real extension. str.replace("mp4", "wav") would also rewrite
    # "mp4" inside directory names, and would leave a non-mp4 path unchanged so
    # that the audio overwrites the source video.
    audio_path = os.path.splitext(input_video)[0] + ".wav"

    video = VideoFileClip(input_video)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError("No audio track found in %s" % input_video)
        audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        # Release the readers held by the clip once the audio has been written.
        video.close()

    return audio_path

def extract_subtitle(path_audio):
    """Transcribe an audio file with the Whisper "large" model and save it as SRT.

    The subtitle file is written next to the audio file, with the audio
    extension replaced by the ``_en.srt`` suffix.

    Args:
        path_audio: Path of the audio file to transcribe.

    Returns:
        Path of the SRT file that was written.
    """
    model = whisper.load_model("large")
    audio = whisper.load_audio(path_audio)
    result = whisper.transcribe(model, audio)

    # Swap only the real extension. str.replace(".wav", "_en.srt") leaves a
    # non-.wav path unchanged, so the SRT would overwrite the audio file.
    output_path = os.path.splitext(path_audio)[0] + "_en.srt"
    # The segments are passed straight through; the former
    # json.loads(json.dumps(...)) round-trip only produced a copy.
    json_to_srt(result['segments'], output_path)
    return output_path


def json_to_srt(json_data, srt_file):
    """Write transcript segments to a SubRip (.srt) file.

    Args:
        json_data: Iterable of mappings, each with 'start' and 'end' (passed to
            seconds_to_srt_time) and 'text' (the cue text).
        srt_file: Destination path; an existing file is overwritten.
    """
    with open(srt_file, 'w', encoding='utf-8') as f:
        # SRT cue numbers start at 1.
        for count, item in enumerate(json_data, start=1):
            start_time = seconds_to_srt_time(item['start'])
            end_time = seconds_to_srt_time(item['end'])
            # Strip surrounding whitespace so it does not end up in the cue text.
            text = item['text'].strip()
            f.write(f"{count}\n{start_time} --> {end_time}\n{text}\n\n")

def seconds_to_srt_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Non-negative number of seconds (int or float). Negative
            values are clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for 3661.5.
    """
    # Round to whole milliseconds first. Truncating the fractional part turns
    # values such as 2.3 into 299 ms because of binary floating-point error.
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"

def make_message(path_srt, country, stop_at_index=70):
    """Build the chat request that asks for an SRT file to be translated.

    Args:
        path_srt: Path of the English SRT file to embed in the prompt.
        country: Target language name inserted into the prompt (e.g. "Korean").
        stop_at_index: Cue number at which reading stops; that cue and
            everything after it are left out of the prompt. Pass ``None`` to
            send the whole file. The default of 70 keeps the original cut-off.

    Returns:
        A one-element list holding a ``{"role": "user", "content": ...}``
        message.
    """
    parts = ["The following is an SRT file. Convert the English subtitles to %s subtitles and provide the output in SRT file format. Only output and no other conversation.\n"%(country)]
    stop_marker = None if stop_at_index is None else str(stop_at_index)
    try:
        with open(path_srt, 'r', encoding='utf-8') as file:
            # A cue index is the first line of the file or the first line after
            # a blank separator. Only those lines may trigger the cut-off, so a
            # subtitle whose text is the same number does not truncate the file.
            at_cue_start = True
            for line in file:
                stripped = line.strip()
                if stop_marker is not None and at_cue_start and stripped == stop_marker:
                    break
                parts.append(line)
                at_cue_start = not stripped
    except FileNotFoundError:
        # NOTE(review): kept from the original. A missing file is reported
        # inside the prompt rather than raised, so the request is still sent.
        parts.append("Error: File not found.")

    return [
        {
            "role": "user",
            "content": "".join(parts),
        }
    ]

def save_script_as_other_country(path_srt, api_key, country="Korean"):
    """Translate an SRT file through the chat API and save the result.

    The translated file is written next to the source as
    ``<name>_<country><ext>`` (e.g. ``talk_en.srt`` -> ``talk_en_Korean.srt``).

    Args:
        path_srt: Path of the source SRT file.
        api_key: OpenAI API key forwarded to request_gpt_api.
        country: Target language name; used in the prompt and the file name.

    Returns:
        Path of the translated SRT file.
    """
    # Build the name from the split path, not a regex substitution. The old
    # pattern left any path not ending in lowercase ".srt" unchanged, so the
    # translation overwrote the source file.
    root, ext = os.path.splitext(path_srt)
    path_srt_kor = "%s_%s%s" % (root, country, ext or ".srt")

    request_payload = make_message(path_srt, country=country)
    final_content = request_gpt_api(api_key, request_payload)
    with open(path_srt_kor, "w", encoding='utf-8') as f:
        f.write(final_content)

    return path_srt_kor


def request_gpt_api(api_key, request_payload):
    """Send chat messages to gpt-3.5-turbo and return the full reply text.

    The completion is requested in streaming mode and the text fragments of
    the first choice are concatenated in arrival order.

    Args:
        api_key: OpenAI API key used to build the client.
        request_payload: List of chat messages passed as ``messages``.

    Returns:
        The concatenated reply text ("" when no fragment carried content).
    """
    stream = openai.OpenAI(api_key=api_key).chat.completions.create(
        model="gpt-3.5-turbo",
        messages=request_payload,
        stream=True,
    )

    fragments = []
    for event in stream:
        piece = event.choices[0].delta.content
        # Some stream events carry no text; skip them.
        if piece is not None:
            fragments.append(piece)
    return "".join(fragments)

def _subtitle_frame_spans(subtitles, fps):
    """Convert parsed subtitles to (first_frame, end_frame, text) tuples sorted by first frame."""
    spans = [
        (
            int(subtitle.start.total_seconds() * fps),
            int(subtitle.end.total_seconds() * fps),
            subtitle.content,
        )
        for subtitle in subtitles
    ]
    spans.sort(key=lambda span: span[0])
    return spans


def _subtitle_text_width(draw, text, font):
    """Return the rendered pixel width of `text`, working across Pillow versions."""
    if hasattr(draw, "textbbox"):
        # ImageDraw.textsize was removed in Pillow 10; textbbox replaces it.
        left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
        return int(right - left)
    return draw.textsize(text, font=font)[0]


def _draw_subtitle(frame, text, font, text_y):
    """Return a copy of the frame array with `text` drawn horizontally centred in white."""
    image = Image.fromarray(frame)
    draw = ImageDraw.Draw(image)
    x = (image.width - _subtitle_text_width(draw, text, font)) // 2
    draw.text((x, text_y), text, (255, 255, 255), font=font)
    return np.array(image)


def _write_subtitled_frames(cap, out, spans, font, text_y):
    """Stream every frame from `cap` to `out`, drawing the subtitles active on it."""
    active = []
    next_span = 0
    frame_index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Activate spans that have started, then retire those that have ended
        # (the end frame is exclusive).
        while next_span < len(spans) and spans[next_span][0] <= frame_index:
            active.append(spans[next_span])
            next_span += 1
        active = [span for span in active if span[1] > frame_index]
        for _first, _end, text in active:
            frame = _draw_subtitle(frame, text, font, text_y)
        out.write(frame)
        frame_index += 1


def _mux_original_audio(input_video, silent_video, output_video):
    """Encode `silent_video` to `output_video` with the audio track of `input_video`."""
    source_clip = VideoFileClip(input_video)
    try:
        video_clip = VideoFileClip(silent_video)
        try:
            final_clip = video_clip.set_audio(source_clip.audio)
            final_clip.write_videofile(output_video, codec="libx264", audio_codec="aac")
        finally:
            video_clip.close()
    finally:
        source_clip.close()


def add_video_subtitle_from_srt(input_video, output_video, srt_file,
                                font_path="./fonts/MaruBuri-Bold.ttf",
                                font_size=20, text_y=200):
    """Burn the subtitles of an SRT file into a video, keeping the original audio.

    Frames are read with OpenCV, the active subtitle text is drawn with Pillow,
    and the frames go to a temporary mp4v file in the working directory.
    MoviePy then re-encodes that file with the source audio into
    `output_video`, and the temporary file is removed.

    Frames are processed one at a time rather than buffered in memory. A
    subtitle that runs past the last frame is shown until the video ends.

    Args:
        input_video: Path of the source video.
        output_video: Path of the subtitled video to write.
        srt_file: Path of the UTF-8 SRT file to render.
        font_path: TrueType font file used for the text.
        font_size: Font size passed to ImageFont.truetype.
        text_y: Vertical pixel position of the text's top edge.

    Returns:
        None. When the input cannot be opened or reports no valid frame rate,
        an error is printed and nothing is written.
    """
    print(output_video)
    with open(srt_file, "r", encoding="utf-8") as f:
        subtitles = list(srt.parse(f))

    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        print("Error: Failed to open input video.")
        return

    tmp_path = "tmp%s.mp4"%(str(uuid.uuid4()))
    try:
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps or fps <= 0:
                print("Error: Input video reports an invalid frame rate.")
                return
            frame_size = (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
            # Load the font once instead of once per rendered frame.
            font = ImageFont.truetype(font_path, font_size)
            spans = _subtitle_frame_spans(subtitles, fps)

            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(tmp_path, fourcc, fps, frame_size)
            try:
                _write_subtitled_frames(cap, out, spans, font, text_y)
            finally:
                out.release()
        finally:
            cap.release()

        _mux_original_audio(input_video, tmp_path, output_video)
    finally:
        # Remove the intermediate file even when encoding fails part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


In [None]:
# Source video processed by the pipeline cells.
path_video = "./video_elonmusk.mp4"
# Read the OpenAI key from the environment so it is never saved in the notebook.
# Falls back to the previous empty placeholder when OPENAI_API_KEY is unset.
api_key = os.environ.get("OPENAI_API_KEY", "")

In [None]:
# Stage 1: pull the audio track out of the video and transcribe it to an SRT file.
path_audio = extract_audio(input_video=path_video)
path_srt_en = extract_subtitle(path_audio=path_audio)

# Stage 2: ask the chat model for a translated SRT file per target language.
path_srt_kor = save_script_as_other_country(path_srt=path_srt_en, api_key=api_key, country="Korean")
path_srt_fr = save_script_as_other_country(path_srt=path_srt_en, api_key=api_key, country="French")

In [None]:
# Burn each subtitle track into its own copy of the video. The output sits next
# to the SRT file with only the extension swapped to .mp4. splitext never yields
# the SRT path itself, unlike replace(".srt", ".mp4") on a non-.srt name.
for srt_path in (path_srt_en, path_srt_kor, path_srt_fr):
    add_video_subtitle_from_srt(path_video, os.path.splitext(srt_path)[0] + ".mp4", srt_path)