In [1]:
# Dependencies for the whole pipeline: environment access, video editing, and the OpenAI client.
import os
from dotenv import load_dotenv
from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip
# Runs before the key lookup below; presumably populates the environment from a local .env file — confirm.
load_dotenv()

from openai import OpenAI
# Read the API key from the environment rather than hardcoding it; this is None when the variable is unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [2]:
# Shared OpenAI client for the transcription request, authenticated with the
# key read from the environment.
client = OpenAI(api_key=OPENAI_API_KEY)


Extract Audio

In [3]:
# Source video to caption.
videoFilePath = r"C:\Users\aaesp\Projects\VideoEditor\WebsiteDemo.mp4"
# Derive the audio path from the video path (same folder and stem, .mp3
# extension) so the two cannot drift apart when the input changes.
audioFilePath = os.path.splitext(videoFilePath)[0] + ".mp3"

video = VideoFileClip(videoFilePath)
audio = video.audio
# A clip without an audio track exposes audio=None; fail with a clear message
# instead of an AttributeError on the write call below.
if audio is None:
    raise ValueError(f"No audio track found in {videoFilePath}")
audio.write_audiofile(audioFilePath)

MoviePy - Writing audio in C:\Users\aaesp\Projects\VideoEditor\WebsiteDemo.mp3


                                                                      

MoviePy - Done.




In [20]:
# Inspect the source video's frame size (the recorded output is [1920, 1080]).
video.size

[1920, 1080]

Transcribe

In [4]:
# Upload the extracted audio for transcription. "verbose_json" together with
# word-level timestamp granularity is what makes per-word start/end times
# available on the response.
with open(audioFilePath, "rb") as audio_stream:
    transcript = client.audio.transcriptions.create(
        file=audio_stream,
        model="whisper-1",
        timestamp_granularities=["word"],
        response_format="verbose_json",
    )

transcript

Transcription(text="All right, hello, my name is Alejandro Espino and this is my demo of my portfolio website. So as you can see, I have bought a domain, alejandroespino.dev, website isn't much so far. This is all I have. I pretty much have links to my LinkedIn and GitHub that when clicked, it'll take you to a separate tab of my LinkedIn and here's my GitHub. And I have a article for my resume. This isn't necessarily how I want it to be, but it's all I could figure out. I don't have much experience, actually, I don't have any experience with web development. So it's something you need to do a deeper dive into, but I got that to show. And I also have a contact page that I guess I could show really fast. Let's just say I wanted to send an email or a message and when that is sent, okay, this is another bug, but a notification will be sent to my email here, which is kind of nice. Things that I would like to do in the future is have a picture of myself. I don't have many professional lookin

In [5]:
# Word-level entries of the transcription (requested above via timestamp_granularities=["word"]).
transcriptList = transcript.words

In [21]:
# Peek at the first entry to see its shape: the recorded output is a dict with 'word', 'start' and 'end' keys.
transcriptList[0]

{'word': 'All', 'start': 1.8799999952316284, 'end': 3.0}

Adding Captions

In [6]:
def get_text_dimensions(text_string, font):
    """Measure the rendered size of ``text_string`` in ``font``.

    Adapted from https://stackoverflow.com/a/46220683/9263761

    Args:
        text_string: The text to measure.
        font: A font object exposing ``getmetrics()`` and ``getmask(text)``
            (for example a PIL ``ImageFont``).

    Returns:
        A ``(text_width, text_height)`` tuple. The width is the right edge of
        the rendered mask's bounding box; the height is its bottom edge plus
        the font descent. Returns ``(0, 0)`` when the text renders no visible
        pixels (empty or whitespace-only text), because the mask then has no
        bounding box.
    """
    # getmetrics() yields (ascent, descent); only the descent is needed here.
    _, descent = font.getmetrics()

    # Render the mask once and reuse its bounding box for both dimensions.
    bbox = font.getmask(text_string).getbbox()
    if bbox is None:
        # Nothing visible was drawn, so there is nothing to measure.
        return (0, 0)

    text_width = bbox[2]
    text_height = bbox[3] + descent

    return (text_width, text_height)

In [7]:
from PIL import Image, ImageDraw, ImageFont

def create_transparent_image_with_text(text, size=(300, 100), font_size=24, font_color=(255, 255, 255), output_file='output.png'):
    """Render ``text`` centered on a fully transparent RGBA canvas.

    Args:
        text: The caption text to draw.
        size: ``(width, height)`` of the canvas in pixels.
        font_size: Point size requested for the Arial font.
        font_color: Fill color used for the text.
        output_file: Accepted but not used by this function; nothing is
            written to disk.

    Returns:
        The in-memory PIL image containing the drawn text.
    """
    # Fully transparent background (alpha 0) so only the glyphs cover the video.
    canvas = Image.new('RGBA', size, (0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)

    # Prefer Arial; fall back to PIL's built-in font when it cannot be loaded.
    try:
        caption_font = ImageFont.truetype("arial.ttf", font_size)
    except IOError:
        caption_font = ImageFont.load_default()

    # Center the text by splitting the leftover space evenly on each axis.
    measured_width, measured_height = get_text_dimensions(text, caption_font)
    canvas_width, canvas_height = size[0], size[1]
    left = (canvas_width - measured_width) / 2
    top = (canvas_height - measured_height) / 2

    pen.text((left, top), text, font=caption_font, fill=font_color)

    return canvas


In [8]:
import numpy as np

In [41]:
# Smoke test: render a sample caption image and wrap its pixel array in an ImageClip.
im = create_transparent_image_with_text("HI")
clip = ImageClip(np.array(im))

In [47]:
# Re-inspect the first word entry and its timestamps before building the caption clips.
transcriptList[0]

{'word': 'All', 'start': 1.8799999952316284, 'end': 3.0}

In [9]:
# Shortest time (in seconds) a caption stays on screen. The transcript contains
# words whose start and end timestamps are equal (the printed pairs include
# "3.0 3.0"); a zero-duration clip is never rendered, so those words would
# silently vanish from the captions without this floor.
MIN_CAPTION_SECONDS = 0.1

captions = []

for obj in transcriptList:
    text = obj["word"]
    start = obj["start"]
    end = obj["end"]
    print(start,end)

    img = create_transparent_image_with_text(text)
    # Clamp to the minimum so every word is visible for at least a few frames.
    # A clamped caption may briefly overlap the word that follows it.
    duration = max(end - start, MIN_CAPTION_SECONDS)
    clip = ImageClip(np.array(img), duration=duration)
    # Schedule the caption to appear when the word is spoken.
    clip = clip.set_start(start)
    captions.append(clip)
    


1.8799999952316284 3.0
3.0 3.0
3.240000009536743 3.5199999809265137
3.640000104904175 3.799999952316284
3.799999952316284 3.9800000190734863
3.9800000190734863 5.0
5.0 5.519999980926514
5.519999980926514 6.139999866485596
6.139999866485596 7.099999904632568
7.099999904632568 7.380000114440918
7.380000114440918 7.539999961853027
7.539999961853027 7.960000038146973
7.960000038146973 8.300000190734863
8.300000190734863 8.979999542236328
8.979999542236328 10.420000076293945
10.420000076293945 10.420000076293945
10.420000076293945 11.100000381469727
12.520000457763672 14.779999732971191
14.779999732971191 16.559999465942383
16.559999465942383 16.700000762939453
16.700000762939453 16.899999618530273
16.899999618530273 17.040000915527344
17.040000915527344 17.299999237060547
17.299999237060547 17.6200008392334
17.6200008392334 17.8799991607666
17.8799991607666 18.639999389648438
18.639999389648438 18.639999389648438
19.200000762939453 19.760000228881836
19.760000228881836 20.399999618530273
2

In [68]:
# Sanity check: the duration assigned to the first caption clip.
captions[0].duration

1.1200000047683716

In [66]:
# Preview the clip list that gets composited: the base video followed by the caption clips.
[video]+captions

[<moviepy.video.io.VideoFileClip.VideoFileClip at 0x2494bf24350>,
 <moviepy.video.VideoClip.ImageClip at 0x2493ea558e0>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf27200>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf24ec0>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf26a80>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf26840>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf25490>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf279b0>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf27b90>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf279e0>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf26ae0>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf27ce0>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf27650>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf26e40>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf277d0>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf27f50>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf27c20>,
 <moviepy.video.VideoClip.ImageClip at 0x2494bf26c00>,

In [12]:
# Overlay the caption clips on the source video, using the video itself as the
# background layer.
newVideo = CompositeVideoClip([video] + captions, use_bgclip=True)
# Pin the result to the source length rather than to the end of the last caption.
newVideo = newVideo.set_duration(video.duration)
# Re-attach the source audio explicitly.
# NOTE(review): with use_bgclip=True, MoviePy 1.x appears to build the
# composite's audio from the overlay clips only, which would leave the output
# silent — confirm against the installed version. This call is harmless if the
# audio is already present.
newVideo = newVideo.set_audio(video.audio)

In [16]:
# Destination for the captioned video.
newVideoFilePath = r"C:\Users\aaesp\Projects\VideoEditor\NewWebsiteDemo.mp4"

# Encode the composite with its audio track, using four encoder threads.
newVideo.write_videofile(
    newVideoFilePath,
    audio=True,
    threads=4,
)

t:  39%|███▊      | 4375/11327 [02:38<02:20, 49.43it/s, now=None]

Moviepy - Building video C:\Users\aaesp\Projects\VideoEditor\NewWebsiteDemo.mp4.
Moviepy - Writing video C:\Users\aaesp\Projects\VideoEditor\NewWebsiteDemo.mp4





KeyboardInterrupt: 

