In [12]:
import ffmpeg
import openai
import os
from nomic import embed, login
import glob
import pymongo
import uuid
import tempfile

In [13]:
def video_to_images(video_path, output_path, fps=2):
    """Extract still frames from a video as numbered PNG files.

    Frames are written to ``<output_path>/image_0001.png``,
    ``image_0002.png``, ... in playback order.

    Args:
        video_path: Path of the video to read.
        output_path: Directory that receives the PNG files; created if missing.
        fps: Number of frames to extract per second of video. Defaults to 2,
            i.e. one frame every half second.
    """
    # ffmpeg does not create the target directory itself.
    os.makedirs(output_path, exist_ok=True)

    (
        ffmpeg.input(video_path)
        .filter('fps', fps=fps)
        .output(output_path + '/image_%04d.png')
        .run()
    )


# Example usage: extract frames from video.mp4 into the output_images directory.
video_path = 'video.mp4'
output_path = 'output_images'
video_to_images(video_path, output_path)

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [14]:
# OpenAI API client; the key is read from the "openai_api_key" environment variable.
client = openai.OpenAI(api_key=os.getenv("openai_api_key"))

In [15]:
def read_srt(file_path, encoding="utf-8"):
    """Read a subtitle (.srt) file and return its full contents as one string.

    Args:
        file_path: Path of the subtitle file.
        encoding: Text encoding of the file. Defaults to UTF-8 so the result
            does not depend on the platform's default encoding.

    Returns:
        The file contents as a single string.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()

# Load the subtitle text; it is sent to the chat model as the user message below.
messi = read_srt("messi.srt")

In [16]:
# Ask the chat model to turn the raw subtitle text into a prose summary.
summary_messages = [
    {
        "role": "system",
        "content": "You are a helpful video transcriber. You will be given the subtitles of a video and you need to convert it into text. Don't make things up. Just write what you hear. The user will provide you with the subtitles. Generate a summary of what is being said in the subtitles. The summary should not acknowledge the subtitles. Make sure to write in your own words and understand the context and meaning of the subtitles. Give out minimum 6 sentences.",
    },
    {"role": "user", "content": messi},
]
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=summary_messages,
)

# The summary is the message text of the first returned choice.
text_summary = completion.choices[0].message.content

In [17]:
# glob returns paths in arbitrary order. Sort them so that a file's position in
# the list follows its zero-padded frame number (image_0001.png, image_0002.png, ...),
# because the list position is what gets stored as the frame's timestamp index.
images = sorted(glob.glob("output_images/*.png"))

In [18]:
# Authenticate with Nomic; raises KeyError if the nomic_api_key environment variable is unset.
login(token=os.environ["nomic_api_key"])

In [21]:
# Store one embedding per extracted frame in MongoDB.
# The MongoDB connection gets its own name so it does not replace the OpenAI
# client held in `client`.
mongo_client = pymongo.MongoClient(os.environ["MONGO_URI"])
db = mongo_client["final-db"]
collection_imbeds = db["messi-video"]
output = embed.images(images)

# The loop variable must not be named `embed`: that would rebind the nomic
# `embed` module and break every later `embed.*` call in the notebook.
# "ts" is the position of the frame in `images`.
frame_documents = [
    {"ts": frame_index, "vector-embedding": frame_embedding}
    for frame_index, frame_embedding in enumerate(output['embeddings'])
]

# insert_many rejects an empty list, so only write when there is something to store.
# NOTE: re-running this cell inserts the same frames again.
if frame_documents:
    collection_imbeds.insert_many(frame_documents)

{'prompt_tokens': 3840, 'total_tokens': 3840}
{'prompt_tokens': 16384, 'total_tokens': 16384}
{'prompt_tokens': 16384, 'total_tokens': 16384}
{'prompt_tokens': 16384, 'total_tokens': 16384}
{'prompt_tokens': 16384, 'total_tokens': 16384}
{'prompt_tokens': 16384, 'total_tokens': 16384}
{'prompt_tokens': 16384, 'total_tokens': 16384}
{'prompt_tokens': 16384, 'total_tokens': 16384}
{'prompt_tokens': 16384, 'total_tokens': 16384}
{'prompt_tokens': 16384, 'total_tokens': 16384}


In [20]:
# Embed the summary text as a search query for the vector search over frame embeddings.
# NOTE(review): the frames were embedded with embed.images(); confirm that the text
# model used here shares an embedding space with that image model.
search_query_embed = embed.text(texts=[text_summary], task_type="search_query")

In [22]:
def make_video_clip(input_video_filename, output_video_filename, start_time, end_time):
    """Write the section of a video between start_time and end_time to a new file.

    start_time and end_time are passed to ffmpeg as the input's ``ss`` and
    ``to`` options. An existing output file is overwritten.
    """
    trimmed_input = ffmpeg.input(input_video_filename, ss=start_time, to=end_time)
    clip_output = trimmed_input.output(output_video_filename)
    clip_output.run(overwrite_output=True)

In [23]:
def combine_timestamps(input_video_filename, output_video_filename, timestamp_windows):
    """Cut one clip per timestamp window and join the clips into a single video.

    Each clip is written to a temporary directory, the clip paths are listed
    in an ffmpeg concat file, and the clips are joined with stream copy.

    Args:
        input_video_filename: Path of the source video.
        output_video_filename: Path of the combined video; overwritten if it exists.
        timestamp_windows: Iterable of (start, end) pairs, in the order the
            clips should appear. Each pair is passed to make_video_clip unchanged.

    Raises:
        ValueError: If timestamp_windows contains no windows.
    """
    # Materialize first so generators work and emptiness can be checked.
    windows = list(timestamp_windows)
    if not windows:
        raise ValueError("timestamp_windows must contain at least one (start, end) window")

    with tempfile.TemporaryDirectory() as tempdir:
        concat_lines = []

        for index, window in enumerate(windows):
            # Index-based names are unique by construction and keep clip order visible.
            clip_path = f"{tempdir}/clip_{index:05d}.mp4"
            make_video_clip(input_video_filename, clip_path, window[0], window[1])

            # Quote the path for the concat file so spaces, backslashes and other
            # special characters are read literally; an embedded single quote
            # has to be written as '\''.
            quoted_path = clip_path.replace("'", "'\\''")
            concat_lines.append(f"file '{quoted_path}'")

        # List the clip paths in a text file for ffmpeg's concat demuxer.
        combined_file = f"{tempdir}/clips.txt"
        with open(combined_file, "w") as fp:
            fp.write("\n".join(concat_lines))

        # safe=0 lets the concat demuxer accept the absolute temp-dir paths.
        ffmpeg.input(combined_file, format='concat', safe=0).output(output_video_filename, c='copy').run(overwrite_output=True)


In [26]:
# Aggregation stage that runs a vector search with the summary embedding as the query.
search_query = {
    "$vectorSearch":
    {
        # First (and only) embedding returned for the summary text.
        "queryVector": search_query_embed["embeddings"][0],
        # Document field that holds the stored frame embeddings.
        "path": "vector-embedding",
        "numCandidates": 100,
        "index": "final_index",
        # Maximum number of frame documents to return.
        "limit": 50
    }
}

In [28]:
# Run the vector search and collect the frame index ("ts") of every hit.
pipeline = [search_query]
results = collection_imbeds.aggregate(pipeline)
results_as_dict = list(results)
timestamps = [hit["ts"] for hit in results_as_dict]

# Sort the frame indices, then halve each one to get the values printed below.
sT = [frame_index / 2 for frame_index in sorted(timestamps)]
print(sT)

[1.5, 3.0, 10.0, 11.5, 14.5, 16.0, 22.0, 25.5, 27.0, 27.5, 32.0, 34.5, 53.5, 54.5, 69.0, 71.0, 87.0, 103.5, 105.0, 108.0, 116.5, 123.0, 139.5, 140.0, 141.0, 153.0, 157.5, 163.5, 166.5, 169.5, 178.0, 179.0, 182.0, 183.5, 184.5, 187.5, 190.0, 193.5, 196.5, 205.5, 208.0, 209.5, 213.5, 227.5, 228.5, 241.5, 264.0, 272.5, 282.0, 285.5]


In [29]:
import datetime

# Turn each value in sT (seconds) into a (start, end) pair of timedelta strings,
# where end is 0.5 seconds after start.
final_timestamps = [
    (
        str(datetime.timedelta(seconds=start_seconds)),
        str(datetime.timedelta(seconds=start_seconds + 0.5)),
    )
    for start_seconds in sT
]

In [31]:
# Cut the selected windows out of video.mp4 and join them into out.mp4.
combine_timestamps("video.mp4", "out.mp4", final_timestamps)

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [33]:
# Synthesize the summary as speech and save it as an MP3 file.
# The OpenAI client is created explicitly so this cell does not depend on
# whatever `client` currently refers to.
client = openai.OpenAI(api_key=os.getenv("openai_api_key"))
speech_file_path = "speech.mp3"

# Use the streaming-response API: calling stream_to_file() on the plain
# (non-streaming) response is deprecated and emits a DeprecationWarning.
with client.audio.speech.with_streaming_response.create(
    model="tts-1",
    voice="alloy",
    input=text_summary,
) as response:
    response.stream_to_file(speech_file_path)

  response.stream_to_file(speech_file_path)


In [34]:
def combine_audio_video(input_video_file, input_audio_file, output_video_file):
    """Combine a video file and an audio file into one output video.

    The two inputs are fed to ffmpeg's concat filter as a single segment with
    one video stream and one audio stream.

    Args:
        input_video_file: Path of the file that provides the video stream.
        input_audio_file: Path of the file that provides the audio stream.
        output_video_file: Path of the combined file; overwritten if it exists.
    """
    input_video = ffmpeg.input(input_video_file)
    input_audio = ffmpeg.input(input_audio_file)

    # overwrite_output=True matches the other ffmpeg helpers in this notebook and
    # keeps a re-run from stopping on an output file left by a previous run.
    (
        ffmpeg.concat(input_video, input_audio, v=1, a=1)
        .output(output_video_file)
        .run(overwrite_output=True)
    )

# Combine the video from out.mp4 with the audio from speech.mp3 into final_with_ai_voice.mp4.
combine_audio_video("out.mp4", "speech.mp3", "final_with_ai_voice.mp4")

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab