# Uploading videos to Gemini

In [1]:
import os
import time
import json
import base64
import hashlib
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory and file name of the JSONL experiment file; the two are joined
# into `experiment_path` further down in this notebook.
experiment_location = "data/input"
filename = "gemini-multimodal-video.jsonl"
# Directory prefix prepended to each "media" value found in the experiment
# file to build the local path that gets uploaded.
media_location = "data/media"

In [3]:
# Look up the Gemini API key in the process environment and stop right away
# with a ValueError when the variable is not defined.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if GEMINI_API_KEY is None:
    raise ValueError("GEMINI_API_KEY is not set")

# Hand the key to the SDK so the genai calls below use it.
genai.configure(api_key=GEMINI_API_KEY)

In [4]:
def compute_sha256_base64(file_path):
    """Return the SHA-256 digest of a file as a base64 string.

    The file at ``file_path`` is opened in binary mode and fed to the hasher
    in 8192-byte pieces, so the whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The raw 32-byte digest, base64-encoded and decoded to ``str``.
    """
    sha = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # An empty read marks end-of-file and ends the loop.
        while block := stream.read(8192):
            sha.update(block)
    encoded = base64.b64encode(sha.digest())
    return encoded.decode("utf-8")


def _wait_until_processed(file, poll_seconds=10):
    """Poll Gemini until ``file`` is no longer in the PROCESSING state.

    Args:
        file: A file object as returned by the genai file calls.
        poll_seconds: Seconds to sleep between two status checks.

    Returns:
        The most recently fetched file object, whose state is not PROCESSING.
    """
    while file.state.name == "PROCESSING":
        print("Waiting for file to be processed.")
        time.sleep(poll_seconds)
        file = genai.get_file(file.name)
    return file


def upload(file_path):
    """Upload a local file to Gemini unless an identical copy already exists.

    The local file's SHA-256 digest is compared with the hash of every file
    already listed by the service. A matching remote file is reused once it
    has finished processing; otherwise the local file is uploaded.

    Args:
        file_path: Path of the local file to upload.

    Returns:
        The ``name`` of the remote file (reused or newly uploaded).

    Raises:
        ValueError: If processing of the newly uploaded file failed.
    """
    # Hash of the local file, base64-encoded.
    local_hash = compute_sha256_base64(file_path)

    # Iterate lazily so the listing stops as soon as a match is found.
    for remote in genai.list_files():
        # The remote hash is handled as bytes holding a hex string, which is
        # re-encoded as base64 so it can be compared with the local hash.
        try:
            remote_digest = bytes.fromhex((remote.sha256_hash or b"").decode("utf-8"))
        except ValueError:
            # Missing or malformed hash: this entry cannot be matched.
            continue
        if base64.b64encode(remote_digest).decode("utf-8") != local_hash:
            continue

        # A duplicate may still be processing, or may have failed earlier;
        # only reuse it when it ended up in a non-failed state.
        remote = _wait_until_processed(remote)
        if remote.state.name != "FAILED":
            return remote.name

    # No usable copy exists remotely: upload and wait for processing.
    file = _wait_until_processed(genai.upload_file(path=file_path))
    if file.state.name == "FAILED":
        raise ValueError("File processing failed")
    return file.name

In [5]:
def _iter_media_parts(record):
    """Yield each prompt part of ``record`` that is a dict with a "media" key.

    Records whose "prompt" value is not a list yield nothing, as do prompt
    entries that are not dicts or that have no usable "parts" value.
    """
    prompts = record.get("prompt")
    if not isinstance(prompts, list):
        return
    for prompt in prompts:
        if not isinstance(prompt, dict):
            continue
        for part in prompt.get("parts") or []:
            if isinstance(part, dict) and "media" in part:
                yield part


files_to_upload = set()
experiment_path = f"{experiment_location}/{filename}"

# Read the experiment file; each non-blank line is one JSON record.
with open(experiment_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Skip blank lines (e.g. a trailing empty line) instead of failing to parse.
data_list = [json.loads(line) for line in lines if line.strip()]

# Collect the distinct local media paths referenced by the records.
for data in data_list:
    files_to_upload.update(
        f'{media_location}/{part["media"]}' for part in _iter_media_parts(data)
    )

# Upload each file once and remember local path -> uploaded file name.
genai_files = {}
for file_path in files_to_upload:
    genai_files[file_path] = upload(file_path)

# Annotate every media part with the name of its uploaded file.
for data in data_list:
    for part in _iter_media_parts(data):
        part["uploaded_filename"] = genai_files[f'{media_location}/{part["media"]}']

# Write the annotated records back to the same JSONL file, one per line.
with open(experiment_path, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(data) + "\n" for data in data_list)