# Historical Speeches to Clips - Execution Notebook

This notebook uses the classes and functions of the HSC package to build an interactive pipeline that generates one-minute clips from historical speeches.

In [1]:
# Development aid: enable IPython's autoreload extension so imported modules are
# reloaded before each cell runs (mode 2 = all modules).
# Disable this cell to save time when not in development mode.
%load_ext autoreload
# %reload_ext autoreload
%autoreload 2

In [2]:
# Path and System Configuration
from pathlib import Path
import sys

# The project root is taken to be the parent of the current working directory.
root = Path.cwd().resolve().parent
scripts_path = root / "code" / "scripts"
captions_path = root / "code" / "captions"

# Prepend each package directory to sys.path once. Because every new entry is
# inserted at position 0, the captions directory ends up ahead of the scripts one.
for package_dir in (scripts_path, captions_path):
    path_entry = str(package_dir)
    if path_entry in sys.path:
        continue
    sys.path.insert(0, path_entry)
    print("Added to System Path:", package_dir)

Added to System Path: C:\Users\cjmij\z_projects\historical_speech_project\code\scripts
Added to System Path: C:\Users\cjmij\z_projects\historical_speech_project\code\captions


In [3]:
# External Package Imports
import json, os
from dotenv import load_dotenv

# Internal Scripts Package Imports
from scripts.ingest import ingest_folder
from scripts.deepgram_model import generate_deepgram_audio
from scripts.video_generator import create_video
from scripts.video_generator_221_advanced import create_video_ffmpeg


# Internal Captions Package Imports
from captions.caption_generator import prepare_file_for_adding_captions_n_headings_thru_html
from captions.line_level_captions_adv import split_lines_with_capitalization
from captions.video_with_captions_adv import create_video_with_captions_adv

# Section 0: Setup File Paths and Project Structure

In [4]:
# Name of the text to create videos for; it is interpolated into the data
# directory paths built in the next cell.
text_to_process = "federalist-10"
# File name of the background image shared across the video files; it is joined
# onto the backgrounds directory in the next cell.
background_image = "James_Madison.jpg"

In [5]:
# Setup directory paths for the selected raw text file.
# Every location is relative to the notebook's working directory.
DATA_DIR = Path("../data")
ASSETS_DIR = Path("../assets")

RAW_TEXT_DIR = DATA_DIR / "raw_texts" / text_to_process
PROCESSED_TEXT_DIR = DATA_DIR / "processed_texts"

TEXT_CHUNKS_DIR = PROCESSED_TEXT_DIR / f"{text_to_process}_chunks"
BACKGROUND_DIR = ASSETS_DIR / "backgrounds"
BACKGROUND_IMAGE_PATH = BACKGROUND_DIR / background_image
OVERLAY_IMAGE_DIR = ASSETS_DIR / "overlay_images"

PROCESSED_AUDIO_DIR = DATA_DIR / "processed_audio" / f"{text_to_process}_chunks"
VIDEO_OUTPUT_DIR = DATA_DIR / "video_output" / text_to_process
BASIC_VIDEO_OUTPUT_DIR = VIDEO_OUTPUT_DIR / "basic"
CAPTION_VIDEO_OUTPUT_DIR = VIDEO_OUTPUT_DIR / "caption"

CAPTIONS_DIR = DATA_DIR / "captions" / text_to_process
WORD_CAPTION_JSON_DIR = CAPTIONS_DIR / "word_timestamps"
LINE_CAPTION_JSON_DIR = CAPTIONS_DIR / "line_timestamps"
IMAGE_CAPTION_JSON_DIR = CAPTIONS_DIR / "image_timestamps"

# Ensure all necessary directories exist
for d in [
    RAW_TEXT_DIR,
    PROCESSED_TEXT_DIR,
    TEXT_CHUNKS_DIR,
    BACKGROUND_DIR,
    OVERLAY_IMAGE_DIR,
    PROCESSED_AUDIO_DIR,
    BASIC_VIDEO_OUTPUT_DIR,
    CAPTION_VIDEO_OUTPUT_DIR,
    WORD_CAPTION_JSON_DIR,
    LINE_CAPTION_JSON_DIR,
    IMAGE_CAPTION_JSON_DIR,
]:
    d.mkdir(parents=True, exist_ok=True)

# Load environment variables from ../.env
load_dotenv(dotenv_path=Path("../.env"))
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
REPLICATE_API_KEY = os.getenv("REPLICATE_API_KEY")

# os.getenv returns None for an unset variable, so surface missing keys here
# rather than letting a later cell pass None to an external service.
missing_api_keys = [
    key_name
    for key_name, key_value in (
        ("DEEPGRAM_API_KEY", DEEPGRAM_API_KEY),
        ("PEXELS_API_KEY", PEXELS_API_KEY),
        ("REPLICATE_API_KEY", REPLICATE_API_KEY),
    )
    if not key_value
]
if missing_api_keys:
    print(
        "Warning: environment variable(s) not set:",
        ", ".join(missing_api_keys),
        "- any step that needs them will be called without a key.",
    )

# Section 1: Text Processing

In [None]:
# This is the main ingestion process that takes the text and breaks it into different sized chunks
# These parameters will control the length of each video and the total number of chunks generated
# ingest_folder(RAW_TEXT_DIR, PROCESSED_TEXT_DIR, target_seconds=65, wpm=120)

In [None]:
# Show the start of the manifest. Read it as UTF-8 explicitly, matching how the
# chunk file is read below, so the result does not depend on the platform's
# default encoding.
MANIFEST_PREVIEW_CHARS = 464
manifest_path = TEXT_CHUNKS_DIR / "manifest.json"
print(manifest_path.read_text(encoding="utf-8")[:MANIFEST_PREVIEW_CHARS], "...\n")

# Peek first chunk (every JSON in the chunks directory except the manifest)
chunk_paths = sorted(p for p in TEXT_CHUNKS_DIR.glob("*.json") if p.name != "manifest.json")
if not chunk_paths:
    # Fail with an actionable message instead of a bare IndexError
    raise FileNotFoundError(
        f"No chunk JSON files found in {TEXT_CHUNKS_DIR}; run the ingestion cell first."
    )
first = chunk_paths[0]
chunk = json.loads(first.read_text(encoding="utf-8"))

# Last expression of the cell: displayed as the cell output
chunk["chunk_id"], chunk["approx_word_count"], chunk["est_duration_sec"], chunk["text"][:200] + "..."

# Section 2: Deepgram TTS Model Setup

In [None]:
# Collect every chunk JSON in sorted order, leaving out manifest.json
chunk_files = sorted(
    chunk_json
    for chunk_json in TEXT_CHUNKS_DIR.glob("*.json")
    if chunk_json.name != "manifest.json"
)

# This setup will create a multitude of mp3 files based on the contents of the chunks dir variable

# for chunk_path in chunk_files:
# # if chunk_files:
# #     chunk_path = chunk_files[0]
#     current = json.loads(chunk_path.read_text(encoding="utf-8"))

#     temp_out_file_name = Path(f"{PROCESSED_AUDIO_DIR}/{current['chunk_id']}-tts.mp3")
#     temp_out_file_name.parent.mkdir(parents=True, exist_ok=True)
#     temp_input_text = {"text": current["text"]}

#     generate_deepgram_audio(local_api_key=DEEPGRAM_API_KEY, 
#                             out_file_name=str(temp_out_file_name),
#                             input_text=temp_input_text)

# Section 3: Merging Audio and Video

In [None]:
# Sorted list of every mp3 in the processed audio directory
mp3_files = list(PROCESSED_AUDIO_DIR.glob("*.mp3"))
mp3_files.sort()

print(f"Found {len(mp3_files)} mp3 files to process for video creation.")

# for idx, file in enumerate(mp3_files, 1):
#     print(f"[{idx}/{len(mp3_files)}] Creating video for audio: {file.name}")
#     create_video(
#         image_path=BACKGROUND_IMAGE_PATH,
#         audio_path=file,
#         output_path=BASIC_VIDEO_OUTPUT_DIR,
#         target_size=(1080, 1920)
#     )

In [None]:
# Sorted list of every mp4 in the basic video output directory
mp4_files = list(BASIC_VIDEO_OUTPUT_DIR.glob("*.mp4"))
mp4_files.sort()

print(f"\nFound {len(mp4_files)} mp4 files to process for caption JSON generation.")

# for idx, file in enumerate(mp4_files, 1):
#     file_stem = file.stem  # gets the filename without the suffix as a string
#     out_json_word_path = Path(f"{WORD_CAPTION_JSON_DIR}/{file_stem}.json")
#     out_json_line_path = Path(f"{LINE_CAPTION_JSON_DIR}/{file_stem}.json")

#     print(f"[{idx}/{len(mp4_files)}] Preparing JSON for: {file.name}")
#     prepare_file_for_adding_captions_n_headings_thru_html(input_video_path=file, 
#                                                         out_json_path=out_json_word_path)
    
#     print(f"[{idx}/{len(mp4_files)}] Splitting lines with capitalization for: {file.name}")
#     split_lines_with_capitalization(out_json_word_path, out_json_line_path)

In [None]:
# Relies on mp4_files collected by the previous cell; the captioning loop below
# is currently commented out, so this cell only reports the count.
print(f"\nAdding captions to {len(mp4_files)} videos.")

# for idx, file in enumerate(mp4_files, 1):
#     file_stem = file.stem  # e.g., "video1"
#     json_line_path = LINE_CAPTION_JSON_DIR / f"{file_stem}.json"

#     # Ensure output file has .mp4 extension and "-caption" suffix before extension
#     out_caption_video_name = f"{file_stem}-caption.mp4"
#     out_caption_video_path = CAPTION_VIDEO_OUTPUT_DIR / out_caption_video_name

#     print(f"[{idx}/{len(mp4_files)}] Creating captioned video for: {out_caption_video_name}")
#     create_video_with_captions_adv(
#         mp4_file=file,
#         linelevel_timestamps=json_line_path,
#         video_out_path=out_caption_video_path,
#         font="COPRGTB"
#     )

In [None]:
# from matplotlib import font_manager

# # Collect all available system fonts
# available_fonts = font_manager.findSystemFonts(fontpaths=None, fontext='ttf')

# # Print font file paths
# for font_path in available_fonts:
#     print(font_path)

# # Optional: get the "family name" from each font
# from matplotlib import font_manager
# for font_path in available_fonts[:10]:  # just first 10
#     font_prop = font_manager.FontProperties(fname=font_path)
#     print(font_prop.get_name())


# Section 4: Adding Additional Visual Effects

In [6]:
from video_inserts.keyphrase_extractor import extract_keyphrases
from video_inserts.create_inserts_from_timestamps import build_inserts_for_chunk

# Build an image-insert document for every chunk that has word-level timestamps.
chunk_files = sorted(p for p in TEXT_CHUNKS_DIR.glob("*.json") if p.name != "manifest.json")

# Loop-invariant: make sure the output directory exists once, up front
IMAGE_CAPTION_JSON_DIR.mkdir(parents=True, exist_ok=True)

# Tip: iterate over chunk_files[:1] to try the settings on a single chunk first
for chunk_path in chunk_files:
    current_word_ts = WORD_CAPTION_JSON_DIR / f"{chunk_path.stem}-tts.json"
    if not current_word_ts.is_file():
        # The word-timestamp JSON is expected to come from the caption-JSON step
        # in Section 3; skip chunks that have not been through it instead of
        # aborting the whole batch.
        print(f"Skipping {chunk_path.name}: no word timestamps at {current_word_ts}")
        continue

    current = json.loads(chunk_path.read_text(encoding="utf-8"))
    word_ts = json.loads(current_word_ts.read_text(encoding="utf-8"))

    phrases = extract_keyphrases(current["text"], top_k=15)

    inserts_doc = build_inserts_for_chunk(
        wordlevel_json=word_ts,
        phrases=phrases,
        fps=24,
        min_duration=4.5,  # tune per your pacing
        pad_pre=0.1,
        pad_post=0.0,
        min_start_time=4.0,
        avoid_overlaps=True,
        gap_after=2.0
    )

    # One insert document per chunk, named after the chunk file
    image_caption_json_path = IMAGE_CAPTION_JSON_DIR / f"{chunk_path.stem}-img-caption.json"
    image_caption_json_path.write_text(json.dumps(inserts_doc, indent=2, ensure_ascii=False), encoding="utf-8")

    # import pprint
    # pprint.pprint(inserts_doc, sort_dicts=False, width=120)

In [8]:
from video_inserts.image_cache_manager import ensure_images_for_phrases, get_or_create_image_for_phrase
from video_inserts.reconcile_inserts_with_images import reconcile_inserts_json

# Insert JSON files found in the image-caption directory, in sorted order
image_chunk_files = sorted(
    insert_json
    for insert_json in IMAGE_CAPTION_JSON_DIR.glob("*.json")
    if insert_json.name != "manifest.json"
)

# Keyword arguments forwarded unchanged to reconcile_inserts_json
reconcile_options = dict(
    overlay_dir=OVERLAY_IMAGE_DIR,
    preferred_source="replicate",
    replicate_api_key=REPLICATE_API_KEY,
    replicate_model="black-forest-labs/flux-schnell",
    replicate_inputs={"width": 1024, "output_format": "jpg"},
    log_level="INFO",
)

# Only the first file is processed; drop the [:1] slice to run every file
for image_chunk_path in image_chunk_files[:1]:
    reconcile_inserts_json(image_chunk_path, **reconcile_options)


[INFO] reconcile_inserts: Loading inserts JSON: ..\data\captions\federalist-10\image_timestamps\federalist-10-part-01-img-caption.json
[INFO] reconcile_inserts: Found 4 phrase(s) from inserts[*]['reason'].
[INFO] reconcile_inserts: 4 unique phrase(s) after de-duplication.
[INFO] reconcile_inserts: Resolving images for 4 phrase(s) into: ..\assets\overlay_images
[INFO] reconcile_inserts: Ensuring images for 4 phrase(s) into ..\assets\overlay_images
[INFO] reconcile_inserts: [1/4] Resolving: 'a well constructed Union'
[INFO] reconcile_inserts: [cache] MISS for anchor='union' (phrase: 'a well constructed Union')
[INFO] reconcile_inserts: [replicate] Running model=black-forest-labs/flux-schnell prompt='In a 1780s style illustration output an image that represents the concept of a well constructed Union'
[INFO] reconcile_inserts: [replicate] Succeeded
[INFO] reconcile_inserts: [save] Wrote image -> ..\assets\overlay_images\union.jpg
[INFO] reconcile_inserts: [1/4] ✓ 'a well constructed Union

In [9]:
from pathlib import Path
from moviepy import VideoFileClip, CompositeVideoClip
# Overlay helpers from the project's video_inserts package (note: _parse_inserts is a private helper):
from video_inserts.overlay_from_json import _parse_inserts, build_overlays

# Paths are derived from the notebook configuration so that changing
# text_to_process is enough to render a different text.
# NOTE(review): assumes chunk files are named "<text>-part-NN", as seen in the
# existing outputs - confirm against the ingestion step.
chunk_stem = f"{text_to_process}-part-01"
video_in = CAPTION_VIDEO_OUTPUT_DIR / f"{chunk_stem}-tts-caption.mp4"
json_in = IMAGE_CAPTION_JSON_DIR / f"{chunk_stem}-img-caption.json"
video_out = CAPTION_VIDEO_OUTPUT_DIR.parent / "image_overlays" / "output.mp4"

# The image_overlays folder is not in the setup cell's mkdir list, so create it here
video_out.parent.mkdir(parents=True, exist_ok=True)

base = VideoFileClip(str(video_in))
overlays = []
comp = None
try:
    base_w, base_h = base.size

    fps, inserts = _parse_inserts(json_in)
    overlays = build_overlays(base_w, base_h, inserts)

    comp = CompositeVideoClip([base] + overlays, size=(base_w, base_h))

    # Quick inline preview for short clips (MoviePy v2):
    # comp.preview(fps=fps)   # opens a window; for Jupyter, you can write to a temp mp4 and display.

    comp.write_videofile(
        str(video_out),
        codec="libx264",
        # Prefer the fps from the inserts JSON, then the source clip's, then 24
        fps=fps or getattr(base, "fps", None) or 24,
        ffmpeg_params=["-preset", "medium", "-crf", "18", "-pix_fmt", "yuv420p"],
    )
finally:
    # Release readers/handles held by the clips even if rendering failed
    for clip in [comp, *overlays, base]:
        if clip is not None:
            clip.close()


MoviePy - Building video ..\data\video_output\federalist-10\image_overlays\output.mp4.
MoviePy - Writing audio in outputTEMP_MPY_wvf_snd.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing video ..\data\video_output\federalist-10\image_overlays\output.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready ..\data\video_output\federalist-10\image_overlays\output.mp4


# Section 5: Commentary

In [None]:
from commentary.ollama_commentary import process_json_file, process_many_json_files

import requests, json
# Health check: confirm the local Ollama server answers before running commentary.
OLLAMA_TAGS_URL = "http://localhost:11434/api/tags"
try:
    # /api/tags (list local models) is a GET endpoint, so a POST is rejected even
    # when the server is up. The timeout keeps the cell from hanging if nothing
    # is listening.
    requests.get(OLLAMA_TAGS_URL, timeout=5).raise_for_status()
    print("✅ Ollama API reachable")
except requests.RequestException as e:
    print("⚠️ Ollama API not reachable:", e)
    print("Tip: start it with `ollama serve` in a terminal.")