In [1]:
# Install the third-party packages this notebook imports into the kernel's environment.
# NOTE(review): none of the versions are pinned, so a re-run may pull different releases — consider pinning.
%pip install opencv-python
%pip install matplotlib
%pip install python-dotenv
%pip install --upgrade openai

Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (67.0 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m635.4 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:03[0m
[?25hCollecting numpy<2.3.0,>=2
  Downloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m590.5 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hInstalling collected packages: numpy, opencv-python
Successfully installed numpy-2.2.6 opencv-python-4.12.0.88
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable
Collecting matplotlib
  Downloading matplotlib-3.10.7-cp310-c

In [None]:
# imports
import cv2
from pathlib import Path
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv
import openai

# Read key/value pairs from a local .env file into the process environment
# (presumably where the OpenAI credentials used later live — confirm).
load_dotenv()

# Splitting of Comic Panels

In [19]:
def split_comic_panels(
    comic_path,
    output_path,
    *,
    min_area=1000,
    white_threshold=240,
    sort_tolerance=10,
):
    """Split a comic image into panels and save each panel as a PNG file.

    Every pixel that is not brighter than ``white_threshold`` is treated as
    foreground; the outermost contours of that foreground are taken as panel
    candidates, and the bounding rectangle of each sufficiently large contour
    is cropped out of the original image.

    Args:
        comic_path: Location of the comic image (``str`` or ``os.PathLike``).
        output_path: Directory that receives the panels. It is created when
            missing. Panels are written as ``0.png``, ``1.png``, ... in
            top-to-bottom, then left-to-right order.
        min_area: Contours whose area is not strictly greater than this value
            (in pixels) are ignored.
        white_threshold: Grayscale values above this are treated as background.
        sort_tolerance: Bucket size in pixels used when ordering panels, so
            that panels whose edges differ by a few pixels still count as
            being on the same row / column.

    Returns:
        int: The number of panels that were detected and written.

    Raises:
        FileNotFoundError: If ``comic_path`` does not point to a file.
        ValueError: If the file cannot be decoded as an image, or if
            ``sort_tolerance`` is not positive.
        OSError: If a panel image cannot be written.
    """
    if sort_tolerance <= 0:
        raise ValueError("sort_tolerance must be a positive number of pixels")

    comic_file = Path(comic_path)
    if not comic_file.is_file():
        raise FileNotFoundError(f"Comic image not found: {comic_file}")

    # cv2.imread signals failure by returning None instead of raising, and
    # only recent OpenCV builds accept Path objects, hence the str() and check.
    image = cv2.imread(str(comic_file))
    if image is None:
        raise ValueError(f"Could not decode comic image: {comic_file}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Inverted binary threshold: near-white background -> 0, everything else -> 255.
    _, thresh = cv2.threshold(gray, white_threshold, 255, cv2.THRESH_BINARY_INV)

    # RETR_EXTERNAL keeps only the outermost contours, so artwork inside a
    # panel does not produce extra candidates.
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bounding_rects = [
        cv2.boundingRect(contour)
        for contour in contours
        if cv2.contourArea(contour) > min_area
    ]

    # Reading order: rows first (y bucket), then columns (x bucket). The sort
    # is stable, so ties keep the order in which the contours were found.
    bounding_rects.sort(
        key=lambda rect: (rect[1] // sort_tolerance, rect[0] // sort_tolerance)
    )

    # cv2.imwrite does not create directories and reports failure only through
    # its return value, so make the directory and check each write explicitly.
    # NOTE: files left over from an earlier run with more panels are not removed.
    output_dir = Path(output_path)
    output_dir.mkdir(parents=True, exist_ok=True)

    for index, (x, y, w, h) in enumerate(bounding_rects):
        panel_file = output_dir / f"{index}.png"
        if not cv2.imwrite(str(panel_file), image[y:y + h, x:x + w]):
            raise OSError(f"Could not write panel image: {panel_file}")

    return len(bounding_rects)  # return the number of panels


# Try the splitter on a sample comic; the value displayed by this cell is the returned panel count.
split_comic_panels("smbc-beautiful-4.png", "./temp-comic-panels/")

5

In [27]:
# OpenAI client shared by the code below.
# NOTE(review): no API key is passed here, so it presumably comes from the
# environment populated by load_dotenv() — confirm.
client = openai.OpenAI()
def create_file(file_path):
    """Upload a local file through the Files API and return its file ID.

    The file is opened in binary mode and uploaded with ``purpose="vision"``
    using the module-level ``client``.
    """
    with open(file_path, "rb") as image_stream:
        uploaded = client.files.create(file=image_stream, purpose="vision")
    return uploaded.id

# Comic to transcribe. The panel count returned by the splitter is inserted
# into the prompt so the model knows how many panels to expect.
file_path = Path("./smbc-motivation-2.png")
num_panels = split_comic_panels(file_path, "./temp-comic-panels")

# Upload the full comic and keep the ID the Files API hands back.
file_id = create_file(file_path)

# One-shot prompt: the instruction followed by a sample transcription that
# fixes the expected output format. `{}` is filled with the panel count.
prompt = """
Please transcribe this {}-panel comic in the same form as the following example transcription:
### Panel 1
Red-haired woman: No! No more saying irregardless to mean regardless! You're double-negating with the irr and the less.
### Panel 2
Grey-haired woman: You misheard. I said irrirregardless. Double the irr. So it's back to meaning regardless.
### Panel 3
Grey-haired woman: As long as I employ an even number of irrs, my word remains lexically valid.
### Panel 4
Red-haired woman: But...why?
Grey-haired woman: I use it as an alarm noise when people are too prescriptivist about language.
### Panel 5
*silhouetted*
Red-haired woman: That's still not proper-
Grey-haired woman: Irrirrirrirrirr
""".format(num_panels)

# A single user turn carrying the text prompt plus the uploaded image.
user_message = {
    "role": "user",
    "content": [
        {"type": "input_text", "text": prompt},
        {"type": "input_image", "file_id": file_id},
    ],
}

response = client.responses.create(
    model="gpt-4.1",
    temperature=0.0,
    input=[user_message],
)

print(response.output_text)


### Panel 1
(Hiker standing with hands on hips, looking determined)
Hiker (narration): The thing about motivational quotes...

### Panel 2
(Hiker climbing a mountain, close-up of face)
Hiker (narration): is that they separate the mental sensation of accomplishment...

### Panel 3
(Close-up of ice axe in rock)
Hiker (narration): from the accomplishment itself.

### Panel 4
(Hiker climbing, reaching the top)
Hiker (narration): So, you can feel like you’ve done something...

### Panel 5
(Hiker lying on the ground, people walking by)
Hiker (narration): without actually doing it.

### Panel 6
(Hiker relaxing against a stop sign, drinking from a cup)
Hiker (narration): Which, if you consider how much work it is to actually do something... is a pretty sweet deal.
