In [1]:
import os
# Markdown source of the lesson; a relative path, so it is resolved against the
# current working directory of the kernel.
path = 'article.md'
# Human-readable lesson title (not used by any of the cells shown below).
title = 'Lesson #0201 — Audio i interfejs głosowy'

# Read the whole article as UTF-8 text; `article` is later scanned for image tags.
with open(path, 'r', encoding='utf-8') as f:
    article = f.read()

In [2]:
import os
import re
import requests
from typing import List

class Image:
    """Plain record describing one image referenced from the article.

    The constructor only stores its arguments; nothing is validated or computed.

    Attributes:
        alt: Alt text taken from the Markdown image tag.
        url: Address the image was referenced by.
        context: Free-text field; `extract_images` initialises it to ''.
        description: Free-text field; `extract_images` initialises it to ''.
        preview: Free-text field; `extract_images` initialises it to ''.
        name: File name of the image.
        file_path: Location of the image file on disk.
    """

    def __init__(self, alt, url, context, description, preview, name, file_path):
        # What the Markdown tag said about the image.
        self.alt, self.url = alt, url
        # Text fields that start out empty and are meant to be filled in later.
        self.context, self.description, self.preview = context, description, preview
        # Where the downloaded copy lives.
        self.name, self.file_path = name, file_path

def extract_images(article: str, folder_path: str, timeout: float = 30.0) -> "List[Image]":
    """Download every image referenced in a Markdown document.

    Scans ``article`` for ``![alt](target)`` tags, downloads each target into
    ``folder_path`` (created if it does not exist) and returns one ``Image`` per
    file that was saved. A failure on one image is reported with ``print`` and
    that image is skipped, so a single broken link never aborts the whole run.

    Args:
        article: Markdown text to scan.
        folder_path: Directory that receives the downloaded files.
        timeout: Seconds to wait on the server for one image before giving up.

    Returns:
        ``Image`` objects in document order, with ``context``, ``description``
        and ``preview`` left as empty strings.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import urlparse

    # Markdown image syntax: ![alt](target)
    image_regex = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')
    # Optional title after the address: ![alt](url "title") or ![alt](url 'title')
    titled_target = re.compile(r'^\s*(\S+)\s+(?:"[^"]*"|\'[^\']*\')\s*$')

    matches = image_regex.findall(article)
    images = []

    # Ensure the folder exists
    os.makedirs(folder_path, exist_ok=True)

    for index, (alt, target) in enumerate(matches, start=1):
        # Drop a trailing Markdown title so it is not sent as part of the URL.
        titled = titled_target.match(target)
        url = titled.group(1) if titled else target
        try:
            # Use only the URL path for the file name: a query string or
            # fragment would otherwise end up in it ('?' is not even a legal
            # file-name character on Windows).
            name = os.path.basename(urlparse(url).path)
            if name in ('', '.', '..'):
                # URL has no usable last path segment (e.g. it ends with '/').
                name = f"image_{index}"
            file_path = os.path.join(folder_path, name)

            # The context manager releases the streamed connection on every
            # exit path; the timeout stops one stalled server from hanging the run.
            with requests.get(url, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    print(f"Failed to fetch {url}: {response.status_code} {response.reason}")
                    continue

                # Save the image to the specified folder
                with open(file_path, 'wb') as image_file:
                    for chunk in response.iter_content(1024):
                        image_file.write(chunk)

            images.append(
                Image(
                    alt=alt,
                    url=url,
                    context='',
                    description='',
                    preview='',
                    name=name,
                    file_path=file_path,
                )
            )
        except Exception as e:
            # Deliberately broad: downloading is best-effort per image.
            print(f"Error processing image {url}: {e}")

    return images

In [3]:
# Destination directory for the downloaded images.
# NOTE(review): this is an absolute path tied to one Windows machine and user
# profile; on any other machine the cell writes somewhere unintended. Consider
# deriving it from the notebook's own directory — confirm the intended base first.
folder_path = r"C:\Users\Dell\Documents\osobiste\baza\ai-devs3-examples\lessons\captions\images"
# Downloads every image referenced in `article`; requires network access.
images = extract_images(article, folder_path)

In [4]:
# Sanity check: where the first downloaded image was saved.
# Raises IndexError when no image was downloaded.
images[0].file_path

'C:\\Users\\Dell\\Documents\\osobiste\\baza\\ai-devs3-examples\\lessons\\captions\\images\\S02E01-1727094804.png'

In [5]:
from prompts import preview_image_system_message

In [15]:
# Inspect the imported system message; the recorded output shows a dict with
# 'role' and 'content' keys.
print(preview_image_system_message)

{'role': 'system', 'content': 'Generate a brief, factual description of the provided image based solely on its visual content.\n<prompt_objective>\nTo produce a concise description of the image that captures its essential visual elements without any additional context, and return it in JSON format.\n</prompt_objective>\n<prompt_rules>\n- ANALYZE the provided image thoroughly, noting key visual elements\n- GENERATE a brief, single paragraph description\n- FOCUS on main subjects, colors, composition, and overall style\n- AVOID speculation or interpretation beyond what is visually apparent\n- DO NOT reference any external context or information\n- MAINTAIN a neutral, descriptive tone\n- RETURN the result in JSON format with only \'name\' and \'preview\' properties\n</prompt_rules>\n<response_format>\n{{\n    "name": "filename with extension",\n    "preview": "A concise description of the image content"\n}}\n</response_format>\nProvide a succinct description that gives a clear overview of 

In [6]:
import base64
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string."""
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

In [16]:
import mimetypes

# Declare the image's real media type in the data URL instead of always claiming
# JPEG (the downloaded file is a PNG). Falls back to JPEG when the extension is
# not recognised.
image_mime_type = mimetypes.guess_type(images[0].name)[0] or "image/jpeg"

# User turn: names the file so the model can echo it back in the 'name' property.
user_message_content = f"Describe the image {images[0].name} concisely. Focus on the main elements and overall composition. Return the result in JSON format with only 'name' and 'preview' properties."

# Chat payload: the imported system prompt, then one user message that carries
# both the text instruction and the image inlined as a base64 data URL.
messages = [
    {
        "role": "system",
        "content": preview_image_system_message.get('content')
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": user_message_content
            },
            {
                "type": "image_url",
                "image_url":
                {
                    "url": f"data:{image_mime_type};base64,{encode_image(images[0].file_path)}",
                },
            },
        ]
    },
]

In [17]:
# Settings passed to `llm_service.completion` in the next cell.
# NOTE(review): the meaning of "json_mode" and "name" is defined by
# OpenAIService, which is not shown here — confirm against that module.
model_config = dict(
    messages=messages,
    model="gpt-4o-mini",
    json_mode=True,
    name="captions: preview_image",
)

In [18]:
from openai_service import OpenAIService

# Service object imported from the local `openai_service` module (implementation
# not shown in this notebook).
llm_service = OpenAIService()
# Send the request described by `model_config`.
# NOTE(review): the recorded response wraps its JSON in a ```json code fence even
# though model_config sets "json_mode": True — confirm that OpenAIService actually
# forwards that flag, otherwise the content cannot be parsed with json.loads as-is.
resp = llm_service.completion(model_config)

In [19]:
# Display the raw completion object returned by the service.
resp

ChatCompletion(id='chatcmpl-AVdrEBs1sNMoIeX4y38aQTwYoLK7o', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{\n    "name": "S02E01-1727094804.png",\n    "preview": "The image features a large microphone at the center, surrounded by vibrant green smoke and abstract digital elements. A figure in a dark cloak stands in front of the microphone, creating a contrast against the lighter background. The overall composition blends technology and a mystical atmosphere, with a focus on the microphone as the main subject."\n}\n```', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1732104744, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_3de1288069', usage=CompletionUsage(completion_tokens=89, prompt_tokens=48433, total_tokens=48522, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tok

In [None]:
import base64
from openai import OpenAI

# Client from the `openai` package, used in this cell instead of the
# OpenAIService wrapper from the earlier cells.
client = OpenAI()

# NOTE(review): same helper as the `encode_image` defined in an earlier cell;
# it is repeated here so that this cell can run on its own.
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string."""
    with open(image_path, "rb") as source:
        payload = base64.b64encode(source.read())
    return payload.decode('utf-8')

import mimetypes

# Base64 payload of the first downloaded image.
base64_image = encode_image(images[0].file_path)

# Declare the image's real media type in the data URL instead of always claiming
# JPEG (the downloaded file is a PNG). Falls back to JPEG when the extension is
# not recognised.
image_mime_type = mimetypes.guess_type(images[0].name)[0] or "image/jpeg"

# Minimal vision request sent straight through the OpenAI SDK: a short system
# instruction plus one user message holding the question and the inlined image.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {
            "role": "system",
            "content": "Bądź grzeczny"
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What is in this image?",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{image_mime_type};base64,{base64_image}"
                    },
                },
            ],
        }
    ],
)

# Show the first (and here only) choice, including the assistant message.
print(response.choices[0])

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The image features a large microphone surrounded by vibrant green smoke or abstract patterns. In the foreground, there’s a figure in a dark cloak standing in front of the microphone, contributing to a surreal, atmospheric feel. The overall color scheme emphasizes greens and blacks, creating a striking visual contrast.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))
