In [12]:
import ollama
import base64
import os

def encode_image(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    The file is read in binary mode, base64-encoded, and the resulting
    bytes are decoded as UTF-8 so the value can be embedded in a request.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return str(encoded_bytes, 'utf-8')

def transcribe_image(image_path, model='minicpm-v'):
    """Transcribe the handwriting in an image as plain text and as HTML.

    The function holds a two-turn conversation with an Ollama vision model:
    the first turn sends the image and asks for a plain-text transcription,
    the second turn asks the model to turn that transcription into a
    complete HTML document.

    Args:
        image_path: Path of the image file to transcribe.
        model: Name of the Ollama model to query. Defaults to 'minicpm-v'.

    Returns:
        A ``(text_transcription, html_transcription)`` tuple containing the
        model's reply to the first and second turn respectively.
    """
    base64_image = encode_image(image_path)

    # First turn: Request plain text transcription.
    # Ollama chat messages carry their text in a string 'content' field and
    # attach images through a separate 'images' list; an OpenAI-style list of
    # typed parts in 'content' is not part of Ollama's message format.
    messages = [
        {
            'role': 'user',
            'content': "Please transcribe the handwritten text in this image as plain text. Pay attention to uppercase and lowercase letters, as well as any bold, italic, underlined, or struck-through text. Do not include any HTML formatting at this stage.",
            'images': [base64_image],
        }
    ]

    response = ollama.chat(model=model, messages=messages)
    text_transcription = response['message']['content']

    # Second turn: Request HTML formatting.
    # The assistant's own reply is appended first so the follow-up request is
    # sent together with the full conversation history (image included).
    messages.append({
        'role': 'assistant',
        'content': text_transcription
    })
    messages.append({
        'role': 'user',
        'content': "Based on the transcription you just provided, create a complete, correctly formatted styled HTML document. Use appropriate HTML tags such as <b>, <i>, <u>, and <s> to represent bold, italic, underlined, and struck-through text respectively. Start the HTML document with <!DOCTYPE html>. Ensure that the HTML structure accurately reflects the layout and formatting of the original handwritten text. Do not use CSS related elements."
    })

    response = ollama.chat(model=model, messages=messages)
    html_transcription = response['message']['content']

    return text_transcription, html_transcription

# Example usage: transcribe every image in a folder and save the results.
image_folder = "BGdataset/images"  # Replace with your image folder path
output_folder_txt = "transcriptionsBG_minicpm/txt"  # Replace with your desired output folder path for text
output_folder_html = "transcriptionsBG_minicpm/html"  # Replace with your desired output folder path for HTML

# Lowercase extensions of the files that are treated as images.
image_extensions = ('.jpg', '.jpeg', '.png', '.gif')

# Create the output folders if they don't exist
os.makedirs(output_folder_txt, exist_ok=True)
os.makedirs(output_folder_html, exist_ok=True)

# Process all images in the folder. os.listdir() returns entries in arbitrary
# order, so sort them to make the processing order reproducible.
for filename in sorted(os.listdir(image_folder)):
    # Compare case-insensitively so files such as "SCAN.JPG" are not skipped.
    if not filename.lower().endswith(image_extensions):
        continue

    image_path = os.path.join(image_folder, filename)

    # Transcribe the image
    text_transcription, html_transcription = transcribe_image(image_path)

    # Both output files share the image's name without its extension.
    base_name = os.path.splitext(filename)[0]

    # Save the text transcription
    txt_output_path = os.path.join(output_folder_txt, f"{base_name}_transcription.txt")
    with open(txt_output_path, 'w', encoding='utf-8') as f:
        f.write(text_transcription)

    # Save the HTML transcription
    html_output_path = os.path.join(output_folder_html, f"{base_name}_transcription.html")
    with open(html_output_path, 'w', encoding='utf-8') as f:
        f.write(html_transcription)

    print(f"Transcriptions for {filename} saved to {txt_output_path} and {html_output_path}")

print("All images have been processed.")

The Sun emits light as a mixture of different colors. This mixture makes the Sun itself appear white or yellowish from our perspective on Earth, because all of these wavelengths reach our eyes together and blend into a nearly white color.

However, not all visible wavelengths are scattered equally by molecules in Earth's atmosphere; shorter wavelengths, such as violet and blue light, are scattered more effectively than longer ones. As a result, a larger proportion of blue light is redirected towards us from every part of the sky, which makes the sky look blue to our eyes.
