In [1]:
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

# The processor and the model are both loaded from the same checkpoint name.
checkpoint = "olmOCR-7B-0225-preview"
processor = AutoProcessor.from_pretrained(checkpoint)
model = AutoModelForImageTextToText.from_pretrained(checkpoint)

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.17it/s]


In [5]:
import torch
import base64
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

# Load the weights in bfloat16 and switch the model to evaluation mode.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "olmOCR-7B-0225-preview",
    torch_dtype=torch.bfloat16,
)
model = model.eval()

# The processor is loaded from the same checkpoint name as the model.
processor = AutoProcessor.from_pretrained("olmOCR-7B-0225-preview")

# Use CUDA when torch reports it as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

def image_to_base64(image_path):
    """Return the contents of a local file encoded as a base64 string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to ``str`` as UTF-8.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, 'utf-8')

# Local image that is sent to the model.
image_path = "images/t3.png"
image_base64 = image_to_base64(image_path)

# Instruction passed to the model together with the image.
prompt = "识别图片中所有信息"

# Single user turn: the text prompt followed by the image embedded as a
# base64 data URL.
text_part = {"type": "text", "text": prompt}
image_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/png;base64,{image_base64}"},
}
messages = [{"role": "user", "content": [text_part, image_part]}]

# Render the chat template without tokenizing; the image itself is handed to
# the processor separately as a PIL image.
text = processor.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
main_image = Image.open(image_path)

inputs = processor(text=[text], images=[main_image], padding=True, return_tensors="pt")
# Move every processor output onto the same device as the model.
inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

# Sampling-based generation, capped at 1024 new tokens.
generation_options = {
    "temperature": 0.8,
    "max_new_tokens": 1024,
    "num_return_sequences": 1,
    "do_sample": True,
}
output = model.generate(**inputs, **generation_options)

# Drop the first `prompt_length` positions so only the tokens after the prompt
# are decoded (assumes generate() returns prompt + continuation).
prompt_length = inputs["input_ids"].shape[1]
new_tokens = output[:, prompt_length:]
text_output = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)

print(text_output)

Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00,  9.80it/s]


['就诊日期：2025-01-02 08:20  就诊科室：牙周病科  复诊\n\n姓名：[姓名]  性别：男  年龄：29岁\n\n主诉：牙周复诊\n\n现病史：牙周复查，1周前行全口龈上洁治+局部龈下刮治。\n\n既往史：否认高血压，糖尿病，心血管疾病等系统病史\n\n查体：PLI：2，可及龈下牙石，牙龈充血水肿，BOP（+），PD2-5mm，GR约0-2mm，7|6|7松动II°，5|4|3|2|1|2|3|4|5松动I°。\n\n辅助检查：全景片示：8|近中阻生。7|远中牙槽骨吸收达根长1/3。\n\n诊断：牙周病\n\n处理：1. OHI。\n\n2. 今行全口龈下刮治，3%H2O2冲洗，止血。告医嘱。\n\n3. 预约牙周复诊时间：[时间]。告知复诊需挂预约号，复诊当日按时到院。\n\n4. 建议口外科会诊拔除8|8。']


In [16]:
import os
import json
import pandas as pd
import re

def _rows_as_tuples(frame):
    """Return the rows of ``frame`` as plain tuples with missing cells set to ''."""
    # Blank cells come back from Excel as NaN while freshly reindexed columns
    # hold '', so both are normalized to '' before rows are compared.
    normalized = frame.astype(object).where(frame.notna(), '')
    return list(normalized.itertuples(index=False, name=None))


def append_to_excel_safely(chat_response, excel_path):
    """Extract a fenced JSON object from a chat response and append it to an Excel file.

    The response must contain a ```json ... ``` fenced block. Its content is
    parsed with ``json.loads`` and turned into a one-row DataFrame. If
    ``excel_path`` already exists, the existing sheet is read, both frames are
    aligned on the union of their columns, and the new row is appended only
    when no existing row holds the same values in every column. Otherwise a new
    file is created from the new row.

    Args:
        chat_response: Text containing a ```json fenced block.
        excel_path: Path of the Excel file to create or extend.

    Returns:
        None. Any exception is caught and reported with ``print`` rather than
        raised, so the caller keeps running (best-effort behaviour).
    """
    try:
        match = re.search(r"```json\s*(.*?)\s*```", chat_response, re.DOTALL)
        if not match:
            raise ValueError("未找到有效的 JSON 数据")

        # Parse the fenced payload and wrap it in a one-row DataFrame.
        data_dict = json.loads(match.group(1).strip())
        df_new = pd.DataFrame([data_dict])

        if os.path.exists(excel_path):
            df_existing = pd.read_excel(excel_path)

            # Align both frames on the union of their columns. Columns that
            # only the new row has sort first, then the existing columns in
            # sorted order (same ordering as before this fix).
            existing_columns = df_existing.columns
            all_columns = sorted(
                set(existing_columns.union(df_new.columns)),
                key=lambda x: (x in existing_columns, x),
            )
            df_existing = df_existing.reindex(columns=all_columns, fill_value='')
            df_new = df_new.reindex(columns=all_columns, fill_value='')

            # Row-wise duplicate check: a new row is skipped only when an
            # existing row matches it in every column. (Checking each column
            # independently would drop rows whose values merely appear
            # somewhere in the sheet.)
            existing_rows = _rows_as_tuples(df_existing)
            is_new = [row not in existing_rows for row in _rows_as_tuples(df_new)]
            df_fresh = df_new.loc[is_new]

            if df_fresh.empty:
                df_combined = df_existing
            else:
                df_combined = pd.concat([df_existing, df_fresh], ignore_index=True)
        else:
            df_combined = df_new

        # Write the (possibly extended) table back to the Excel file.
        df_combined.to_excel(excel_path, index=False, sheet_name='Sheet1', engine='openpyxl')
        print(f"数据已成功写入或追加到 {excel_path}")
    except Exception as e:
        # Deliberate best-effort: report the failure instead of raising.
        print(f"发生错误：{e}")

# Example call: a chat response with a fenced JSON object, written to results.xlsx.
excel_path = 'results.xlsx'
chat_response = (
    "```json\n"
    "{\"Image_Name\": \"example1.jpg\", \"Text_Output\": \"A d22og\", \"AI_Response\": \"It's a cute22 dog.\"}\n"
    "```"
)
append_to_excel_safely(chat_response, excel_path)

数据已成功写入或追加到 results.xlsx
