In [17]:
import io
import os
import re
import tempfile
import textwrap

from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO

In [18]:
# Detection model loaded from the local weights file; used further down via
# `model.predict(...)` to produce the boxes consumed by `draw_boxes`.
model = YOLO("best.pt")

# Module-level font: face at index 7 of the NotoSansCJK collection, 12 px.
font = ImageFont.truetype("./NotoSansCJK.ttc", size=12, index=7)

In [19]:
# img_url = "./test_images/test_3.png"
# results = model.predict(img_url)

# results[0].save_crop("./bubble_test_results") #save cropped bubbles
# results[0].show()

In [20]:
# for box in results[0].boxes:
#     # print(box)
#     x1, y1, x2, y2 = box.xyxy[0].tolist()  # Convert to list
#     print(box.conf)
#     print(x1,y1,x2,y2)
#     print("===================")

In [21]:
from backend.services.glmocr_service import OCR_Service
from pathlib import Path
# All model directories are resolved relative to the directory the kernel
# was started in.
base = Path.cwd()

# Location of the OCR model files, handed to the project's OCR wrapper.
ocr_path = base.joinpath("backend/models/GlmOcr")
ocr_service = OCR_Service(ocr_path)

Loading weights: 100%|██████████| 510/510 [00:00<00:00, 1216.07it/s, Materializing param=model.visual.post_layernorm.weight]                           


In [22]:
from backend.services.tencentHY_service import Translate_Service

# Location of the translation model files, handed to the project's
# translation wrapper (`base` is the working directory set up earlier).
translate_path = base.joinpath("backend/models/TencentHY")
translate_service = Translate_Service(translate_path)


Unrecognized keys in `rope_parameters` for 'rope_type'='dynamic': {'beta_fast', 'alpha', 'mscale', 'rope_theta', 'beta_slow', 'mscale_all_dim'}
Loading weights: 100%|██████████| 355/355 [00:00<00:00, 1064.18it/s, Materializing param=model.norm.weight]                               


In [23]:
# img_url = base / "test_images" / "test_4.png"
# result = ocr_service.runOCR(img_url)
# result = re.sub(r'[\u2028\u2029]+', ' ', result) #remove new line
# print(result)

In [24]:
def upscale_for_ocr(img, scale=2):
    """Return ``img`` enlarged by ``scale`` using bicubic resampling.

    Args:
        img: Image object exposing ``size`` and ``resize`` (a PIL image in
            this notebook).
        scale: Multiplier applied to both the width and the height.

    Returns:
        The resized image returned by ``img.resize``.
    """
    width, height = img.size
    enlarged_size = (width * scale, height * scale)
    return img.resize(enlarged_size, Image.BICUBIC)

In [25]:
def get_wrapped_text(text, font, max_width):
    """Greedily wrap ``text`` on spaces so that each line fits ``max_width``.

    Args:
        text: String to wrap. It is split on single spaces only, so this is
            intended for space-delimited text such as English.
        font: Object exposing ``getlength(str)`` (e.g. a PIL ``FreeTypeFont``)
            used to measure the rendered width of a candidate line.
        max_width: Maximum line width, in the units ``font.getlength`` returns.

    Returns:
        A non-empty list of lines. A single word wider than ``max_width`` is
        kept whole on its own line (words are never split), so such a line can
        still overflow. Blank lines are not emitted, except that wrapping an
        empty/blank ``text`` yields a single line.
    """
    lines = []
    current_line = []

    for word in text.split(' '):
        candidate = ' '.join(current_line + [word])
        if font.getlength(candidate) <= max_width:
            current_line.append(word)
            continue

        # The word does not fit on the current line: flush what we have and
        # start a new line with it. Skip the flush when there is nothing
        # visible to flush (e.g. the very first word is already too wide),
        # otherwise a spurious blank line would be emitted and counted in the
        # caller's height computation.
        flushed = ' '.join(current_line)
        if flushed.strip():
            lines.append(flushed)
        current_line = [word]

    # Emit the remainder; always return at least one line so callers can rely
    # on a non-empty result.
    tail = ' '.join(current_line)
    if tail.strip() or not lines:
        lines.append(tail)
    return lines

def fit_text_to_box(draw, text, box_coords, font_path, initial_size=20, min_size=8):
    """Find the largest font size at which the wrapped ``text`` fits the box.

    Starting at ``initial_size``, the font is shrunk one point at a time until
    the wrapped text fits the box in both height and width, or until
    ``min_size`` is reached.

    Args:
        draw: Unused; kept so existing callers do not need to change.
        text: Text to lay out (wrapped with ``get_wrapped_text``).
        box_coords: ``(x1, y1, x2, y2)`` of the target box.
        font_path: Path passed to ``ImageFont.truetype``.
        initial_size: First (largest) font size to try.
        min_size: Smallest font size to try. If nothing fits, the layout at
            this size is returned even though it overflows.

    Returns:
        ``(lines, font, size)`` where ``lines`` and ``font`` always correspond
        to the returned ``size``.
    """
    x1, y1, x2, y2 = box_coords
    target_width = x2 - x1
    target_height = y2 - y1

    # truetype() rejects non-positive sizes, so never shrink below 1.
    floor_size = max(min_size, 1)
    current_size = initial_size

    # The body runs at least once, so `font` and `lines` are always bound,
    # even when initial_size is already at or below the minimum.
    while True:
        font = ImageFont.truetype(font_path, size=current_size)
        lines = get_wrapped_text(text, font, target_width)

        # Height of the wrapped block, using the bottom of "Ay" as line height.
        line_height = font.getbbox("Ay")[3]
        total_height = line_height * len(lines)

        # Wrapping never splits a word, so a long word can still be wider than
        # the box; check the width explicitly as well as the height.
        widest_line = max((font.getlength(line) for line in lines), default=0)

        fits = total_height <= target_height and widest_line <= target_width
        if fits or current_size <= floor_size:
            break
        current_size -= 1  # Shrink and try again

    return lines, font, current_size

In [26]:
def draw_boxes(image_path, results, output_path="detected_manga.png", font_path="./NotoSansCJK.ttc"):
    """OCR, translate and re-letter every detected box of an image.

    For each box in ``results`` the region is cropped from the original image,
    upscaled, OCR'd with the notebook-level ``ocr_service`` and translated with
    the notebook-level ``translate_service``. The box is then filled with white
    and the translation is drawn centred inside it. The edited image is saved
    to ``output_path`` and opened in the default viewer.

    Args:
        image_path: Path of the source image.
        results: Iterable of detection results; each must expose ``boxes``
            whose items expose ``xyxy`` (as produced by ``model.predict``).
        output_path: Where the edited image is written.
        font_path: Font file used to render the translated text.

    Returns:
        None.
    """
    img = Image.open(image_path).convert("RGB")
    # Crops for OCR are taken from an untouched copy: drawing on `img` (box
    # wipes and translated text from earlier iterations, or any outline) must
    # never leak into the pixels handed to the OCR model.
    pristine = img.copy()
    draw = ImageDraw.Draw(img)

    for result in results:
        for box in result.boxes:
            # Get coordinates as a list of floats
            coords = box.xyxy[0].tolist()  # [x1, y1, x2, y2]

            box_cropped = upscale_for_ocr(pristine.crop(coords), scale=3)

            # The OCR service is given a file path, so the crop goes through a
            # temp file. Reserve the name, close the handle (so the file can be
            # reopened by name on Windows), then always clean the file up.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
                temp_path = f.name
            try:
                box_cropped.save(temp_path)
                text = ocr_service.runOCR(temp_path)  # OCR'd text
            finally:
                try:
                    os.remove(temp_path)
                except OSError:
                    # Best-effort cleanup: a leftover temp file must not abort
                    # the whole page.
                    pass

            text = re.sub(r'[\n\r\u2028\u2029]+', ' ', text)  # remove new lines
            if not text.strip():
                # Nothing was recognised: leave the original artwork alone
                # instead of translating an empty prompt and wiping the box.
                continue

            translated_text = translate_service.translate(text)
            print(text)
            print(translated_text)
            print("====")

            # Normalise line breaks before fitting so that wrapping and width
            # measurement see exactly the text that will be drawn.
            translated_text = re.sub(r'[\n\r\u2028\u2029]+', ' ', translated_text)

            # Wipe the space
            draw.rectangle(coords, fill="white", outline="white")

            # 1. Calculate the best fit
            lines, best_font, final_size = fit_text_to_box(draw, translated_text, coords, font_path)

            # 2. Vertical centering
            line_h = best_font.getbbox("Ay")[3]
            total_h = line_h * len(lines)
            start_y = coords[1] + ((coords[3] - coords[1]) - total_h) / 2

            # 3. Draw each line centered horizontally
            for line in lines:
                line_w = draw.textlength(line, font=best_font)
                start_x = coords[0] + ((coords[2] - coords[0]) - line_w) / 2
                draw.text((start_x, start_y), line, font=best_font, fill="black")
                start_y += line_h

    img.save(output_path)
    img.show()

In [27]:
# Run detection on one test page, then OCR/translate/re-letter every box.
img_url = "./test_images/test_2.png"
results = model.predict(img_url)
# results[0].save_crop("./bubble_test_results") #save cropped bubbles
draw_boxes(image_path=img_url, results=results)


image 1/1 c:\Users\tonyl\Documents\Multimodal-Manga-Translator\test_images\test_2.png: 1024x1280 15 texts, 128.4ms
Speed: 7.3ms preprocess, 128.4ms inference, 0.2ms postprocess per image at shape (1, 3, 1024, 1280)
明日は忍者学校の つつあげしけん 卒業試験だぞ!! 赤児かい お前は 前回もその前も しけん 試験に落ちてる!!
Tomorrow is the graduation exam for the Ninja School! You're like a toddler, aren't you? You failed the ninja exams both times before!!
====
化 は る こと そ つ く り に
The process of transformation is creating something new.
====
ねあく いもそ なし!! ろく
Never mind, it's okay!! Six
====
ぜお前の さんぺん
Your foolishness
====
今日の授業は 変化の術の 復習テストだ 全員 並べ
Today’s class is a review test on the art of change. Everyone, line up!
====
外でいたすら してる場合じゃ ないだろ バカヤロ
It's not the time to even think about helping others, idiot.
====
変化!!!
Change!!!
====
O ㉑ ㉒ K ㉓ ㉔ ! ㉕
You are a professional Manhua translator. This task requires translating dialogue from Chinese into natural, punchy English used in action manga. Please use slang appropriate for the genre, suc