In [None]:
import cv2
import numpy as np
import torch
import torchvision
from collections import OrderedDict
import os

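# Add CRAFT-pytorch repository to the Python path.
# NOTE: the imports below only resolve after the repository has been cloned
# (see the `git clone` cell further down); on a fresh kernel run that cell first.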
import sys
sys.path.append('/content/CRAFT-pytorch')

from craft import CRAFT
from craft_utils import getDetBoxes, adjustResultCoordinates
from imgproc import loadImage, normalizeMeanVariance
from file_utils import saveResult

In [None]:
# Clone the CRAFT-pytorch repository; it provides the craft, craft_utils,
# imgproc and file_utils modules imported by this notebook.
!git clone https://github.com/clovaai/CRAFT-pytorch.git

Cloning into 'CRAFT-pytorch'...
remote: Enumerating objects: 59, done.[K
remote: Total 59 (delta 0), reused 0 (delta 0), pack-reused 59[K
Receiving objects: 100% (59/59), 1.69 MiB | 25.84 MiB/s, done.
Resolving deltas: 100% (25/25), done.


In [None]:
# Work from inside the cloned repository so the relative paths used by the
# following cells (basenet/vgg16_bn.py, craft_mlt_25k.pth) resolve there.
%cd CRAFT-pytorch/

/content/CRAFT-pytorch


In [None]:
# Patch basenet/vgg16_bn.py in place (note: this edits vgg16_bn.py, not craft.py):
#   1. remove the `from torchvision.models.vgg import model_urls` import, and
#   2. replace `model_urls[...]` lookups with the hard-coded VGG16-BN weights URL.
# NOTE(review): presumably required because the installed torchvision no longer
# exposes `model_urls` -- confirm against the torchvision version in use.
!sed -i 's/from torchvision.models.vgg import model_urls//' basenet/vgg16_bn.py
!sed -i 's/model_urls\[.*\]/"https:\/\/download.pytorch.org\/models\/vgg16_bn-6c64b313.pth"/' basenet/vgg16_bn.py



In [None]:
# Sanity check: list the contents of the current working directory.
!ls

basenet		   craft_utils.py  imgproc.py	README.md	  test.py
craft_mlt_25k.pth  figures	   LICENSE	refinenet.py
craft.py	   file_utils.py   __pycache__	requirements.txt


In [None]:
# Install the third-party packages used by this notebook.
# NOTE(review): versions are unpinned, so a re-run may pull different releases;
# consider `%pip install` with pinned versions so the install targets the
# kernel's own environment.
!pip install torch torchvision
!pip install opencv-python
!pip install gdown

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:
# Download the pretrained weights from Google Drive into the current working
# directory as craft_mlt_25k.pth.
# NOTE(review): newer gdown releases deprecate the `--id` flag in favour of
# passing the file ID positionally -- confirm against the installed version.
!gdown --id 1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ -O craft_mlt_25k.pth


Downloading...
From: https://drive.google.com/uc?id=1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ
To: /content/CRAFT-pytorch/craft_mlt_25k.pth
100% 83.2M/83.2M [00:01<00:00, 67.5MB/s]


In [None]:
import cv2
import numpy as np
import torch
from collections import OrderedDict
import os
# Add CRAFT-pytorch repository to the Python path
import sys
sys.path.append('/content/CRAFT-pytorch')
from craft import CRAFT
from craft_utils import getDetBoxes
from imgproc import normalizeMeanVariance
from file_utils import saveResult

# Helper functions
def copyStateDict(state_dict):
    """Return a copy of ``state_dict`` with a leading ``module`` key prefix removed.

    The first key decides the behaviour for the whole mapping: if it starts
    with ``"module"``, the first dot-separated component is dropped from
    every key; otherwise all keys are copied unchanged.  An empty mapping
    yields an empty ``OrderedDict`` instead of raising ``IndexError``.

    NOTE(review): the ``module.`` prefix is presumably the one found in
    checkpoints saved from a ``torch.nn.DataParallel`` wrapper -- confirm.

    Args:
        state_dict: Mapping of parameter names (strings) to values.

    Returns:
        OrderedDict: The re-keyed entries in their original order.
    """
    # next(..., "") keeps the empty-mapping case from indexing into nothing.
    first_key = next(iter(state_dict), "")
    start_idx = 1 if first_key.startswith("module") else 0
    return OrderedDict(
        (".".join(key.split(".")[start_idx:]), value)
        for key, value in state_dict.items()
    )

def load_craft_model(model_path):
    """Build a CRAFT network, load the checkpoint at ``model_path`` and return it.

    The checkpoint is read onto the CPU, its keys are normalised with
    ``copyStateDict`` and the network is switched to evaluation mode.

    Args:
        model_path: Path of the checkpoint file passed to ``torch.load``.

    Returns:
        The ``CRAFT`` instance with the loaded weights, in eval mode.
    """
    model = CRAFT()
    checkpoint = torch.load(model_path, map_location='cpu')
    weights = copyStateDict(checkpoint)
    model.load_state_dict(weights)
    model.eval()
    return model

def preprocess_image(image_path):
    """Read the image at ``image_path`` and return it converted from BGR to RGB.

    Args:
        image_path: Path of the image file passed to ``cv2.imread``.

    Returns:
        The loaded image after ``cv2.COLOR_BGR2RGB`` conversion.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` (file missing or
            unreadable), instead of failing later inside ``cv2.cvtColor``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image at path {image_path} could not be loaded.")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

def detect_text_regions(image, net):
    """Run ``net`` on ``image`` and return detected boxes in image coordinates.

    Args:
        image: Image array; it is normalised with ``normalizeMeanVariance``
            and its first two axes are treated as (height, width).
        net: Callable model whose first output is indexed as
            ``y[0, :, :, 0]`` (text score) and ``y[0, :, :, 1]`` (link score).

    Returns:
        list: One ``float32`` array of corner points per detected box,
        rescaled from the score-map grid to the pixel grid of ``image``.
    """
    x = normalizeMeanVariance(image)
    # Move the last axis to the front and add a leading batch axis.
    x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)

    with torch.no_grad():
        y, _ = net(x)

    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    # Lower thresholds to capture more of the line.
    # NOTE(review): positional values are presumably text_threshold,
    # link_threshold and low_text of the upstream getDetBoxes -- confirm.
    boxes, _ = getDetBoxes(score_text, score_link, 0.4, 0.3, 0.3)

    # getDetBoxes works on the score-map grid.  The boxes are later used to
    # crop `image`, so map them back using the actual shape ratio; this is a
    # no-op when the score map already has the same size as the image.
    # NOTE(review): upstream CRAFT rescales with adjustResultCoordinates for
    # the same reason -- confirm the ratio matches your model's output size.
    map_h, map_w = score_text.shape[:2]
    scale = np.array([image.shape[1] / map_w, image.shape[0] / map_h], dtype=np.float32)
    return [np.asarray(box, dtype=np.float32) * scale for box in boxes]

def merge_boxes_to_lines(boxes, max_y_diff=15, x_padding=140):
    """Group detection boxes into text lines and pad each line horizontally.

    Boxes are sorted by the mean y of their four corners.  Walking that
    order, a box is merged into the current line when the vertical centres
    (mean y of corners 0 and 3) differ by at most ``max_y_diff``; a merged
    line is the axis-aligned bounding rectangle of its members.  Every
    resulting line, including a lone detection, is then widened by
    ``x_padding`` on both sides.

    Args:
        boxes: Sequence of 4-point boxes, each point indexable as ``[x, y]``.
        max_y_diff: Largest centre-to-centre y distance that still merges.
        x_padding: Amount subtracted from the left x values (corners 0 and 3)
            and added to the right x values (corners 1 and 2).

    Returns:
        list: Padded boxes as lists of four ``[x, y]`` pairs.  An empty
        ``boxes`` is returned unchanged.
    """
    # Only the empty case short-circuits: a single box must still be padded
    # so its output is consistent with the multi-box path.
    if len(boxes) == 0:
        return boxes

    ordered = sorted(boxes, key=lambda b: (b[0][1] + b[1][1] + b[2][1] + b[3][1]) / 4)

    merged_boxes = []
    current_line = ordered[0]
    for box in ordered[1:]:
        current_y = (current_line[0][1] + current_line[3][1]) / 2
        box_y = (box[0][1] + box[3][1]) / 2

        if abs(box_y - current_y) <= max_y_diff:
            # Replace the running line with the bounding rectangle of both.
            xs = [p[0] for b in (current_line, box) for p in b]
            ys = [p[1] for b in (current_line, box) for p in b]
            current_line = [
                [min(xs), min(ys)],
                [max(xs), min(ys)],
                [max(xs), max(ys)],
                [min(xs), max(ys)],
            ]
        else:
            merged_boxes.append(current_line)
            current_line = box
    merged_boxes.append(current_line)

    # Padding may push x outside the image; no clamping is done here.
    return [
        [
            [line[0][0] - x_padding, line[0][1]],
            [line[1][0] + x_padding, line[1][1]],
            [line[2][0] + x_padding, line[2][1]],
            [line[3][0] - x_padding, line[3][1]],
        ]
        for line in merged_boxes
    ]

def crop_text_lines(image, boxes, output_dir):
    """Crop each box out of ``image`` and save it as ``line_<i>.png``.

    Args:
        image: RGB image array indexed as ``image[y, x]``.
        boxes: Iterable of boxes, each convertible to an ``(N, 2)`` array of
            ``[x, y]`` points; values are truncated to ``int32``.
        output_dir: Directory that receives the crops; created if missing.

    Empty crops are skipped.  Crops are converted RGB -> BGR before
    ``cv2.imwrite``.
    """
    os.makedirs(output_dir, exist_ok=True)
    height, width = image.shape[:2]

    for i, box in enumerate(boxes):
        corners = np.array(box).astype(np.int32)
        # Slice ends are exclusive, so the upper clamp is the full width and
        # height (clamping to size - 1 would always drop the last column/row).
        # The max(lower, ...) guard keeps a box lying entirely outside the
        # image from producing a negative end index that wraps around.
        x_min = max(0, np.min(corners[:, 0]))
        x_max = max(x_min, min(width, np.max(corners[:, 0])))
        y_min = max(0, np.min(corners[:, 1]))
        y_max = max(y_min, min(height, np.max(corners[:, 1])))
        crop_img = image[y_min:y_max, x_min:x_max]

        # Only save if the cropped image is not empty
        if crop_img.size > 0:
            cv2.imwrite(f"{output_dir}/line_{i}.png", cv2.cvtColor(crop_img, cv2.COLOR_RGB2BGR))

# Main Function
def segment_text_lines(image_path, model_path, output_dir):
    """Run the full pipeline: load, detect, merge into lines, crop and save.

    Args:
        image_path: Path of the input image.
        model_path: Path of the CRAFT checkpoint.
        output_dir: Directory where the cropped line images are written.
    """
    rgb_image = preprocess_image(image_path)
    craft_net = load_craft_model(model_path)

    # Raw detections -> merged line boxes.
    detections = detect_text_regions(rgb_image, craft_net)
    merged_lines = merge_boxes_to_lines(detections)

    crop_text_lines(rgb_image, merged_lines, output_dir)

# Input image, CRAFT checkpoint and destination folder for this run
image_path = "/content/1.png"
model_path = "/content/CRAFT-pytorch/craft_mlt_25k.pth"
output_dir = "/content/output_directory2"

# Execute the pipeline with the paths configured above
segment_text_lines(image_path=image_path, model_path=model_path, output_dir=output_dir)

In [None]:
import cv2
import numpy as np
import torch
from collections import OrderedDict
import os

# Add CRAFT-pytorch repository to the Python path
import sys
sys.path.append('/content/CRAFT-pytorch')

from craft import CRAFT
from craft_utils import getDetBoxes
from imgproc import normalizeMeanVariance
from file_utils import saveResult

# Helper functions
def copyStateDict(state_dict):
    """Return a copy of ``state_dict`` with a leading ``module`` key prefix removed.

    The first key decides the behaviour for the whole mapping: if it starts
    with ``"module"``, the first dot-separated component is dropped from
    every key; otherwise all keys are copied unchanged.  An empty mapping
    yields an empty ``OrderedDict`` instead of raising ``IndexError``.

    NOTE(review): the ``module.`` prefix is presumably the one found in
    checkpoints saved from a ``torch.nn.DataParallel`` wrapper -- confirm.

    Args:
        state_dict: Mapping of parameter names (strings) to values.

    Returns:
        OrderedDict: The re-keyed entries in their original order.
    """
    # next(..., "") keeps the empty-mapping case from indexing into nothing.
    first_key = next(iter(state_dict), "")
    start_idx = 1 if first_key.startswith("module") else 0
    return OrderedDict(
        (".".join(key.split(".")[start_idx:]), value)
        for key, value in state_dict.items()
    )

def load_craft_model(model_path):
    """Build a CRAFT network, load the checkpoint at ``model_path`` and return it.

    The checkpoint is read onto the CPU, its keys are normalised with
    ``copyStateDict`` and the network is switched to evaluation mode.

    Args:
        model_path: Path of the checkpoint file passed to ``torch.load``.

    Returns:
        The ``CRAFT`` instance with the loaded weights, in eval mode.
    """
    model = CRAFT()
    checkpoint = torch.load(model_path, map_location='cpu')
    weights = copyStateDict(checkpoint)
    model.load_state_dict(weights)
    model.eval()
    return model

def preprocess_image(image_path):
    """Read the image at ``image_path`` and return it converted from BGR to RGB.

    Args:
        image_path: Path of the image file passed to ``cv2.imread``.

    Returns:
        The loaded image after ``cv2.COLOR_BGR2RGB`` conversion.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None``.
    """
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        raise ValueError(f"Image at path {image_path} could not be loaded.")
    return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

def detect_text_regions(image, net):
    """Run ``net`` on ``image`` and return detected boxes in image coordinates.

    Args:
        image: Image array; it is normalised with ``normalizeMeanVariance``
            and its first two axes are treated as (height, width).
        net: Callable model whose first output is indexed as
            ``y[0, :, :, 0]`` (text score) and ``y[0, :, :, 1]`` (link score).

    Returns:
        list: One ``float32`` array of corner points per detected box,
        rescaled from the score-map grid to the pixel grid of ``image``.
    """
    x = normalizeMeanVariance(image)
    # Move the last axis to the front and add a leading batch axis.
    x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)

    with torch.no_grad():
        y, _ = net(x)

    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    # Lower thresholds to capture more of the line.
    # NOTE(review): positional values are presumably text_threshold,
    # link_threshold and low_text of the upstream getDetBoxes -- confirm.
    boxes, _ = getDetBoxes(score_text, score_link, 0.4, 0.3, 0.3)

    # getDetBoxes works on the score-map grid.  The boxes are later used to
    # crop `image`, so map them back using the actual shape ratio; this is a
    # no-op when the score map already has the same size as the image.
    # NOTE(review): upstream CRAFT rescales with adjustResultCoordinates for
    # the same reason -- confirm the ratio matches your model's output size.
    map_h, map_w = score_text.shape[:2]
    scale = np.array([image.shape[1] / map_w, image.shape[0] / map_h], dtype=np.float32)
    return [np.asarray(box, dtype=np.float32) * scale for box in boxes]

def merge_boxes_to_lines(boxes, max_y_diff=15, x_padding=140):
    """Group detection boxes into text lines and pad each line horizontally.

    Boxes are sorted by the mean y of their four corners.  Walking that
    order, a box is merged into the current line when the vertical centres
    (mean y of corners 0 and 3) differ by at most ``max_y_diff``; a merged
    line is the axis-aligned bounding rectangle of its members.  Every
    resulting line, including a lone detection, is then widened by
    ``x_padding`` on both sides.

    Args:
        boxes: Sequence of 4-point boxes, each point indexable as ``[x, y]``.
        max_y_diff: Largest centre-to-centre y distance that still merges.
        x_padding: Amount subtracted from the left x values (corners 0 and 3)
            and added to the right x values (corners 1 and 2).

    Returns:
        list: Padded boxes as lists of four ``[x, y]`` pairs.  An empty
        ``boxes`` is returned unchanged.
    """
    # Only the empty case short-circuits: a single box must still be padded
    # so its output is consistent with the multi-box path.
    if len(boxes) == 0:
        return boxes

    ordered = sorted(boxes, key=lambda b: (b[0][1] + b[1][1] + b[2][1] + b[3][1]) / 4)

    merged_boxes = []
    current_line = ordered[0]
    for box in ordered[1:]:
        current_y = (current_line[0][1] + current_line[3][1]) / 2
        box_y = (box[0][1] + box[3][1]) / 2

        if abs(box_y - current_y) <= max_y_diff:
            # Replace the running line with the bounding rectangle of both.
            xs = [p[0] for b in (current_line, box) for p in b]
            ys = [p[1] for b in (current_line, box) for p in b]
            current_line = [
                [min(xs), min(ys)],
                [max(xs), min(ys)],
                [max(xs), max(ys)],
                [min(xs), max(ys)],
            ]
        else:
            merged_boxes.append(current_line)
            current_line = box
    merged_boxes.append(current_line)

    # Padding may push x outside the image; no clamping is done here.
    return [
        [
            [line[0][0] - x_padding, line[0][1]],
            [line[1][0] + x_padding, line[1][1]],
            [line[2][0] + x_padding, line[2][1]],
            [line[3][0] - x_padding, line[3][1]],
        ]
        for line in merged_boxes
    ]

def crop_text_lines(image, boxes, output_dir):
    """Crop each box out of ``image`` and save it as ``line_<i>.png``.

    Args:
        image: RGB image array indexed as ``image[y, x]``.
        boxes: Iterable of boxes, each convertible to an ``(N, 2)`` array of
            ``[x, y]`` points; values are truncated to ``int32``.
        output_dir: Directory that receives the crops; created if missing.

    Empty crops are skipped.  Crops are converted RGB -> BGR before
    ``cv2.imwrite``.
    """
    os.makedirs(output_dir, exist_ok=True)
    height, width = image.shape[:2]

    for i, box in enumerate(boxes):
        corners = np.array(box).astype(np.int32)
        # Slice ends are exclusive, so the upper clamp is the full width and
        # height (clamping to size - 1 would always drop the last column/row).
        # The max(lower, ...) guard keeps a box lying entirely outside the
        # image from producing a negative end index that wraps around.
        x_min = max(0, np.min(corners[:, 0]))
        x_max = max(x_min, min(width, np.max(corners[:, 0])))
        y_min = max(0, np.min(corners[:, 1]))
        y_max = max(y_min, min(height, np.max(corners[:, 1])))
        crop_img = image[y_min:y_max, x_min:x_max]

        # Only save if the cropped image is not empty
        if crop_img.size > 0:
            cv2.imwrite(f"{output_dir}/line_{i}.png", cv2.cvtColor(crop_img, cv2.COLOR_RGB2BGR))

# Main Function
def segment_text_lines(image_path, model_path, output_dir):
    """Run the full pipeline: load, detect, merge into lines, crop and save.

    Args:
        image_path: Path of the input image.
        model_path: Path of the CRAFT checkpoint.
        output_dir: Directory where the cropped line images are written.
    """
    rgb_image = preprocess_image(image_path)
    craft_net = load_craft_model(model_path)

    # Raw detections -> merged line boxes.
    detections = detect_text_regions(rgb_image, craft_net)
    merged_lines = merge_boxes_to_lines(detections)

    crop_text_lines(rgb_image, merged_lines, output_dir)

# Input image, CRAFT checkpoint and destination folder for this run
image_path = "/content/1.png"  # Update this to your image path
model_path = "/content/CRAFT-pytorch/craft_mlt_25k.pth"
output_dir = "/content/output_directory3"

# Execute the pipeline with the paths configured above
segment_text_lines(image_path=image_path, model_path=model_path, output_dir=output_dir)


In [None]:
import cv2
import numpy as np
import torch
from collections import OrderedDict
import os

# Add CRAFT-pytorch repository to the Python path
import sys
sys.path.append('/content/CRAFT-pytorch')

from craft import CRAFT
from craft_utils import getDetBoxes
from imgproc import normalizeMeanVariance
from file_utils import saveResult

# Helper functions
def copyStateDict(state_dict):
    """Return a copy of ``state_dict`` with a leading ``module`` key prefix removed.

    The first key decides the behaviour for the whole mapping: if it starts
    with ``"module"``, the first dot-separated component is dropped from
    every key; otherwise all keys are copied unchanged.  An empty mapping
    yields an empty ``OrderedDict`` instead of raising ``IndexError``.

    NOTE(review): the ``module.`` prefix is presumably the one found in
    checkpoints saved from a ``torch.nn.DataParallel`` wrapper -- confirm.

    Args:
        state_dict: Mapping of parameter names (strings) to values.

    Returns:
        OrderedDict: The re-keyed entries in their original order.
    """
    # next(..., "") keeps the empty-mapping case from indexing into nothing.
    first_key = next(iter(state_dict), "")
    start_idx = 1 if first_key.startswith("module") else 0
    return OrderedDict(
        (".".join(key.split(".")[start_idx:]), value)
        for key, value in state_dict.items()
    )

def load_craft_model(model_path):
    """Build a CRAFT network, load the checkpoint at ``model_path`` and return it.

    The checkpoint is read onto the CPU, its keys are normalised with
    ``copyStateDict`` and the network is switched to evaluation mode.

    Args:
        model_path: Path of the checkpoint file passed to ``torch.load``.

    Returns:
        The ``CRAFT`` instance with the loaded weights, in eval mode.
    """
    model = CRAFT()
    checkpoint = torch.load(model_path, map_location='cpu')
    weights = copyStateDict(checkpoint)
    model.load_state_dict(weights)
    model.eval()
    return model

def preprocess_image(image_path):
    """Read the image at ``image_path`` and return it converted from BGR to RGB.

    Args:
        image_path: Path of the image file passed to ``cv2.imread``.

    Returns:
        The loaded image after ``cv2.COLOR_BGR2RGB`` conversion.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None``.
    """
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        raise ValueError(f"Image at path {image_path} could not be loaded.")
    return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

def detect_text_regions(image, net):
    """Run ``net`` on ``image`` and return detected boxes in image coordinates.

    Args:
        image: Image array; it is normalised with ``normalizeMeanVariance``
            and its first two axes are treated as (height, width).
        net: Callable model whose first output is indexed as
            ``y[0, :, :, 0]`` (text score) and ``y[0, :, :, 1]`` (link score).

    Returns:
        list: One ``float32`` array of corner points per detected box,
        rescaled from the score-map grid to the pixel grid of ``image``.
    """
    x = normalizeMeanVariance(image)
    # Move the last axis to the front and add a leading batch axis.
    x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)

    with torch.no_grad():
        y, _ = net(x)

    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    # Lower thresholds to capture more of the line.
    # NOTE(review): positional values are presumably text_threshold,
    # link_threshold and low_text of the upstream getDetBoxes -- confirm.
    boxes, _ = getDetBoxes(score_text, score_link, 0.2, 0.2, 0.2)

    # getDetBoxes works on the score-map grid.  The boxes are later used to
    # crop `image`, so map them back using the actual shape ratio; this is a
    # no-op when the score map already has the same size as the image.
    # NOTE(review): upstream CRAFT rescales with adjustResultCoordinates for
    # the same reason -- confirm the ratio matches your model's output size.
    map_h, map_w = score_text.shape[:2]
    scale = np.array([image.shape[1] / map_w, image.shape[0] / map_h], dtype=np.float32)
    return [np.asarray(box, dtype=np.float32) * scale for box in boxes]

def merge_boxes_to_lines(boxes, image_height=1200, image_w=500, max_y_diff_upper=15, max_y_diff_lower=10):
    """Group detection boxes into text lines spanning the full image width.

    Boxes are sorted by the mean y of their four corners.  Walking that
    order, a box joins the current line when the vertical centres (mean y of
    corners 0 and 3) differ by at most a threshold: ``max_y_diff_upper`` for
    boxes centred above ``image_height / 2``, ``max_y_diff_lower`` otherwise.
    Each resulting line, including a lone detection, is replaced by a
    rectangle from x = 0 to x = ``image_w - 1`` covering the line's y range.

    Args:
        boxes: Sequence of 4-point boxes, each point indexable as ``[x, y]``.
        image_height: Image height used to pick the merge threshold.
        image_w: Image width used for the right edge of the output boxes.
        max_y_diff_upper: Merge threshold for the upper half of the image.
        max_y_diff_lower: Merge threshold for the lower half of the image.

    Returns:
        list: One ``(4, 2)`` NumPy array per line.  An empty ``boxes`` is
        returned unchanged.
    """
    # Only the empty case short-circuits: a single box must still be widened
    # so its output is consistent with the multi-box path.
    if len(boxes) == 0:
        return boxes

    ordered = sorted(boxes, key=lambda b: (b[0][1] + b[1][1] + b[2][1] + b[3][1]) / 4)
    half_height = image_height / 2

    merged_boxes = []
    current_line = ordered[0]
    for box in ordered[1:]:
        current_y = (current_line[0][1] + current_line[3][1]) / 2
        box_y = (box[0][1] + box[3][1]) / 2
        max_y_diff = max_y_diff_upper if box_y < half_height else max_y_diff_lower

        if abs(box_y - current_y) <= max_y_diff:
            # Replace the running line with the bounding rectangle of both.
            xs = [p[0] for b in (current_line, box) for p in b]
            ys = [p[1] for b in (current_line, box) for p in b]
            current_line = [
                [min(xs), min(ys)],
                [max(xs), min(ys)],
                [max(xs), max(ys)],
                [min(xs), max(ys)],
            ]
        else:
            merged_boxes.append(current_line)
            current_line = box
    merged_boxes.append(current_line)

    # Stretch every line horizontally across the whole image.
    right_edge = image_w - 1
    full_width_boxes = []
    for line in merged_boxes:
        ys = np.array(line)[:, 1]
        y_min, y_max = np.min(ys), np.max(ys)
        full_width_boxes.append(np.array([
            [0, y_min],
            [right_edge, y_min],
            [right_edge, y_max],
            [0, y_max],
        ]))
    return full_width_boxes

# Main Function
def segment_text_lines(image_path, model_path, output_dir):
    """Run the full pipeline: load, detect, merge into full-width lines, crop and save.

    Args:
        image_path: Path of the input image.
        model_path: Path of the CRAFT checkpoint.
        output_dir: Directory where the cropped line images are written.
    """
    # Load image
    image = preprocess_image(image_path)

    # Load CRAFT model
    net = load_craft_model(model_path)

    # Detect text regions
    boxes = detect_text_regions(image, net)

    # Merge boxes to form lines.  Pass the real image size: the defaults of
    # merge_boxes_to_lines (1200 x 500) only fit one particular image and
    # would otherwise pick the wrong half-height threshold and box width.
    image_height, image_width = image.shape[:2]
    line_boxes = merge_boxes_to_lines(boxes, image_height=image_height, image_w=image_width)

    # Crop and save text lines.
    # NOTE(review): crop_text_lines is not defined in this cell; it must
    # already exist in the kernel from an earlier cell.
    crop_text_lines(image, line_boxes, output_dir)

# Input image, CRAFT checkpoint and destination folder for this run
image_path = "/content/1.png"  # Update this to your image path
model_path = "/content/CRAFT-pytorch/craft_mlt_25k.pth"
output_dir = "/content/output_directory7"

# Execute the pipeline with the paths configured above
segment_text_lines(image_path=image_path, model_path=model_path, output_dir=output_dir)
