In [5]:
import cv2
import pytesseract
from PIL import Image

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def ocr_readability_score(image):
    # Convert the image to PIL format
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    # Perform OCR on the image
    ocr_result = pytesseract.image_to_data(pil_image, output_type=pytesseract.Output.DICT)
    # Calculate the readability score based on the number of characters detected
    num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
    return num_chars

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    transformations = {
        "original": img,
        "rot90": cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE),
        "rot180": cv2.rotate(img, cv2.ROTATE_180),
        "rot270": cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE),
        "flip_horizontal": cv2.flip(img, 1),
        "flip_horizontal_rot90": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_CLOCKWISE),
        "flip_horizontal_rot180": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_180),
        "flip_horizontal_rot270": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_COUNTERCLOCKWISE)
    }

    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
best_img = find_best_orientation('dj/data/test/0a4f2decf34d3bff.jpg')
cv2.imwrite('dj/best_oriented_image.jpg', best_img)


Transformation: original, Readability Score: 0
Transformation: rot90, Readability Score: 0
Transformation: rot180, Readability Score: 0
Transformation: rot270, Readability Score: 0
Transformation: flip_horizontal, Readability Score: 0
Transformation: flip_horizontal_rot90, Readability Score: 0
Transformation: flip_horizontal_rot180, Readability Score: 0
Transformation: flip_horizontal_rot270, Readability Score: 0
Best transformation: original with score: 0


True

In [7]:
import cv2
import pytesseract
from PIL import Image
import numpy as np

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def ocr_readability_score(image):
    try:
        # Convert the image to PIL format
        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # Perform OCR on the image
        ocr_result = pytesseract.image_to_data(pil_image, output_type=pytesseract.Output.DICT)
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    transformations = {
        "original": img,
        "rot90": cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE),
        "rot180": cv2.rotate(img, cv2.ROTATE_180),
        "rot270": cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE),
        "flip_horizontal": cv2.flip(img, 1),
        "flip_horizontal_rot90": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_CLOCKWISE),
        "flip_horizontal_rot180": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_180),
        "flip_horizontal_rot270": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_COUNTERCLOCKWISE)
    }

    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Transformation: original, Readability Score: 0
Transformation: rot90, Readability Score: 0
Transformation: rot180, Readability Score: 0
Transformation: rot270, Readability Score: 0
Transformation: flip_horizontal, Readability Score: 0
Transformation: flip_horizontal_rot90, Readability Score: 0
Transformation: flip_horizontal_rot180, Readability Score: 0
Transformation: flip_horizontal_rot270, Readability Score: 0
Best transformation: original with score: 0
Best oriented image saved to dj/best_oriented_image.jpg


In [8]:
import cv2
import pytesseract
from PIL import Image

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Denoise using morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    return denoised

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Convert the image to PIL format
        pil_image = Image.fromarray(preprocessed_image)
        
        # Perform OCR on the image
        ocr_result = pytesseract.image_to_data(pil_image, output_type=pytesseract.Output.DICT)
        
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    transformations = {
        "original": img,
        "rot90": cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE),
        "rot180": cv2.rotate(img, cv2.ROTATE_180),
        "rot270": cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE),
        "flip_horizontal": cv2.flip(img, 1),
        "flip_horizontal_rot90": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_CLOCKWISE),
        "flip_horizontal_rot180": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_180),
        "flip_horizontal_rot270": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_COUNTERCLOCKWISE)
    }

    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Transformation: original, Readability Score: 0
Transformation: rot90, Readability Score: 0
Transformation: rot180, Readability Score: 0
Transformation: rot270, Readability Score: 0
Transformation: flip_horizontal, Readability Score: 0
Transformation: flip_horizontal_rot90, Readability Score: 0
Transformation: flip_horizontal_rot180, Readability Score: 0
Transformation: flip_horizontal_rot270, Readability Score: 0
Best transformation: original with score: 0
Best oriented image saved to dj/best_oriented_image.jpg


Error in OCR processing: [Errno 2] Unable to synchronously open file (unable to open file: name = 'esrgan_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
Transformation: original, Readability Score: 0
Error in OCR processing: [Errno 2] Unable to synchronously open file (unable to open file: name = 'esrgan_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
Transformation: rot90, Readability Score: 0
Error in OCR processing: [Errno 2] Unable to synchronously open file (unable to open file: name = 'esrgan_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
Transformation: rot180, Readability Score: 0
Error in OCR processing: [Errno 2] Unable to synchronously open file (unable to open file: name = 'esrgan_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
Transformation: rot270, Readability Score: 0
Error in OCR processing: [Errno

In [11]:
import cv2
import pytesseract
from PIL import Image
import numpy as np

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def preprocess_image(image):
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    return thresh

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Convert the image to PIL format
        pil_image = Image.fromarray(preprocessed_image)
        
        # Perform OCR on the image with additional configuration
        custom_config = r'--oem 3 --psm 6'
        ocr_result = pytesseract.image_to_data(pil_image, config=custom_config, output_type=pytesseract.Output.DICT)
        
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    transformations = {
        "original": img,
        "rot90": cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE),
        "rot180": cv2.rotate(img, cv2.ROTATE_180),
        "rot270": cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE),
        "flip_horizontal": cv2.flip(img, 1),
        "flip_horizontal_rot90": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_CLOCKWISE),
        "flip_horizontal_rot180": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_180),
        "flip_horizontal_rot270": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_COUNTERCLOCKWISE)
    }

    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Transformation: original, Readability Score: 490
Transformation: rot90, Readability Score: 550
Transformation: rot180, Readability Score: 486
Transformation: rot270, Readability Score: 535
Transformation: flip_horizontal, Readability Score: 471
Transformation: flip_horizontal_rot90, Readability Score: 498
Transformation: flip_horizontal_rot180, Readability Score: 533
Transformation: flip_horizontal_rot270, Readability Score: 509
Best transformation: rot90 with score: 550
Best oriented image saved to dj/best_oriented_image.jpg


In [12]:
import cv2
import pytesseract
from PIL import Image
import numpy as np

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def preprocess_image(image):
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    return thresh

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Convert the image to PIL format
        pil_image = Image.fromarray(preprocessed_image)
        
        # Perform OCR on the image with additional configuration
        custom_config = r'--oem 3 --psm 6'
        ocr_result = pytesseract.image_to_data(pil_image, config=custom_config, output_type=pytesseract.Output.DICT)
        
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def detect_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    # Apply transformations
    transformations = {
        "original": img,
        "rot90": cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE),
        "rot180": cv2.rotate(img, cv2.ROTATE_180),
        "rot270": cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE),
        "flip_horizontal": cv2.flip(img, 1),
        "flip_horizontal_rot90": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_CLOCKWISE),
        "flip_horizontal_rot180": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_180),
        "flip_horizontal_rot270": cv2.rotate(cv2.flip(img, 1), cv2.ROTATE_90_COUNTERCLOCKWISE)
    }

    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = detect_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Transformation: original, Readability Score: 490
Transformation: rot90, Readability Score: 550
Transformation: rot180, Readability Score: 486
Transformation: rot270, Readability Score: 535
Transformation: flip_horizontal, Readability Score: 471
Transformation: flip_horizontal_rot90, Readability Score: 498
Transformation: flip_horizontal_rot180, Readability Score: 533
Transformation: flip_horizontal_rot270, Readability Score: 509
Best transformation: rot90 with score: 550
Best oriented image saved to dj/best_oriented_image.jpg


Transformation: rot0, Readability Score: 490
Transformation: rot15, Readability Score: 455
Transformation: rot30, Readability Score: 335
Transformation: rot45, Readability Score: 312
Transformation: rot60, Readability Score: 259
Transformation: rot75, Readability Score: 275
Transformation: rot90, Readability Score: 349
Transformation: rot105, Readability Score: 292
Transformation: rot120, Readability Score: 334
Transformation: rot135, Readability Score: 267
Transformation: rot150, Readability Score: 294
Transformation: rot165, Readability Score: 393
Transformation: rot180, Readability Score: 486
Transformation: rot195, Readability Score: 484
Transformation: rot210, Readability Score: 357
Transformation: rot225, Readability Score: 330
Transformation: rot240, Readability Score: 302
Transformation: rot255, Readability Score: 308
Transformation: rot270, Readability Score: 378
Transformation: rot285, Readability Score: 354
Transformation: rot300, Readability Score: 350
Transformation: rot31

In [37]:
import cv2
import pytesseract
from PIL import Image
import numpy as np

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def preprocess_image(image):
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    return thresh

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Convert the image to PIL format
        pil_image = Image.fromarray(preprocessed_image)
        
        # Perform OCR on the image with additional configuration
        custom_config = r'--oem 3 --psm 6'
        ocr_result = pytesseract.image_to_data(pil_image, config=custom_config, output_type=pytesseract.Output.DICT)
        
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = [0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345]
    
    transformations = {f"rot{angle}": rotate_image(img, angle) for angle in angles}
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    transformations.update({f"flip_vertical_rot{angle}": rotate_image(cv2.flip(img, 0), angle) for angle in angles})
    transformations.update({f"flip_both_rot{angle}": rotate_image(cv2.flip(cv2.flip(img, 1), 0), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/eng.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'eng\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transformation: rot0, Readability Score: 0
Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/eng.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'eng\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transformation: rot15, Readability Score: 0
Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/eng.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'eng\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transforma

KeyError: 'original'

Error in OCR processing: [Errno 2] Unable to synchronously open file (unable to open file: name = 'esrgan_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
Transformation: rot0, Readability Score: 0
Error in OCR processing: [Errno 2] Unable to synchronously open file (unable to open file: name = 'esrgan_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
Transformation: rot15, Readability Score: 0
Error in OCR processing: [Errno 2] Unable to synchronously open file (unable to open file: name = 'esrgan_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
Transformation: rot30, Readability Score: 0
Error in OCR processing: [Errno 2] Unable to synchronously open file (unable to open file: name = 'esrgan_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
Transformation: rot45, Readability Score: 0
Error in OCR processing: [Errno 2] Un

KeyError: 'original'

In [18]:
import cv2
import pytesseract
from PIL import Image
import numpy as np

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Additional noise removal using morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    return denoised

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Convert the image to PIL format
        pil_image = Image.fromarray(preprocessed_image)
        
        # Perform OCR on the image with additional configuration
        custom_config = r'--oem 3 --psm 6'
        ocr_result = pytesseract.image_to_data(pil_image, config=custom_config, output_type=pytesseract.Output.DICT)
        
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = [0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345]
    
    transformations = {f"rot{angle}": rotate_image(img, angle) for angle in angles}
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Transformation: rot0, Readability Score: 434
Transformation: rot15, Readability Score: 330
Transformation: rot30, Readability Score: 221
Transformation: rot45, Readability Score: 365
Transformation: rot60, Readability Score: 330
Transformation: rot75, Readability Score: 312
Transformation: rot90, Readability Score: 349
Transformation: rot105, Readability Score: 337
Transformation: rot120, Readability Score: 290
Transformation: rot135, Readability Score: 318
Transformation: rot150, Readability Score: 219
Transformation: rot165, Readability Score: 245
Transformation: rot180, Readability Score: 401
Transformation: rot195, Readability Score: 247
Transformation: rot210, Readability Score: 222
Transformation: rot225, Readability Score: 374
Transformation: rot240, Readability Score: 200
Transformation: rot255, Readability Score: 317
Transformation: rot270, Readability Score: 303
Transformation: rot285, Readability Score: 258
Transformation: rot300, Readability Score: 222
Transformation: rot31

In [34]:
import cv2
import pytesseract
from PIL import Image
import numpy as np
import tensorflow as tf

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Additional noise removal using morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    # Apply CLAHE to improve contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_image = clahe.apply(denoised)
    
    return enhanced_image

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Convert the image to PIL format
        pil_image = Image.fromarray(preprocessed_image)
        
        # Perform OCR on the image with additional configuration
        custom_config = r'--oem 3 --psm 6'
        ocr_result = pytesseract.image_to_data(pil_image, config=custom_config, output_type=pytesseract.Output.DICT)
        
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = [0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345]
    
    transformations = {f"rot{angle}": rotate_image(img, angle) for angle in angles}
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
image_path = '/dj/data/test/0a5bf6ba56f069c5.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/eng.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'eng\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transformation: rot0, Readability Score: 0
Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/eng.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'eng\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transformation: rot15, Readability Score: 0
Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/eng.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'eng\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transforma

KeyError: 'original'

In [23]:
import cv2
import pytesseract
from PIL import Image
import numpy as np
import os

# Set the TESSDATA_PREFIX environment variable
os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/'

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Additional noise removal using morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    # Apply CLAHE to improve contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_image = clahe.apply(denoised)
    
    return enhanced_image

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Convert the image to PIL format
        pil_image = Image.fromarray(preprocessed_image)
        
        # Perform OCR on the image with additional configuration for Korean
        custom_config = r'--oem 3 --psm 6 -l kor'
        ocr_result = pytesseract.image_to_data(pil_image, config=custom_config, output_type=pytesseract.Output.DICT)
        
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = [0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345]
    
    transformations = {"original": img}
    transformations.update({f"rot{angle}": rotate_image(img, angle) for angle in angles})
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations[best_transformation]

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/kor.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'kor\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transformation: original, Readability Score: 0
Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/kor.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'kor\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transformation: rot0, Readability Score: 0
Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/kor.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'kor\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transfo

In [24]:
import os
os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/'

In [25]:
import cv2
import pytesseract
from PIL import Image
import numpy as np
import os

# Set the TESSDATA_PREFIX environment variable
os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/'

# Specify the path to the Tesseract executable if necessary
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust the path as needed

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Additional noise removal using morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    # Apply CLAHE to improve contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_image = clahe.apply(denoised)
    
    return enhanced_image

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Convert the image to PIL format
        pil_image = Image.fromarray(preprocessed_image)
        
        # Perform OCR on the image with additional configuration for Korean
        custom_config = r'--oem 3 --psm 6 -l kor'
        ocr_result = pytesseract.image_to_data(pil_image, config=custom_config, output_type=pytesseract.Output.DICT)
        
        # Calculate the readability score based on the number of characters detected
        num_chars = sum(len(word) for word in ocr_result['text'] if word.strip())
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = [0, 90, 180, 270]
    transformations = {"original": img}
    transformations.update({f"rot{angle}": rotate_image(img, angle) for angle in angles})
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations.get(best_transformation, img)

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/kor.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'kor\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transformation: original, Readability Score: 0
Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/kor.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'kor\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transformation: rot0, Readability Score: 0
Error in OCR processing: (1, 'Error opening data file /usr/share/tesseract-ocr/4.00/kor.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'kor\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
Transfo

In [26]:
import cv2
import easyocr
from PIL import Image
import numpy as np

# Initialize the EasyOCR reader
reader = easyocr.Reader(['ko'], gpu=False)  # 'ko' is the language code for Korean

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Additional noise removal using morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    # Apply CLAHE to improve contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_image = clahe.apply(denoised)
    
    return enhanced_image

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Perform OCR on the image
        result = reader.readtext(preprocessed_image)
        
        # Calculate the readability score based on the number of detected elements
        num_chars = sum(len(text[1]) for text in result)
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = [0, 90, 180, 270]
    transformations = {"original": img}
    transformations.update({f"rot{angle}": rotate_image(img, angle) for angle in angles})
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations.get(best_transformation, img)

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Using CPU. Note: This module is much faster with a GPU.
Downloading detection model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% Complete

Downloading recognition model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.1% Complete

  net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
  state_dict = torch.load(model_path, map_location=device)


Transformation: original, Readability Score: 0
Transformation: rot0, Readability Score: 0
Transformation: rot90, Readability Score: 0
Transformation: rot180, Readability Score: 0
Transformation: rot270, Readability Score: 0
Transformation: flip_horizontal_rot0, Readability Score: 0
Transformation: flip_horizontal_rot90, Readability Score: 0
Transformation: flip_horizontal_rot180, Readability Score: 0
Transformation: flip_horizontal_rot270, Readability Score: 0
Best transformation: original with score: 0
Best oriented image saved to dj/best_oriented_image.jpg


In [27]:
import cv2
import easyocr
from PIL import Image
import numpy as np

# Initialize the EasyOCR reader
reader = easyocr.Reader(['ko'], gpu=False)  # 'ko' is the language code for Korean

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding (binarization)
    binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Apply morphological operations to remove noise
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    
    # Sharpen the image to enhance edges
    sharpen_kernel = np.array([[-1, -1, -1], 
                               [-1, 9, -1], 
                               [-1, -1, -1]])
    sharpened = cv2.filter2D(denoised, -1, sharpen_kernel)
    
    return sharpened

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Perform OCR on the image
        result = reader.readtext(preprocessed_image)
        
        # Calculate the readability score based on the number of detected elements
        num_chars = sum(len(text[1]) for text in result)
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = [0, 90, 180, 270]
    transformations = {"original": img}
    transformations.update({f"rot{angle}": rotate_image(img, angle) for angle in angles})
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations.get(best_transformation, img)

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Using CPU. Note: This module is much faster with a GPU.


Transformation: original, Readability Score: 0
Transformation: rot0, Readability Score: 0
Transformation: rot90, Readability Score: 0
Transformation: rot180, Readability Score: 0
Transformation: rot270, Readability Score: 0
Transformation: flip_horizontal_rot0, Readability Score: 0
Transformation: flip_horizontal_rot90, Readability Score: 0
Transformation: flip_horizontal_rot180, Readability Score: 0
Transformation: flip_horizontal_rot270, Readability Score: 0
Best transformation: original with score: 0
Best oriented image saved to dj/best_oriented_image.jpg


In [28]:
import cv2
from paddleocr import PaddleOCR
import numpy as np

# Initialize the PaddleOCR reader
ocr = PaddleOCR(use_angle_cls=True, lang='korean')  # 'korean' is the language code for Korean

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding (binarization)
    binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Apply morphological operations to remove noise
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    
    # Sharpen the image to enhance edges
    sharpen_kernel = np.array([[-1, -1, -1], 
                               [-1, 9, -1], 
                               [-1, -1, -1]])
    sharpened = cv2.filter2D(denoised, -1, sharpen_kernel)
    
    return sharpened

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Perform OCR on the image
        result = ocr.ocr(preprocessed_image, cls=True)
        
        # Calculate the readability score based on the number of detected elements
        num_chars = sum(len(line[1][0]) for line in result[0])
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = [0, 90, 180, 270]
    transformations = {"original": img}
    transformations.update({f"rot{angle}": rotate_image(img, angle) for angle in angles})
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations.get(best_transformation, img)

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


download https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar to /data/ephemeral/home/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/Multilingual_PP-OCRv3_det_infer.tar


100%|██████████| 3.85M/3.85M [00:03<00:00, 1.10MiB/s]


download https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/korean_PP-OCRv4_rec_infer.tar to /data/ephemeral/home/.paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/korean_PP-OCRv4_rec_infer.tar


100%|██████████| 24.4M/24.4M [00:03<00:00, 6.40MiB/s]


download https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar to /data/ephemeral/home/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.tar


100%|██████████| 2.19M/2.19M [00:16<00:00, 130kiB/s] 

[2024/08/01 11:41:50] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/data/ephemeral/home/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/data/ephemeral/home/.paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 




[2024/08/01 11:41:52] ppocr DEBUG: dt_boxes num : 8, elapsed : 0.29978370666503906
[2024/08/01 11:41:52] ppocr DEBUG: cls num  : 8, elapsed : 0.10980653762817383
[2024/08/01 11:41:53] ppocr DEBUG: rec_res num  : 8, elapsed : 0.6940979957580566
Transformation: original, Readability Score: 5
[2024/08/01 11:41:53] ppocr DEBUG: dt_boxes num : 8, elapsed : 0.1017310619354248
[2024/08/01 11:41:53] ppocr DEBUG: cls num  : 8, elapsed : 0.010725736618041992
[2024/08/01 11:41:53] ppocr DEBUG: rec_res num  : 8, elapsed : 0.4693927764892578
Transformation: rot0, Readability Score: 5
[2024/08/01 11:41:53] ppocr DEBUG: dt_boxes num : 0, elapsed : 0.09248590469360352
[2024/08/01 11:41:53] ppocr DEBUG: cls num  : 0, elapsed : 0
[2024/08/01 11:41:53] ppocr DEBUG: rec_res num  : 0, elapsed : 1.430511474609375e-06
Error in OCR processing: 'NoneType' object is not iterable
Transformation: rot90, Readability Score: 0
[2024/08/01 11:41:54] ppocr DEBUG: dt_boxes num : 6, elapsed : 0.044309139251708984
[2024/

In [36]:
import cv2
from paddleocr import PaddleOCR
import numpy as np

# Initialize the PaddleOCR reader
ocr = PaddleOCR(use_angle_cls=True, lang='korean')  # 'korean' is the language code for Korean

def preprocess_image(image):
    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply adaptive thresholding (binarization)
    binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Apply morphological operations to remove noise
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    denoised = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    
    # Sharpen the image to enhance edges
    sharpen_kernel = np.array([[-1, -1, -1], 
                               [-1, 9, -1], 
                               [-1, -1, -1]])
    sharpened = cv2.filter2D(denoised, -1, sharpen_kernel)
    
    # Further denoising
    denoised = cv2.fastNlMeansDenoising(sharpened, None, 30, 7, 21)
    
    return denoised

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Perform OCR on the image
        result = ocr.ocr(preprocessed_image, cls=True)
        
        # Calculate the readability score based on the number of detected elements
        num_chars = sum(len(line[1][0]) for line in result[0])
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Get the image dimensions
    (h, w) = image.shape[:2]
    # Calculate the center of the image
    center = (w // 2, h // 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def find_best_orientation(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = list(range(0, 360, 15))  # Rotate every 15 degrees
    transformations = {f"rot{angle}": rotate_image(img, angle) for angle in angles}
    transformations["original"] = img
    
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(cv2.flip(img, 1), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations.get(best_transformation, img)

# Example usage
image_path = '/dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    cv2.imwrite(output_path, best_img)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


[2024/08/01 14:03:32] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/data/ephemeral/home/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/data/ephemeral/home/.paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 

In [32]:
import torch
from PIL import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel

# Load the pre-trained Donut model and processor
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base")
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base")

def preprocess_image(image):
    # Resize and convert image to RGB
    resized_image = image.resize((1000, 1000)).convert("RGB")
    
    return resized_image

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Prepare the image for Donut
        pixel_values = processor(preprocessed_image, return_tensors="pt").pixel_values
        
        # Perform OCR using Donut
        generated_ids = model.generate(pixel_values, max_length=512)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        # Calculate the readability score based on the number of detected elements
        num_chars = len(generated_text)
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Rotate the image by the specified angle
    return image.rotate(angle, expand=True)

def find_best_orientation(image_path):
    # Load the image
    img = Image.open(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = list(range(0, 360, 15))  # Rotate every 15 degrees
    transformations = {f"rot{angle}": rotate_image(img, angle) for angle in angles}
    transformations["original"] = img
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(img.transpose(Image.FLIP_LEFT_RIGHT), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations.get(best_transformation, img)

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    best_img.save(output_path)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


TypeError: Couldn't build proto file into descriptor pool: duplicate file name sentencepiece_model.proto

In [33]:
import torch
from PIL import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel

# Load the pre-trained Donut model and processor
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base")
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base")

def preprocess_image(image):
    # Resize and convert image to RGB
    resized_image = image.resize((1000, 1000)).convert("RGB")
    return resized_image

def ocr_readability_score(image):
    try:
        # Preprocess the image
        preprocessed_image = preprocess_image(image)
        
        # Prepare the image for Donut
        pixel_values = processor(preprocessed_image, return_tensors="pt").pixel_values
        
        # Perform OCR using Donut
        generated_ids = model.generate(pixel_values, max_length=512)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        # Calculate the readability score based on the number of detected elements
        num_chars = len(generated_text)
        return num_chars
    except Exception as e:
        print(f"Error in OCR processing: {e}")
        return 0

def rotate_image(image, angle):
    # Rotate the image by the specified angle
    return image.rotate(angle, expand=True)

def find_best_orientation(image_path):
    # Load the image
    img = Image.open(image_path)
    
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None
    
    angles = list(range(0, 360, 15))  # Rotate every 15 degrees
    transformations = {f"rot{angle}": rotate_image(img, angle) for angle in angles}
    transformations["original"] = img
    transformations.update({f"flip_horizontal_rot{angle}": rotate_image(img.transpose(Image.FLIP_LEFT_RIGHT), angle) for angle in angles})
    
    best_score = 0
    best_transformation = "original"
    
    for key, transformed_img in transformations.items():
        score = ocr_readability_score(transformed_img)
        print(f"Transformation: {key}, Readability Score: {score}")
        if score > best_score:
            best_score = score
            best_transformation = key

    print(f"Best transformation: {best_transformation} with score: {best_score}")
    return transformations.get(best_transformation, img)

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
best_img = find_best_orientation(image_path)

if best_img is not None:
    output_path = 'dj/best_oriented_image.jpg'
    best_img.save(output_path)
    print(f"Best oriented image saved to {output_path}")
else:
    print("No image to save.")


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


TypeError: Couldn't build proto file into descriptor pool: duplicate file name sentencepiece_model.proto