In [1]:
import cv2
import numpy as np

def preprocess_image(image_path):
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Resize the image
    resized_image = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(resized_image, (3, 3), 0)
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Additional noise removal using morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    denoised = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    # Apply CLAHE to improve contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_image = clahe.apply(denoised)
    
    # Sharpen the image
    sharpen_kernel = np.array([[0, -1, 0], [-1, 5,-1], [0, -1, 0]])
    sharpened = cv2.filter2D(enhanced_image, -1, sharpen_kernel)
    
    # Edge detection to enhance lines
    edges = cv2.Canny(sharpened, 50, 150, apertureSize=3)
    
    # Ensure edges and sharpened image have the same shape
    if len(edges.shape) == 2:
        edges_colored = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
    else:
        edges_colored = edges

    # Ensure both images are the same size
    sharpened_colored = cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)

    # Combine the edges with the sharpened image
    combined = cv2.addWeighted(sharpened_colored, 0.8, edges_colored, 0.2, 0)

    return combined

def save_preprocessed_image(input_path, output_path):
    try:
        processed_image = preprocess_image(input_path)
        cv2.imwrite(output_path, processed_image)
        print(f"Preprocessed image saved to '{output_path}'")
    except Exception as e:
        print("Error during preprocessing:", e)

# Example usage
image_path = '/dj/data/test/4a8b25f0c83e9553.jpg'
output_path = '/dj/enhanced_image_4.png'
save_preprocessed_image(image_path, output_path)


Preprocessed image saved to '/dj/enhanced_image_4.png'


In [4]:
import cv2
import pytesseract

def preprocess_image(image_path):
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)

    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)

    # Additional noise removal using morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    denoised = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    # Apply CLAHE to improve contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_image = clahe.apply(denoised)

    return enhanced_image

def ocr_image(image_path):
    processed_image = preprocess_image(image_path)
    text = pytesseract.image_to_string(processed_image, lang='kor')
    return text

# Example usage
image_path = '/dj/data/test/4a8b25f0c83e9553.jpg'
extracted_text = ocr_image(image_path)
print(extracted_text)

 



In [5]:
import cv2
import numpy as np

def preprocess_image(image_path, output_path):
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    print(f"Original image shape: {image.shape}")

    # Resize image to improve processing
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    print(f"Resized image shape: {resized_image.shape}")

    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    print(f"Grayscale image shape: {gray.shape}")

    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Edge enhancement using Canny Edge Detection
    edges = cv2.Canny(blurred, 100, 200)
    print(f"Edges image shape: {edges.shape}")

    # Dilate edges to make them more prominent
    dilated_edges = cv2.dilate(edges, np.ones((1, 1), np.uint8), iterations=1)
    print(f"Dilated edges image shape: {dilated_edges.shape}")

    # Convert edges back to BGR
    edges_colored = cv2.cvtColor(dilated_edges, cv2.COLOR_GRAY2BGR)
    print(f"Edges colored image shape: {edges_colored.shape}")

    # Sharpen the image
    kernel_sharpening = np.array([[-1,-1,-1], 
                                  [-1, 9,-1],
                                  [-1,-1,-1]])
    sharpened = cv2.filter2D(resized_image, -1, kernel_sharpening)
    print(f"Sharpened image shape: {sharpened.shape}")

    # Combine the edges with the sharpened image
    combined = cv2.addWeighted(sharpened, 0.8, edges_colored, 0.2, 0)
    print(f"Combined image shape: {combined.shape}")

    # Save the preprocessed image
    cv2.imwrite(output_path, combined)
    print(f"Preprocessed image saved to '{output_path}'")
    
    
image_path = '/dj/data/test/0a95b7e3f2bdc376.jpg'
output_path = '/dj/enhanced_image_5.png'
preprocess_image(image_path, output_path)





Original image shape: (591, 443, 3)
Resized image shape: (1182, 886, 3)
Grayscale image shape: (1182, 886)
Edges image shape: (1182, 886)
Dilated edges image shape: (1182, 886)
Edges colored image shape: (1182, 886, 3)
Sharpened image shape: (1182, 886, 3)
Combined image shape: (1182, 886, 3)
Preprocessed image saved to '/dj/enhanced_image_5.png'


In [2]:
import cv2
import numpy as np
from skimage import restoration, exposure

def enhance_document_image(image_path, output_path):
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    print(f"Original image shape: {image.shape}")

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    print(f"Grayscale image shape: {gray.shape}")

    # Apply Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Apply adaptive histogram equalization to enhance contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_contrast = clahe.apply(blurred)

    # Edge enhancement using Canny edge detection
    edges = cv2.Canny(enhanced_contrast, 50, 150)
    print(f"Edges image shape: {edges.shape}")

    # Denoise the image using Non-Local Means Denoising
    denoised = restoration.denoise_nl_means(enhanced_contrast, h=1.15)
    denoised = (denoised * 255).astype(np.uint8)

    # Sharpen the image
    kernel_sharpening = np.array([[-1, -1, -1],
                                  [-1, 9, -1],
                                  [-1, -1, -1]])
    sharpened = cv2.filter2D(denoised, -1, kernel_sharpening)
    print(f"Sharpened image shape: {sharpened.shape}")

    # Combine the edges with the sharpened image
    combined = cv2.addWeighted(sharpened, 0.8, edges, 0.2, 0)
    print(f"Combined image shape: {combined.shape}")

    # Save the enhanced image
    cv2.imwrite(output_path, combined)
    print(f"Enhanced image saved to '{output_path}'")
    
    
image_path = '/dj/data/test/4a8b25f0c83e9553.jpg'
output_path = '/dj/enhanced_image_5.png'
preprocess_image(image_path, output_path)

TypeError: preprocess_image() takes 1 positional argument but 2 were given

In [16]:
import cv2
import numpy as np
from PIL import Image
import pytesseract

def rotate_and_flip_image(image, angle):
    # Rotate image
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def calculate_readability_score(image):
    # Convert the image to PIL format
    pil_image = Image.fromarray(image)
    # Use pytesseract to do OCR on the image
    text = pytesseract.image_to_string(pil_image, lang='kor')
    # Readability score based on the length of extracted text
    readability_score = len(text)
    return readability_score, text

def find_best_orientation(image_path):
    # Load the preprocessed image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    print(f"Original image shape: {image.shape}")

    best_score = 0
    best_image = None
    best_text = ""
    angles = [0,15, 30,45, 60,75, 90,105, 120,135, 150,165, 180,195, 210,225, 240,255, 270,285, 300,315, 330, 345]

    for angle in angles:
        # Rotate the image
        rotated_image = rotate_and_flip_image(image, angle)
        # Calculate readability score for the rotated image
        score, text = calculate_readability_score(rotated_image)
        print(f"Angle: {angle}, Readability Score: {score}")

        if score > best_score:
            best_score = score
            best_image = rotated_image
            best_text = text

        # Flip the image and calculate the readability score for the flipped image
        flipped_image = cv2.flip(rotated_image, 1)
        score, text = calculate_readability_score(flipped_image)
        print(f"Flipped Angle: {angle}, Readability Score: {score}")

        if score > best_score:
            best_score = score
            best_image = flipped_image
            best_text = text

    return best_image, best_text

# Example usage
image_path = '/dj/enhanced_image.png/1acbab3967fe133b_out.jpg'
best_image, best_text = find_best_orientation(image_path)

if best_image is not None:
    cv2.imwrite('dj/best_oriented_image_4.jpg', best_image)
    print("Best oriented image saved to 'dj/best_oriented_image.jpg'")
else:
    print("No suitable transformation found.")


Original image shape: (2364, 1772, 3)
Angle: 0, Readability Score: 1077
Flipped Angle: 0, Readability Score: 1440
Angle: 15, Readability Score: 3
Flipped Angle: 15, Readability Score: 3
Angle: 30, Readability Score: 3
Flipped Angle: 30, Readability Score: 3
Angle: 45, Readability Score: 3
Flipped Angle: 45, Readability Score: 3
Angle: 60, Readability Score: 3
Flipped Angle: 60, Readability Score: 3
Angle: 75, Readability Score: 40
Flipped Angle: 75, Readability Score: 17
Angle: 90, Readability Score: 110
Flipped Angle: 90, Readability Score: 133
Angle: 105, Readability Score: 3
Flipped Angle: 105, Readability Score: 3
Angle: 120, Readability Score: 3
Flipped Angle: 120, Readability Score: 3
Angle: 135, Readability Score: 3
Flipped Angle: 135, Readability Score: 3
Angle: 150, Readability Score: 3
Flipped Angle: 150, Readability Score: 3
Angle: 165, Readability Score: 69
Flipped Angle: 165, Readability Score: 13
Angle: 180, Readability Score: 1085
Flipped Angle: 180, Readability Score: 1

In [19]:
import cv2
import numpy as np

def preprocess_image_with_canny(image_path):
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    print(f"Original image shape: {image.shape}")

    # Resize image to improve OCR accuracy
    resized_image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    print(f"Resized image shape: {resized_image.shape}")

    # Convert to grayscale
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    print(f"Gray image shape: {gray.shape}")

    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    print(f"Blurred image shape: {blurred.shape}")

    # Apply Canny edge detector
    edges = cv2.Canny(blurred, 50, 150)
    print(f"Edges image shape: {edges.shape}")

    return edges

def save_preprocessed_image(input_path, output_path):
    processed_image = preprocess_image_with_canny(input_path)
    cv2.imwrite(output_path, processed_image)
    print(f"Preprocessed image saved to '{output_path}'")

# Example usage
image_path = 'dj/data/test/0a4f2decf34d3bff.jpg'
output_path = 'dj/preprocessed_image_3.jpg'
save_preprocessed_image(image_path, output_path)


Original image shape: (591, 443, 3)
Resized image shape: (1182, 886, 3)
Gray image shape: (1182, 886)
Blurred image shape: (1182, 886)
Edges image shape: (1182, 886)
Preprocessed image saved to 'dj/preprocessed_image_3.jpg'
