In [1]:
import cv2
import numpy as np

def correct_rotation(image_path):
    # Load the image
    image = cv2.imread(image_path)

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Use edge detection to find edges in the image
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Find lines in the image using the Hough Line Transform
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    # Calculate the angle of the lines
    angles = []
    for line in lines:
        for rho, theta in line:
            angle = np.rad2deg(theta)
            if angle < 90:
                angles.append(angle)
            else:
                angles.append(angle - 180)

    # Calculate the median angle
    median_angle = np.median(angles)

    # Rotate the image to correct the rotation
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    return rotated

# Example usage
rotated_image = correct_rotation(r"D:\2023-2024\final project\charaf\OCR model\assets\img20240.png")

# Save the corrected image
cv2.imwrite(r"D:\2023-2024\final project\charaf\OCR model\assets\corrected_image.png", rotated_image)

# Display the corrected image
cv2.imshow(r"D:\2023-2024\final project\charaf\OCR model\assets\corrected_image.png", rotated_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [6]:
import cv2
import numpy as np

def correct_skew(image_path, output_path):
    # Load the image
    image = cv2.imread(image_path)

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian Blur
    gray = cv2.GaussianBlur(gray, (9, 9), 0)

    # Threshold the image
    _, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Get the largest contour
    largest_contour = max(contours, key=cv2.contourArea)

    # Get the minimum area rectangle
    rect = cv2.minAreaRect(largest_contour)
    angle = rect[-1]

    # Adjust the angle
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle

    # Rotate the image
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # Save the rotated image
    cv2.imwrite(output_path, rotated)

    # Display the original and corrected image
    cv2.imshow("Original Image", image)
    cv2.imshow("Corrected Image", rotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage
correct_skew(r"D:\2023-2024\final project\charaf\OCR model\assets\img20240.png", r"D:\2023-2024\final project\charaf\OCR model\assets\corrected_image.png")
#correct_skew("/mnt/data/image0.png", "/mnt/data/corrected_image0.png")


In [4]:
import cv2
import numpy as np
from scipy.ndimage import interpolation as inter

def correct_skew(image, delta=1, limit=5):
    def determine_score(arr, angle):
        data = inter.rotate(arr, angle, reshape=False, order=0)
        histogram = np.sum(data, axis=1, dtype=float)
        score = np.sum((histogram[1:] - histogram[:-1]) **2, dtype=float)
        return histogram, score

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    scores = []
    angles = np.arange(-limit, limit + delta, delta)
    for angle in angles:
        histogram, score = determine_score(thresh, angle)
        scores.append(score)

    best_angle = angles[scores.index(max(scores))]

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    corrected = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    return best_angle, corrected

if __name__ == '__main__':
    image_paths = [r"D:\2023-2024\final project\charaf\OCR model\assets\img20240402_15141364.png"]
    output_paths = [r"D:\2023-2024\final project\charaf\OCR model\assets\corrected_image.png"]

    for img_path, out_path in zip(image_paths, output_paths):
        image = cv2.imread(img_path)
        if image is None:
            print(f"Error: Unable to load image at {img_path}")
            continue
        angle, corrected = correct_skew(image)
        print(f'Skew angle for {img_path}:', angle)
        cv2.imwrite(out_path, corrected)
        cv2.imshow(f'Corrected Image for {img_path}', corrected)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


  data = inter.rotate(arr, angle, reshape=False, order=0)


Skew angle for D:\2023-2024\final project\charaf\OCR model\assets\img20240402_15141364.png: -2


In [5]:
import os
from pdf2image import convert_from_path
from PIL import Image

def process_file(file_path, image_folder):
    # Define the file extension
    file_ext = os.path.splitext(file_path)[1].lower()

    # Check if the file is a PDF
    if file_ext == '.pdf':
        try:
            # Convert PDF to images
            images = convert_from_path(file_path)
            base_name = os.path.splitext(os.path.basename(file_path))[0]

            # Save each page as an image
            for i, image in enumerate(images):
                image_path = os.path.join(image_folder, f"{base_name}_page_{i + 1}.png")
                image.save(image_path, 'PNG')
                return image_path
            print(f"PDF file converted and saved to {image_folder}")

        except Exception as e:
            print(f"Failed to convert PDF: {e}")

    # Check if the file is an image
    elif file_ext in ['.png', '.jpg', '.jpeg', '.gif', '.bmp']:
        try:
            # Open and save the image
            image = Image.open(file_path)
            image_path = os.path.join(image_folder, os.path.basename(file_path))
            image.save(image_path)
            print(f"Image file saved to {image_folder}")
            return image_path
        except Exception as e:
            print(f"Failed to process image: {e}")

    else:
        print("Unsupported file type")

# Example usage
file_path = r'D:\2023-2024\final project\charaf\OCR model\assets\img20240603_11080885.pdf'
image_folder = r'D:\2023-2024\final project\charaf\OCR model\assets\img'

print(process_file(file_path, image_folder))


D:\2023-2024\final project\charaf\OCR model\assets\img\img20240603_11080885_page_1.png
