In [7]:
import fitz
import pytesseract
from PIL import Image
import io
import os

def rotate_image(image, angle):
    """Return a copy of ``image`` turned by ``angle`` degrees.

    The call is delegated to the image's own ``rotate`` method with
    ``expand=True`` so the output canvas grows to hold the whole rotated
    picture instead of cropping its corners.

    Args:
        image: Object exposing a PIL-style ``rotate(angle, expand=...)`` method.
        angle: Rotation in degrees, passed through unchanged.

    Returns:
        Whatever ``image.rotate`` returns (a new image for PIL images).
    """
    rotated = image.rotate(angle, expand=True)
    return rotated

def detect_text_rotation(image):
    """Ask Tesseract's orientation/script detection (OSD) for the page angle.

    The value is read from the ``Orientation in degrees`` field of the OSD
    report. The field is located by its label rather than by line position,
    so extra or reordered lines in the report do not select the wrong number.

    Args:
        image: Image accepted by ``pytesseract.image_to_osd``.

    Returns:
        The reported orientation as an ``int``, or ``None`` when detection or
        parsing fails for any reason (a warning is printed in that case).
    """
    import re  # Local import keeps this function self-contained.

    try:
        osd = pytesseract.image_to_osd(image)
        match = re.search(r"Orientation in degrees:\s*(-?\d+)", osd)
        if match is None:
            raise ValueError("no 'Orientation in degrees' field in OSD output")
        return int(match.group(1))
    except Exception as e:
        # Deliberately best-effort: callers treat None as "leave page as is".
        print(f"Warning: Could not detect rotation. Error: {e}")
        return None

def nearest_90_degree_rotation(angle):
    """Snap ``angle`` to the nearest multiple of 90 and normalise it to [0, 360).

    Args:
        angle: Detected angle in degrees; may be negative or exceed 360.

    Returns:
        One of ``0``, ``90``, ``180`` or ``270``. A result of ``0`` means no
        rotation is needed. Exact halfway values (45, 135, ...) follow
        Python's ``round`` tie-breaking (round half to even).
    """
    quarter_turns = round(angle / 90)
    # The modulo already folds 360 (and negatives) into range, so no separate
    # "== 360" special case is required.
    return (quarter_turns * 90) % 360

def _rotate_first_image(doc, page, xref, page_num):
    """Detect the orientation of image ``xref`` and re-insert it rotated.

    Prints the same progress messages as the main loop. Leaves the page
    untouched when detection fails or when no correction is needed.
    Exceptions propagate to the caller.
    """
    base_image = doc.extract_image(xref)
    img = Image.open(io.BytesIO(base_image["image"]))

    angle = detect_text_rotation(img)
    if angle is None:
        print(f"Page {page_num+1}: Skipping due to failed rotation detection.")
        return

    correction = nearest_90_degree_rotation(angle)
    print(f"Page {page_num+1}: Detected {angle}° rotation, correcting by {correction}°.")
    if correction == 0:
        return

    img_rotated = rotate_image(img, correction)

    # Re-encode in the source format when Pillow can write it; otherwise fall
    # back to PNG so an unsupported extension or mode does not silently leave
    # the page uncorrected.
    buffer = io.BytesIO()
    try:
        img_rotated.save(buffer, format=base_image["ext"])
    except (KeyError, ValueError, OSError):
        buffer = io.BytesIO()
        img_rotated.save(buffer, format="PNG")
    img_bytes = buffer.getvalue()

    # Encoding is finished before the original is removed, so a failed encode
    # never leaves the page without its image.
    # NOTE(review): the rotated image is placed into the page's existing
    # rectangle; for 90/270 corrections the page size is not swapped here --
    # confirm this gives the desired layout.
    rect = page.rect
    page.delete_image(xref)
    page.insert_image(rect, stream=img_bytes)
    page.clean_contents()
    print(f"Page {page_num+1}: Image rotation applied.")


def correct_pdf_rotation(input_pdf, output_pdf):
    """Detect rotation from page images and write a corrected copy of the PDF.

    For every page, the first embedded image (assumed to be a full-page scan)
    is run through orientation detection. When a non-zero correction is
    found, the image is rotated and re-inserted. Each page -- corrected,
    unchanged, image-less, or failed -- is then copied to the output
    document in its original order.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path the corrected PDF is saved to.

    Returns:
        None. Progress is reported via ``print``.
    """
    doc = fitz.open(input_pdf)
    try:
        new_doc = fitz.open()
        try:
            for page_num in range(len(doc)):
                page = doc.load_page(page_num)
                img_list = page.get_images(full=True)

                if not img_list:
                    print(f"Page {page_num+1}: No image found, copying text page.")
                else:
                    try:
                        # First image only (assuming a full-page scan).
                        _rotate_first_image(doc, page, img_list[0][0], page_num)
                    except Exception as e:
                        # Best-effort per page: report and still copy the page.
                        print(f"Page {page_num+1}: Error processing page. Skipping. Error: {e}")

                # Single copy point for every outcome keeps page order intact.
                new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)

            new_doc.save(output_pdf)
            print(f"Corrected PDF saved as '{output_pdf}'.")
        finally:
            new_doc.close()
    finally:
        # Release the input file handle even if processing or saving failed.
        doc.close()


In [8]:
# Example run: read the scanned PDF and write an orientation-corrected copy
# next to it.
input_pdf, output_pdf = "Ooad.pdf", "Ooad_corrected1.pdf"
correct_pdf_rotation(input_pdf, output_pdf)

Page 1: Detected 0° rotation, correcting by 0°.
Page 2: Detected 0° rotation, correcting by 0°.
Page 3: Detected 0° rotation, correcting by 0°.
Page 4: Detected 0° rotation, correcting by 0°.
Page 5: Detected 0° rotation, correcting by 0°.
Page 6: Detected 0° rotation, correcting by 0°.
Page 7: Detected 0° rotation, correcting by 0°.
Page 8: Detected 90° rotation, correcting by 90°.
Page 8: Image rotation applied.
Page 9: Detected 90° rotation, correcting by 90°.
Page 9: Image rotation applied.
Page 10: Detected 90° rotation, correcting by 90°.
Page 10: Image rotation applied.
Page 11: Detected 0° rotation, correcting by 0°.
Page 12: Skipping due to failed rotation detection.
Page 13: Detected 90° rotation, correcting by 90°.
Page 13: Image rotation applied.
Page 14: Detected 0° rotation, correcting by 0°.
Page 15: Detected 0° rotation, correcting by 0°.
Page 16: Detected 90° rotation, correcting by 90°.
Page 16: Image rotation applied.
Page 17: Detected 0° rotation, correcting by 0°.
