# Running 

In [1]:
import zipfile as zf
files = zf.ZipFile("images.zip", 'r')
files.extractall('images')
files.close()

In [2]:
import fitz
import os
from PIL import Image

In [3]:
file_path = 'sample.pdf'

In [4]:
pdf_file = fitz.open(file_path)

In [5]:
page_nums = len(pdf_file)

In [6]:
images_list = []

In [7]:
for page_num in range(page_nums):
    page_content = pdf_file[page_num]
    images_list.extend(page_content.get_images())
    

In [8]:
print(images_list)

[(44, 0, 1429, 762, 8, 'DeviceRGB', '', 'Image1', 'DCTDecode'), (48, 0, 1000, 667, 8, 'DeviceRGB', '', 'Image1', 'DCTDecode'), (52, 0, 120, 120, 8, 'DeviceRGB', '', 'Image1', 'DCTDecode'), (53, 0, 122, 122, 8, 'DeviceRGB', '', 'Image2', 'DCTDecode'), (54, 0, 102, 153, 8, 'DeviceRGB', '', 'Image3', 'DCTDecode'), (55, 0, 1952, 976, 8, 'DeviceRGB', '', 'Image4', 'DCTDecode'), (56, 0, 1952, 976, 8, 'DeviceRGB', '', 'Image5', 'DCTDecode'), (60, 0, 180, 120, 8, 'DeviceRGB', '', 'Image1', 'DCTDecode'), (61, 0, 300, 300, 8, 'DeviceRGB', '', 'Image2', 'DCTDecode')]


In [9]:
if len(images_list) == 0:
    raise ValueError(f"No images found in (file_path)")

In [10]:
for i, image in enumerate(images_list, start=1):
    xref = image[0]
    base_image = pdf_file.extract_image(xref)
    image_bytes = base_image['image']
    image_ext = base_image['ext']
    image_name = str(i)+"."+image_ext
    with open(os.path.join('images/',image_name),'wb') as image_file:
        image_file.write(image_bytes)
        image_file.close()

In [16]:
import os

# specify the folder path
folder_path = "images/"

# get a list of all files in the folder
file_list = os.listdir(folder_path)

# filter the list to include only image files
image_list = [file for file in file_list if file.endswith(('.jpg', '.jpeg', '.png', '.gif'))]

# print the list of image files
print(image_list)


['1.jpeg', '2.jpeg', '2_deblurred.jpg', '3.jpeg', '4.jpeg', '5.jpeg', '6.jpeg', '7.jpeg', '8.jpeg', '9.jpeg']


In [17]:
from PIL import Image
# specify the folder path
# folder_path = 'images/'

# loop through all files in the folder
for filename in image_list:
    image = Image.open(os.path.join(folder_path, filename))
    # print image properties
#     image.show()
    print(filename)
    print(f"Image Format: {image.format}")
    print(f"Image Mode: {image.mode}")
    print(f"Image Size: {image.size}")
    # calculate image size in bytes
    img_bytes = image.size[0] * image.size[1] * len(image.getbands()) / 8

    print(f"Image Size in Bytes: {img_bytes}")
    print("------------------------------")


1.jpeg
Image Format: JPEG
Image Mode: RGB
Image Size: (1429, 762)
Image Size in Bytes: 408336.75
------------------------------
2.jpeg
Image Format: JPEG
Image Mode: RGB
Image Size: (1000, 667)
Image Size in Bytes: 250125.0
------------------------------
2_deblurred.jpg
Image Format: JPEG
Image Mode: RGB
Image Size: (1000, 667)
Image Size in Bytes: 250125.0
------------------------------
3.jpeg
Image Format: JPEG
Image Mode: RGB
Image Size: (120, 120)
Image Size in Bytes: 5400.0
------------------------------
4.jpeg
Image Format: JPEG
Image Mode: RGB
Image Size: (122, 122)
Image Size in Bytes: 5581.5
------------------------------
5.jpeg
Image Format: JPEG
Image Mode: RGB
Image Size: (102, 153)
Image Size in Bytes: 5852.25
------------------------------
6.jpeg
Image Format: JPEG
Image Mode: RGB
Image Size: (1952, 976)
Image Size in Bytes: 714432.0
------------------------------
7.jpeg
Image Format: JPEG
Image Mode: RGB
Image Size: (1952, 976)
Image Size in Bytes: 714432.0
-------------

In [19]:
#checking blurriness of image
import cv2
from scipy.signal import wiener

# loop over all image files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(('.jpg', '.jpeg', '.png', '.gif')):
        # read the image file
        img_path = os.path.join(folder_path, filename)
        img = cv2.imread(img_path)

        # calculate the image sharpness using Laplacian operator
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()
        print(sharpness)
        # determine whether the image is blurry or not
        if sharpness < 40:
            print(f'{filename} is blurry.')
            
            #try to clear the image
            # convert image to grayscale
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # apply the Wiener filter to the grayscale image
            deblurred = wiener(gray, (5, 5))
            # convert the deblurred image back to BGR format
            deblurred_bgr = cv2.cvtColor(deblurred.astype('uint8'), cv2.COLOR_GRAY2BGR)
            # save the deblurred image to a new file
            new_filename = os.path.splitext(filename)[0] + '_deblurred.jpg'
            cv2.imwrite(os.path.join(folder_path, new_filename), deblurred_bgr)
        else:
            print(f'{filename} is sharp.')
        print("\n")


135.99687206622974
1.jpeg is sharp.


37.22534589167187
2.jpeg is blurry.


1170.5783227742293
2_deblurred.jpg is sharp.


4596.734145814044
3.jpeg is sharp.


2942.4892377564856
4.jpeg is sharp.


403.0208614972744
5.jpeg is sharp.


233.812431458674
6.jpeg is sharp.


234.92917836688036
7.jpeg is sharp.


8043.364398043124
8.jpeg is sharp.


298.82671111111114
9.jpeg is sharp.


