In [2]:
pip install cryptography

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.1.2
[notice] To update, run: C:\Users\User\Desktop\ImageClassification\imageclassification\Scripts\python.exe -m pip install --upgrade pip


In [2]:
import cv2
import pytesseract
import os
import json
from PIL import Image, ImageDraw, ImageFont
from concurrent.futures import ThreadPoolExecutor
import logging
from cryptography.fernet import Fernet

logging.basicConfig(level=logging.INFO)
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

key = Fernet.generate_key()
handle_encryption = Fernet(key)

all_valid_images = 'C:/Users/User/Desktop/dataset_creation/all_valid_images'
deidentified_valid_images = 'C:/Users/User/Desktop/dataset_creation/deidentified_valid_images'
reidentified_valid_images = 'C:/Users/User/Desktop/dataset_creation/reidentified_valid_images'

def handle_image_processing(all_valid_images, deidentified_valid_images, reidentified_valid_images):
    if not os.path.exists(deidentified_valid_images):
        os.makedirs(deidentified_valid_images)
    if not os.path.exists(reidentified_valid_images):
        os.makedirs(reidentified_valid_images)
    
    with ThreadPoolExecutor(max_workers=4) as executor:
        for filename in os.listdir(all_valid_images):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tif', '.bmp')):
                executor.submit(image_processing_procedure, all_valid_images, deidentified_valid_images, reidentified_valid_images, filename)

def image_processing_procedure(all_valid_images, deidentified_valid_images, reidentified_valid_images, filename):
    valid_image_location = os.path.join(all_valid_images, filename)
    deidentification_location = os.path.join(deidentified_valid_images, filename)
    image_metadata_filename = os.path.splitext(filename)[0] + '.json'
    image_metadata_location = os.path.join(reidentified_valid_images, image_metadata_filename)
    
    image_deidentification(valid_image_location, deidentification_location, image_metadata_location)
    image_reidentification(deidentification_location, image_metadata_location, os.path.join(reidentified_valid_images, 'reidentified_' + filename))

def image_deidentification(valid_image_location, deidentification_location, image_metadata_location):
    try:
        img = cv2.imread(valid_image_location)
        if img is None:
            logging.error(f"There was an error while loading the img: {valid_image_location}")
            return
        
        txt_information = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
        array_of_metadata = []
        for i in range(len(txt_information['level'])):
            x, y, width, height, text = txt_information['left'][i], txt_information['top'][i], txt_information['width'][i], txt_information['height'][i], txt_information['text'][i]
            if text.strip():
                array_of_metadata.append({'text': text, 'coords': (x, y, width, height)})
                blurry_information(img, x, y, width, height)
        
        cv2.imwrite(deidentification_location, img)
        store_encrypted_information(array_of_metadata, image_metadata_location)
    except Exception as e:
        logging.error(f"There was an error while processing the img: {e}")

def blurry_information(img, x, y, width, height):
    img[y:y+height, x:x+width] = cv2.GaussianBlur(img[y:y+height, x:x+width], (99, 99), 30)

def store_encrypted_information(array_of_metadata, image_metadata_location):
    encrypted_data = handle_encryption.encrypt(json.dumps(array_of_metadata).encode())
    with open(image_metadata_location, 'wb') as f:
        f.write(encrypted_data)
    
def image_reidentification(valid_image_location, image_metadata_location, deidentification_location):
    try:
        img = Image.open(valid_image_location)
        draw = ImageDraw.Draw(img)
        txt_font = ImageFont.load_default()

        array_of_metadata = load_encrypted_information(image_metadata_location)
        
        for metadata_item in array_of_metadata:
            text, (x, y, width, height) = metadata_item['text'], metadata_item['coords']
            draw.rectangle([x, y, x + width, y + height], outline="white", fill="white")
            draw.text((x, y), text, fill="black", font=txt_font)
        
        img.save(deidentification_location)
        print(f"The re-identified image is stored at: {deidentification_location}")
    except Exception as e:
        logging.error(f"There was an error while re-identifying the img: {e}")

def load_encrypted_information(image_metadata_location):
    with open(image_metadata_location, 'rb') as f:
        encrypted_data = f.read()
    decrypted_data = handle_encryption.decrypt(encrypted_data).decode()
    return json.loads(decrypted_data)

handle_image_processing(all_valid_images, deidentified_valid_images, reidentified_valid_images)


The re-identified image is stored at: C:/Users/User/Desktop/dataset_creation/reidentified_valid_images\reidentified_06_06_2024_00_05_28_6.6.2024.0.5.38_3.tif
The re-identified image is stored at: C:/Users/User/Desktop/dataset_creation/reidentified_valid_images\reidentified_06_06_2024_00_31_17_6.6.2024.0.31.27_1.tif
The re-identified image is stored at: C:/Users/User/Desktop/dataset_creation/reidentified_valid_images\reidentified_06_06_2024_00_05_28_6.6.2024.0.5.38_2.tif
The re-identified image is stored at: C:/Users/User/Desktop/dataset_creation/reidentified_valid_images\reidentified_06_06_2024_00_05_28_6.6.2024.0.5.38_1.tif
The re-identified image is stored at: C:/Users/User/Desktop/dataset_creation/reidentified_valid_images\reidentified_06_06_2024_01_42_28_6.6.2024.1.42.34_2.tif
The re-identified image is stored at: C:/Users/User/Desktop/dataset_creation/reidentified_valid_images\reidentified_06_06_2024_00_31_17_6.6.2024.0.31.27_2.tif
The re-identified image is stored at: C:/Users/Us

In [3]:
logging.basicConfig(level=logging.INFO)
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

key = Fernet.generate_key()
handle_encryption = Fernet(key)

if __name__ == "__main__":
    not_correctly_deidentified = 'C:/Users/User/Desktop/dataset_creation/not_correctly_deidentified'
    second_deidentification = 'C:/Users/User/Desktop/dataset_creation/second_deidentification'
    second_reidentification = 'C:/Users/User/Desktop/dataset_creation/second_reidentification'
    
    specific_zone_coords = [
       (72, 85, 33, 11)
    ]
    
def handle_image_processing2(not_correctly_deidentified, second_deidentification, second_reidentification, specific_zone_coords):
    if not os.path.exists(second_deidentification):
        os.makedirs(second_deidentification)
    if not os.path.exists(second_reidentification):
        os.makedirs(second_reidentification)
    
    for filename in os.listdir(not_correctly_deidentified):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tif', '.bmp')):
            valid_image_location = os.path.join(not_correctly_deidentified, filename)
            deidentification_location = os.path.join(second_deidentification, filename)
            image_metadata_filename = os.path.splitext(filename)[0] + '.json'
            image_metadata_location = os.path.join(second_reidentification, image_metadata_filename)

            image_deidentification(valid_image_location, deidentification_location, image_metadata_location, specific_zone_coords)
            image_reidentification(deidentification_location, image_metadata_location, os.path.join(second_reidentification, 'reidentified_' + filename))

def image_deidentification(valid_image_location, deidentification_location, image_metadata_location, specific_zone_coords):
    try:
        img = cv2.imread(valid_image_location)

        if img is None:
            raise FileNotFoundError(f"There was an error while loading the img: {valid_image_location}")

        array_of_metadata = []
        for coord in specific_zone_coords:
            left, top, width, height = coord
            
            original_text = get_txt_from_specific_region(img, left, top, width, height)
            array_of_metadata.append({'text': original_text, 'coords': coord})
            blurry_information(img, left, top, width, height)

            cv2.rectangle(img, (left, top), (left + width, top + height), (0, 0, 0), -1)

        cv2.imwrite(deidentification_location, img)
        store_encrypted_information(array_of_metadata, image_metadata_location)

        with open(image_metadata_location, 'w') as f:
            json.dump(array_of_metadata, f)

    except Exception as e:
        logging.error(f"There was an error while processing the img: {e}")

def blurry_information(img, left, top, width, height):
    img = cv2.rectangle(img, (left, top), (left + width, top + height), (0, 0, 0), -1)
    
def store_encrypted_information(array_of_metadata, image_metadata_location):
    encrypted_data = handle_encryption.encrypt(json.dumps(array_of_metadata).encode())
    with open(image_metadata_location, 'wb') as f:
        f.write(encrypted_data)
    logging.info(f"Saved encrypted metadata to {image_metadata_location}")

def image_reidentification(valid_image_location, image_metadata_location, deidentification_location):
    try:
        img = Image.open(valid_image_location)
        draw = ImageDraw.Draw(img)
        txt_font = ImageFont.load_default()
        
        
        array_of_metadata = load_encrypted_information(image_metadata_location)
                  
        for metadata_item in array_of_metadata:
            text, (left, top, width, height) = metadata_item['text'], metadata_item['coords']
            draw.rectangle([left, top, left + width, top + height], outline="white", fill="white")
            draw.text((left, top), text, fill="black", font=txt_font)

        img.save(image_metadata_location)
        logging.info(f"Re-identified image saved at: {image_metadata_location}")
        print(f"The re-identified image is stored at: {image_metadata_location}")

    except Exception as e:
        logging.error(f"There was an error while re-identifying the img: {e}")

def load_encrypted_information(image_metadata_location):
    with open(image_metadata_location, 'rb') as f:
        encrypted_data = f.read()
    decrypted_data = handle_encryption.decrypt(encrypted_data).decode()
    return json.loads(decrypted_data)
    
handle_image_processing2(not_correctly_deidentified, second_deidentification, second_reidentification, specific_zone_coords)


ERROR:root:There was an error while processing the img: name 'get_txt_from_specific_region' is not defined
ERROR:root:There was an error while re-identifying the img: 
ERROR:root:There was an error while processing the img: name 'get_txt_from_specific_region' is not defined
ERROR:root:There was an error while re-identifying the img: 
