In [26]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt 
import random

def cleanup_picture(filepath, show_images=True):
    """Crop an image to its largest non-white region plus a small margin.

    The image is converted to grayscale and inverse-thresholded so that every
    pixel with a value of 240 or less becomes foreground. Among the external
    contours of that mask, the one whose bounding box has the largest area is
    selected. The box is grown by 10 pixels on each side, clamped to the
    actual image borders, and the source image is cropped to it.

    Args:
        filepath: Path of the image file, loaded with ``cv2.imread``.
        show_images: When true, display four matplotlib panels side by side:
            the source image, the binarized mask, the source with the
            selected box drawn in green, and the cropped result.

    Returns:
        The cropped image, taken as a slice of the loaded image. When no
        foreground contour is found, the whole image is returned.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``filepath``.
    """
    img_src = cv2.imread(filepath)
    if img_src is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {filepath}")

    gray = cv2.cvtColor(img_src, cv2.COLOR_BGR2GRAY)

    # Inverse threshold: pixels brighter than 240 become 0, the rest 255.
    _, binary = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)

    # Only outer contours are needed to locate the main object.
    contours, _ = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Clamp against the real image size rather than an assumed 500x500.
    image_height, image_width = img_src.shape[:2]

    if contours:
        # Pick the bounding box (x, y, w, h) with the largest area.
        boxes = (cv2.boundingRect(contour) for contour in contours)
        x_max, y_max, w_max, h_max = max(boxes, key=lambda box: box[2] * box[3])

        # Grow each edge independently so the margin stays symmetric even
        # when the box sits close to an image border.
        margin = 10
        left = max(0, x_max - margin)
        top = max(0, y_max - margin)
        right = min(image_width, x_max + w_max + margin)
        bottom = min(image_height, y_max + h_max + margin)
    else:
        # Nothing but background was detected: keep the full picture.
        left, top, right, bottom = 0, 0, image_width, image_height

    # Crop the picture to eliminate the white background.
    cropped_image = img_src[top:bottom, left:right]

    if show_images:
        # Draw the selected box on a copy so the returned crop stays clean.
        annotated = img_src.copy()
        cv2.rectangle(annotated, (left, top), (right, bottom), (0, 255, 0), 2)

        # Panel 1: source image.
        plt.subplot(141)
        plt.imshow(cv2.cvtColor(img_src, cv2.COLOR_BGR2RGB))
        plt.axis("off")

        # Panel 2: binarized mask. It is single-channel, so it is shown with
        # a gray colormap instead of a BGR->RGB conversion.
        plt.subplot(142)
        plt.imshow(binary, cmap="gray")
        plt.axis("off")

        # Panel 3: source image with the biggest bounding box.
        plt.subplot(143)
        plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
        plt.axis("off")

        # Panel 4: cropped image.
        plt.subplot(144)
        plt.imshow(cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB))
        plt.axis("off")
        plt.show()

    return cropped_image

# Folder of raw training images and folder receiving the cropped copies.
directory_path = "/mnt/c/Users/karim/rakuten/images/data_raw/image_train"
temp_dir_path = "/mnt/c/Users/karim/rakuten/images/data_clean/image_train"

# cv2.imwrite does not create missing folders, so make sure the target exists.
os.makedirs(temp_dir_path, exist_ok=True)

# Process a random subset of the folder. The sample size is capped because
# random.sample raises ValueError when asked for more items than exist.
sample_size = 100
filenames = os.listdir(directory_path)
for filename in random.sample(filenames, min(sample_size, len(filenames))):
    try:
        filepath = os.path.join(directory_path, filename)
        img = cleanup_picture(filepath, show_images=False)
        copy_filepath = os.path.join(temp_dir_path, filename)
        # imwrite returns False on failure; raise so the handler below
        # reports it instead of letting the write fail unnoticed.
        if not cv2.imwrite(copy_filepath, img):
            raise OSError(f"cv2.imwrite failed for {copy_filepath}")

    except Exception as e:
        # Best-effort batch: report the failing file and move on to the next.
        print(f"Erreur lors de l'analyse de {filename}: {str(e)}")