This code processes uploaded photos: it removes text from photos that contain it and displays a message if a photo does not contain text. After processing, the photos are replaced with the edited versions. The program can work with different file formats and can change the format of the processed photos.

In [1]:
import numpy as np
import pandas as pd

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

In [None]:
!pip install keras-ocr

In [3]:
import matplotlib.pyplot as plt
import keras_ocr
import cv2
import math
import numpy as np

In [27]:
from google.colab import files

In [None]:
# Upload file(s)
files.upload()

In [4]:
def midpoint(x1, y1, x2, y2):
    """Return the midpoint of the segment (x1, y1)-(x2, y2) as a tuple of ints.

    Each coordinate is averaged and then truncated toward zero by ``int``.
    """
    mid_x = (x1 + x2) / 2
    mid_y = (y1 + y2) / 2
    return int(mid_x), int(mid_y)

In [None]:
def inpaint_text(img_path, pipeline):
    """Remove the text detected in an image by inpainting over it.

    Args:
        img_path: Location of the image, passed to ``keras_ocr.tools.read``.
        pipeline: Object whose ``recognize`` method accepts a list of images
            and returns one group of predictions per image; element ``[1]``
            of each prediction is indexed as the four corner points of the
            detected box.

    Returns:
        The result of ``cv2.inpaint`` applied to the image with a mask that
        covers every detected box.

    Raises:
        ValueError: If no text was detected in the image. (Previously this
            case surfaced as an accidental ``UnboundLocalError``.)
    """
    img = keras_ocr.tools.read(img_path)

    # A single image is submitted, so only the first prediction group matters.
    predictions = pipeline.recognize([img])[0]
    if len(predictions) == 0:
        raise ValueError(f"no text detected in {img_path}")

    mask = np.zeros(img.shape[:2], dtype="uint8")
    for prediction in predictions:
        corners = prediction[1]
        x0, y0 = corners[0]
        x1, y1 = corners[1]
        x2, y2 = corners[2]
        x3, y3 = corners[3]

        # Cover the box with one thick line: it runs between the midpoints of
        # edge 1-2 and edge 0-3 and is as thick as edge 1-2 is long.
        x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
        x_mid1, y_mid1 = midpoint(x0, y0, x3, y3)
        thickness = int(math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2))

        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)

    # The source image is never modified and the mask is cumulative, so a
    # single inpaint over the finished mask yields the same result as
    # re-running it after every box, without the repeated work.
    return cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

# Build the keras-ocr pipeline once at module level; the same object is
# passed to inpaint_text for every image processed below.
pipeline = keras_ocr.pipeline.Pipeline()

List the file extensions you want to process here.

In [49]:
file_format = (".jpg", ".jpeg", ".webp")

To save the processed photos in a different format, set the value to the desired extension (for example ".jpg") instead of None.

In [62]:
file_output_format = None

In [None]:
def get_list_from_inbox(folder_path, file_format) -> list:
    """Return the names in *folder_path* whose extension matches *file_format*.

    The comparison is case-insensitive on the entry name: each name is
    lower-cased before ``str.endswith(file_format)`` is applied, so
    *file_format* may be a single suffix or a tuple of suffixes. Only the
    bare names are returned (not full paths), in ``os.listdir`` order.
    """
    return [
        entry
        for entry in os.listdir(folder_path)
        if entry.lower().endswith(file_format)
    ]


# Folder that holds the uploaded photos; results are written back into it.
content_dir = "/content"

list_img = get_list_from_inbox(content_dir, file_format)
print(list_img)

processed_counter = 0
unprocessed_counter = 0
unprocessed_files = []

for img in list_img:
    src_path = os.path.join(content_dir, img)
    if file_output_format is not None:
        # splitext only strips the final extension, so names that contain
        # extra dots ("my.photo.jpg") keep their full stem.
        stem = os.path.splitext(img)[0]
        dst_path = os.path.join(content_dir, stem + file_output_format)
    else:
        dst_path = src_path

    try:
        img_text_removed = inpaint_text(src_path, pipeline)
        # Channels are swapped before saving, as in the original code;
        # RGB2BGR is the same swap as BGR2RGB but names the direction that
        # cv2.imwrite (which stores BGR) needs for an RGB input.
        saved = cv2.imwrite(
            dst_path, cv2.cvtColor(img_text_removed, cv2.COLOR_RGB2BGR)
        )
        # imwrite reports failure through its return value; without this
        # check a failed write would still delete the original below.
        if not saved:
            raise OSError(f"could not write {dst_path}")
        # Drop the original only when the result went to a different file,
        # otherwise the freshly written image would be deleted.
        if dst_path != src_path:
            os.remove(src_path)
        processed_counter += 1
    except Exception as e:
        # Best-effort batch: report the file and the reason, then move on.
        print(
            f"Skipped {img}: there is no text in the photo "
            f"or it could not be processed ({e})"
        )
        unprocessed_counter += 1
        unprocessed_files.append(img)

print(
    f"processed = {processed_counter}\n"
    f"unprocessed = {unprocessed_counter}"
)
if unprocessed_files:
    print(f"unprocessed files = {unprocessed_files}")


The photos are saved in the **/content** folder