In [10]:
import datetime
import os
import time

import fitz
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras._tf_keras.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras._tf_keras.keras.layers import (Activation, Conv2D, Dense, Dropout,
                                          Flatten, MaxPooling2D,Input)
from keras._tf_keras.keras.models import Sequential
from keras._tf_keras.keras.optimizers import Adam
from keras._tf_keras.keras.preprocessing.image import ImageDataGenerator
from matplotlib.animation import Animation
from PIL import Image
from tensorflow import keras
from torch import dropout

In [11]:
# Configurações
data_dir = r"J:\PROCESSO\TEMPOS_MOVIMENTOS\TRABALHOS\Automatizações\coimbra\IdentificarVelcros\Text-from-image-extraction-master\Text-from-image-extraction-master\dataset"
batch_size = 32
# batch_size = 32
img_height = 256
img_width = 256
epochs = 50
CLASS = 21
FILE_NAME = "rede_model_NEURAL_"
dirs = os.listdir(data_dir)
print(dirs)

['retangulos_pretos', 'sem_retangulos']


In [31]:
def getDateStr():
    return time.strftime("%Y%m%d_%H%M")

def getTimeMin(start, end):
    return (end - start) / 60


print("[INFO] [INICIO]: " + getDateStr() + "\n")

print("[INFO] Download dataset usando keras.preprocessing.image.ImageDataGenerator")

[INFO] [INICIO]: 20240724_1306

[INFO] Download dataset usando keras.preprocessing.image.ImageDataGenerator


In [38]:
# Verifique se o diretório existe e, se não, crie-o
model_dir = "../models/"
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

In [39]:
def plot_images(images, labels_true, labels_pred):
    plt.figure(figsize=(10, 10))
    for i in range(min(25, len(images))):
        plt.subplot(5, 5, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i], cmap=plt.cm.binary)
        plt.xlabel(f"True: {labels_true[i]}\nPred: {labels_pred[i]}")
    plt.show()


In [40]:
# Geradores de dados
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.25,
)
test_datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.25)
# treinar o modelo durante o preocesso, ajustar os pesos para diminuir o erro
train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=32,
    shuffle=False,
    class_mode="binary",
)
#
validation_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=32,
    shuffle=False,
    class_mode="binary",
)

Found 493 images belonging to 2 classes.
Found 493 images belonging to 2 classes.


In [41]:
print("[INFO] Inicializando e otimizando a Rede Neural...")
start = time.time()

early_stopping_monitor = EarlyStopping(
    monitor="val_loss", mode="min", verbose=1, patience=15
)

if train_generator.samples == 0 or validation_generator.samples == 0:
    raise ValueError(
        "Não foi encontrado dados suficientesm, por favor, adicione mais imagens."
    )

[INFO] Inicializando e otimizando a Rede Neural...


In [42]:
# Construção do modelo
model = Sequential()

# Adicione a camada de entrada
model.add(Input(shape=(img_height, img_width, 3)))

# Adicione as camadas subsequentes
model.add(Conv2D(32, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(256, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(1, activation="sigmoid"))


In [43]:
# Compile o modelo
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
print('[INFO] Summary: ')
model.summary()

[INFO] Summary: 


In [44]:
print("[INFO] Treinando a Rede...")

# atualizo valor da epoca caso o treinamento tenha finalizado antes do valor de epoca que foi iniciado

print("[INFO] Salvando modelo treinado ...")

file_date = getDateStr()
model_path = os.path.join(model_dir, FILE_NAME + file_date + ".keras")
model.save(model_path)
print(f"[INFO] modelo: {model_path} salvo!")

end = time.time()

print("[INFO] Tempo de execução da Rede Neural: %.1f min" % (getTimeMin(start, end)))

# Callbacks para parar o treinamento cedo e salvar o melhor modelo
early_stopping = EarlyStopping(monitor="val_loss", patience=10)
model_checkpoint = ModelCheckpoint("best_model.keras", save_best_only=True)

# Carregar o melhor modelo
# model.load_weights("best_model.keras")
# Carregar o melhor modelo
loaded_model = keras.models.load_model(model_path)
loaded_model = keras.models.load_model("best_model.keras")
loaded_model.summary()
# "../models/" + FILE_NAME + file_date + ".keras"

# Salvar o modelo final
model.save("retangulo_preto_detector.keras")


[INFO] Treinando a Rede...
[INFO] Salvando modelo treinado ...
[INFO] modelo: ../models/rede_model_NEURAL_20240724_1307.keras salvo!
[INFO] Tempo de execução da Rede Neural: 0.2 min


  saveable.load_own_variables(weights_store.get(inner_path))


In [45]:
def extract_images_from_pdf(pdf_path, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    doc = fitz.open(pdf_path)
    for i in range(len(doc)):
        page = doc.load_page(i)
        pix = page.get_pixmap()
        output_path = f"{output_dir}/page_{i+1}.png"
        pix.save(output_path)
    doc.close()


In [46]:
def detect_black_rectangle(image_path):
    img = keras.preprocessing.image.load_img(
        image_path, target_size=(img_height, img_width)
    )
    img_array = keras.preprocessing.image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    prediction = model.predict(img_array)
    return prediction[0] > 0.2

In [47]:
def main():
    pdf_directory = (
        r"C:\Users\gabcosil\Desktop\RedeNeuralTeste\PDFBASE"  # Substitua pelo caminho da pasta com PDFs
    )
    output_dir = r"C:\Users\gabcosil\Desktop\RedeNeuralTeste\OUTPUTDIR"  # Substitua pelo caminho da pasta de saída das imagens
    results = []
    print(dirs)
    # Percorra todos os arquivos na pasta
    for pdf_file in os.listdir(pdf_directory):
        if pdf_file.lower().endswith(".pdf"):
            pdf_path = os.path.join(pdf_directory, pdf_file)
            pdf_output_dir = os.path.join(output_dir, os.path.splitext(pdf_file)[0])

            # Extraia imagens do PDF
            extract_images_from_pdf(pdf_path, pdf_output_dir)

            # Detectar retângulos pretos nas imagens extraídas
            total_black_boxes = 0
            images = []
            labels_true = []
            labels_pred = []
            for image_file in os.listdir(pdf_output_dir):
                image_path = os.path.join(pdf_output_dir, image_file)
                img = keras.preprocessing.image.load_img(
                    image_path, target_size=(img_height, img_width)
                )
                img_array = keras.preprocessing.image.img_to_array(img) / 255.0
                img_array = np.expand_dims(img_array, axis=0)
                prediction = model.predict(img_array)
                is_black_rectangle = prediction[0] > 0.1
                images.append(img)
                labels_true.append(
                    "Retângulo Preto" if is_black_rectangle else "Sem Retângulo Preto"
                )
                labels_pred.append(
                    "Retângulo Preto" if is_black_rectangle else "Sem Retângulo Preto"
                )
                results.append(
                    {
                        "Arquivo": pdf_file,
                        "Caixas Pretas": total_black_boxes,
                    }
                )
                if detect_black_rectangle(image_path):
                    total_black_boxes += 1

            # Adicione os resultados à lista
            results.append({"Arquivo": pdf_file, "Caixas Pretas": total_black_boxes})
            # plot_images(images, labels_true, labels_pred)
    # Crie um DataFrame com os resultados
    df = pd.DataFrame(results)

    # Salve os resultados em um arquivo Excel
    output_excel = "resultados_velcros.xlsx"
    df.to_excel(output_excel, index=False)
    print(f"Resultados salvos em '{output_excel}'.")


if __name__ == "__main__":
    main()


['retangulos_pretos', 'sem_retangulos']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Resultados salvos em 'resultados_velcros.xlsx'.
