In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import os
from pdf2image import convert_from_path
import joblib
from flask import Flask, request, jsonify
import cv2  # Importe o módulo cv2

app = Flask(__name__)

def get_label_from_filename(filename):
    parts = filename.split("_")
    label = parts[0]
    return label

def convert_pdf_to_images(pdf_path, max_images=None):
    images = []
    pages = convert_from_path(pdf_path, 500, size=(800, None))

    for page in pages[:max_images]:
        images.append(np.array(page))

    return np.array(images)

def load_images(folder_path, max_images_per_pdf=None):
    images = []
    labels = []

    for filename in os.listdir(folder_path):
        if filename.endswith(".pdf"):
            pdf_path = os.path.join(folder_path, filename)
            pdf_images = convert_pdf_to_images(pdf_path, max_images=max_images_per_pdf)

            for image in pdf_images:
                images.append(image)
                labels.append(get_label_from_filename(filename))

    return np.array(images, dtype=object), np.array(labels)

# Carregar imagens e rótulos (limitando o número de imagens por PDF para 5)
pasta = "/content/drive/My Drive/9 set 2019/Base Dados IA2/IA PortResq/basedados_IA/"
images, labels = load_images(pasta, max_images_per_pdf=5)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Treinar o modelo
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Ajustar a forma dos dados de treinamento
common_size = (100, 100)  # Defina o tamanho comum desejado
X_train_resized = np.array([cv2.resize(x, common_size) for x in X_train], dtype=object)
X_train_flatten = np.array([x.flatten() for x in X_train_resized], dtype=object)
X_train_flatten = np.vstack(X_train_flatten)

model.fit(X_train_flatten, y_train)

# Ajustar a forma dos dados de teste
X_test_resized = np.array([cv2.resize(x, common_size) for x in X_test], dtype=object)
X_test_flatten = np.array([x.flatten() for x in X_test_resized], dtype=object)
X_test_flatten = np.vstack(X_test_flatten)

# Avaliar o modelo
accuracy = model.score(X_test_flatten, y_test)
print(f"Acurácia do modelo: {accuracy}")

# Salvar o modelo
model_filename = "/content/drive/My Drive/9 set 2019/Base Dados IA2/IA PortResq/modelo_rf.joblib"
joblib.dump(model, model_filename)
print(f"Modelo salvo em {model_filename}")

# Criar API Flask
@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    input_images = np.array(data['images'], dtype=object)
    input_images_resized = np.array([cv2.resize(x, common_size) for x in input_images], dtype=object)
    input_images_flatten = np.array([x.flatten() for x in input_images_resized], dtype=object)
    input_images_flatten = np.vstack(input_images_flatten)

    predictions = model.predict(input_images_flatten)

    return jsonify({'predictions': predictions.tolist()})

# Executar a API no modo de desenvolvimento (não usar em produção)
if __name__ == '__main__':
    app.run(debug=True)


Acurácia do modelo: 0.3333333333333333
Modelo salvo em /content/drive/My Drive/9 set 2019/Base Dados IA2/IA PortResq/modelo_rf.joblib
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
