In [1]:
# ================================
# Individual diagnosis of one image from the staging-unique bucket
# ================================
# Downloads the first object listed in the bucket, extracts basic image
# metrics and applies the light-curation criteria for the selected type,
# printing the metrics and the first criterion that fails (if any).

import os
import io
import numpy as np
from PIL import Image
from minio import Minio

# --- MinIO connection ---
# NOTE(review): credentials are hard-coded here; move them to environment
# variables or a secrets store before sharing this notebook.
minio_client = Minio(
    endpoint="minio:9000",
    access_key="admin",
    secret_key="senhasegura",
    secure=False
)

# --- Parameters ---
bucket = "staging-unique"
tipo_curadoria = "RL"  # or "DL"

# Criteria (adjusted for FIAP)
#   min_res      -> minimum (width, height) in pixels
#   max_size     -> maximum file size in bytes
#   force_rgb    -> when True the image mode must be exactly "RGB"
#   allowed_exts -> accepted lower-case file extensions
criterios = {
    "DL": {"min_res": (100, 100), "max_size": 2_000_000, "force_rgb": True, "allowed_exts": [".png"]},
    "RL": {"min_res": (100, 100), "max_size": 1_000_000, "force_rgb": False, "allowed_exts": [".png", ".jpg", ".jpeg"]}
}

# --- Pick one image (simply the first object listed) ---
# A default of None avoids a bare StopIteration when the bucket is empty.
obj = next(minio_client.list_objects(bucket, recursive=True), None)
if obj is None:
    raise RuntimeError(f"Bucket '{bucket}' has no objects to diagnose.")

# The response wraps a pooled HTTP connection; it must be closed and
# released explicitly, otherwise the connection leaks.
response = minio_client.get_object(bucket, obj.object_name)
try:
    data = response.read()
finally:
    response.close()
    response.release_conn()

# --- Processing ---
img = Image.open(io.BytesIO(data))
w, h = img.size
aspect_ratio = round(w / h, 2)  # rounded for display only
mode = img.mode
file_ext = os.path.splitext(obj.object_name)[1].lower()
std_pixels = np.array(img).std()
tamanho_bytes = len(data)

# --- Apply criteria ---
# Checks run in order; only the first failing criterion is reported.
crit = criterios[tipo_curadoria]
comentario = None

if file_ext not in crit["allowed_exts"]:
    comentario = "Formato n√£o permitido"
elif w < crit["min_res"][0] or h < crit["min_res"][1]:
    comentario = "Resolu√ß√£o insuficiente"
elif tamanho_bytes > crit["max_size"]:
    comentario = "Tamanho do arquivo excede o limite"
elif crit["force_rgb"] and mode != "RGB":
    comentario = "Imagem n√£o est√° em RGB"
elif not (0.5 <= w / h <= 4.0):
    # Uses the unrounded ratio, not the rounded display value.
    comentario = "Aspect ratio fora do intervalo"
elif std_pixels < 5:
    comentario = "Baixa vari√¢ncia de pixels"

# --- Result ---
print("üìÇ Arquivo:", obj.object_name)
print(f"üìê Resolu√ß√£o: {w}x{h}")
print(f"üì¶ Tamanho: {tamanho_bytes:,} bytes")
print(f"üé® Modo de cor: {mode}")
print(f"üìè Aspect ratio: {aspect_ratio}")
print(f"üìä Desvio padr√£o dos pixels: {round(std_pixels, 2)}")
print("üß™ Crit√©rios:", crit)

if comentario:
    print(f"\n‚ùå Rejeitado: {comentario}")
else:
    print("\n‚úÖ Aprovado para curadoria pesada.")


üìÇ Arquivo: FDL/BBAS3.SA/imagens/teste/comprar/2019-04-29_1.png
üìê Resolu√ß√£o: 100x333
üì¶ Tamanho: 2,420 bytes
üé® Modo de cor: L
üìè Aspect ratio: 0.3
üìä Desvio padr√£o dos pixels: 120.04
üß™ Crit√©rios: {'min_res': (100, 100), 'max_size': 1000000, 'force_rgb': False, 'allowed_exts': ['.png', '.jpg', '.jpeg']}

‚ùå Rejeitado: Aspect ratio fora do intervalo


In [11]:
# Technical diagnosis of the curation_audit table (isolated test):
# reflect the schema, list the table's columns, then attempt a sample insert
# and print whatever error the database layer reports.

from sqlalchemy import create_engine, MetaData, Table
from datetime import datetime
import json

# --- PostgreSQL connection ---
engine = create_engine("postgresql+psycopg2://postgres:senhasegura@database-services:5432/postgres")

# --- Reflect metadata ---
metadata = MetaData()
metadata.reflect(bind=engine)

table = metadata.tables.get("curation_audit")
if table is None:
    raise Exception("‚ùå Tabela 'curation_audit' n√£o encontrada no banco de dados.")

print("‚úÖ Colunas encontradas na tabela:")
print(list(table.columns.keys()))

# --- Simulate an insert with a complete payload (adapt if necessary) ---
# The details payload is stored as a JSON string.
detalhes_teste = {
    "res_w": 100,
    "res_h": 100,
    "aspect_ratio": 1.0,
    "file_size": 2000,
    "std": 15.5,
    "modo_final": "L"
}
test_data = dict(
    full_path="curated-unique/FDL/teste.png",
    filename="teste.png",
    file_ext=".png",
    prefix="FDL",
    tipo="DL",
    finalidade="pesada",
    bucket_origem="staging-unique",
    bucket_destino="curated-unique",
    status="processed",
    comentario="inser√ß√£o de teste",
    curation_type="DL",
    curation_status="processed",
    timestamp=datetime.now(),
    curation_details=json.dumps(detalhes_teste),
)

# --- Attempt the insert and capture any error ---
try:
    with engine.begin() as conn:
        conn.execute(table.insert().values(test_data))
except Exception as e:
    print("‚ùå Erro ao inserir na tabela curation_audit:")
    print(e)
else:
    print("‚úÖ Inser√ß√£o de teste bem-sucedida.")


‚úÖ Colunas encontradas na tabela:
['id', 'prefix', 'full_path', 'filename', 'file_ext', 'curation_type', 'curation_details', 'timestamp', 'source_path', 'bucket_origin', 'bucket_curated', 'curation_status']
‚ùå Erro ao inserir na tabela curation_audit:
Unconsumed column names: comentario, tipo, status, bucket_origem, bucket_destino, finalidade


In [3]:
# =============================
# Full cleanup of the curated-unique bucket
# =============================
# Removes every object (recursively) from the bucket, one at a time.
# The bucket itself is kept.

from minio import Minio

minio_client = Minio("minio:9000", access_key="admin", secret_key="senhasegura", secure=False)
bucket = "curated-unique"

if not minio_client.bucket_exists(bucket):
    print("‚ùå Bucket n√£o encontrado.")
else:
    print(f"üßπ Apagando arquivos em '{bucket}'...")
    for obj in minio_client.list_objects(bucket, recursive=True):
        minio_client.remove_object(bucket, obj.object_name)
    print("‚úÖ Bucket limpo.")


üßπ Apagando arquivos em 'curated-unique'...
‚úÖ Bucket limpo.


In [5]:
# =============================
# Full cleanup of the curation_audit table
# =============================
# Deletes every row from curation_audit inside a single transaction.

from sqlalchemy import create_engine, text

# Connection
engine = create_engine("postgresql+psycopg2://postgres:senhasegura@database-services:5432/postgres")

# engine.begin() commits when the block exits normally and rolls back if an
# exception escapes it.
with engine.begin() as conn:
    conn.execute(text("DELETE FROM curation_audit;"))

# Report success only after the transaction has actually committed; printing
# inside the block would claim success even if the commit then failed.
print("‚úÖ Tabela 'curation_audit' zerada com sucesso.")


‚úÖ Tabela 'curation_audit' zerada com sucesso.


In [6]:
# =============================
# Inspect the schema of the curation_audit table
# =============================
# Prints one line per column: name (padded to 20 chars), SQL type and
# nullability. Uses `engine`, which is not created in this cell.

from sqlalchemy import MetaData

# Connect and reflect the structure
metadata = MetaData()
metadata.reflect(bind=engine)
curation_table = metadata.tables["curation_audit"]

# Show columns and types
print("üìã Colunas da tabela 'curation_audit':\n")
for column in curation_table.columns:
    if column.nullable:
        null_label = "NULL OK"
    else:
        null_label = "NOT NULL"
    print(f"‚Ä¢ {column.name:20} | {column.type} | {null_label}")


üìã Colunas da tabela 'curation_audit':

‚Ä¢ id                   | INTEGER | NOT NULL
‚Ä¢ prefix               | TEXT | NOT NULL
‚Ä¢ full_path            | TEXT | NOT NULL
‚Ä¢ filename             | TEXT | NOT NULL
‚Ä¢ file_ext             | VARCHAR(10) | NOT NULL
‚Ä¢ curation_type        | TEXT | NOT NULL
‚Ä¢ curation_details     | TEXT | NULL OK
‚Ä¢ timestamp            | TIMESTAMP | NOT NULL
‚Ä¢ source_path          | TEXT | NOT NULL
‚Ä¢ bucket_origin        | TEXT | NOT NULL
‚Ä¢ bucket_curated       | TEXT | NOT NULL
‚Ä¢ curation_status      | TEXT | NULL OK


In [8]:
# =============================
# Reinforcement: log/validate values before the INSERT
# =============================
# Debug pass over `objetos_validos`: each object is downloaded from
# `bucket_origem`, opened with PIL, uploaded to `bucket_destino`, and an audit
# row is inserted into `curation_table`.
#
# NOTE(review): this cell defines none of the names it relies on (tqdm,
# objetos_validos, minio_client, bucket_origem, bucket_destino, Image, io, os,
# json, datetime, engine, curation_table, copiados, erros); they must already
# exist in the kernel from other cells.

for obj in tqdm(objetos_validos, desc="üîÅ Debug dos objetos"):
    try:
        object_name = obj.object_name

        # Safety: validate the object name (skip empty / non-string / blank names)
        if not object_name or not isinstance(object_name, str) or object_name.strip() == "":
            print(f"‚ö†Ô∏è Nome inv√°lido detectado: '{object_name}' ‚Äî pulando.")
            continue

        # NOTE(review): the get_object response is read but never closed/released.
        raw_data = minio_client.get_object(bucket_origem, object_name).read()
        Image.open(io.BytesIO(raw_data))  # validate image

        # Upload the same bytes under the same object name in the destination bucket.
        # The content type is always sent as "image/png", whatever the extension.
        minio_client.put_object(
            bucket_name=bucket_destino,
            object_name=object_name,
            data=io.BytesIO(raw_data),
            length=len(raw_data),
            content_type="image/png"
        )

        full_path = f"{bucket_destino}/{object_name}"
        source_path = f"{bucket_origem}/{object_name}"
        filename = os.path.basename(object_name)

        # Extra validation. It runs after the upload above, so a `continue`
        # here skips only the audit insert, not the copy.
        if not full_path or "None" in full_path or full_path.strip() == "":
            print(f"‚ùå full_path inv√°lido: '{full_path}' ‚Üê object_name: '{object_name}'")
            continue

        # Audit row; keys match the column names of curation_audit.
        # "verificacao" is rejected by the table's CHECK constraint on
        # curation_status (see the recorded error output of this cell).
        audit = {
            "prefix": object_name.split("/")[0],
            "full_path": full_path,
            "filename": filename,
            "file_ext": os.path.splitext(object_name)[1].lower(),
            "curation_type": "manual_check",
            "curation_details": json.dumps({"bytes": len(raw_data)}),
            "timestamp": datetime.now(),
            "source_path": source_path,
            "bucket_origin": bucket_origem,
            "bucket_curated": bucket_destino,
            "curation_status": "verificacao"
        }

        # One transaction per object; the counter is bumped inside the block,
        # i.e. before the block exits.
        with engine.begin() as conn:
            conn.execute(curation_table.insert().values(audit))
            copiados += 1

    except Exception as e:
        # Any failure (download, image open, upload, insert) is logged and counted.
        print(f"‚ùå Erro ao copiar {object_name}: {e}")
        erros += 1


üîÅ Debug dos objetos:   0%|          | 0/100 [00:00<?, ?it/s]

‚ùå Erro ao copiar FDL/BBAS3.SA/imagens/teste/comprar/2019-04-29_1.png: (psycopg2.errors.CheckViolation) new row for relation "curation_audit" violates check constraint "curation_audit_curation_status_check"
DETAIL:  Failing row contains (110280, FDL, curated-unique/FDL/BBAS3.SA/imagens/teste/comprar/2019-04-29_1.p..., 2019-04-29_1.png, .png, manual_check, {"bytes": 2420}, 2025-06-01 11:53:36.832445+00, staging-unique/FDL/BBAS3.SA/imagens/teste/comprar/2019-04-29_1.p..., staging-unique, curated-unique, verificacao).

[SQL: INSERT INTO curation_audit (prefix, full_path, filename, file_ext, curation_type, curation_details, timestamp, source_path, bucket_origin, bucket_curated, curation_status) VALUES (%(prefix)s, %(full_path)s, %(filename)s, %(file_ext)s, %(curation_type)s, %(curation_details)s, %(timestamp)s, %(source_path)s, %(bucket_origin)s, %(bucket_curated)s, %(curation_status)s) RETURNING curation_audit.id]
[parameters: {'prefix': 'FDL', 'full_path': 'curated-unique/FDL/BBAS3.SA/i

In [9]:
# =============================
# List the CHECK constraints of the curation_audit table
# =============================
# Uses `engine`, which is not created in this cell.

from sqlalchemy import inspect

insp = inspect(engine)
checks = insp.get_check_constraints("curation_audit")

print("üîé Restri√ß√µes CHECK na tabela 'curation_audit':")
for constraint in checks:
    nome = constraint["name"]
    expressao = constraint["sqltext"]
    print(f"‚Ä¢ {nome}: {expressao}")


üîé Restri√ß√µes CHECK na tabela 'curation_audit':
‚Ä¢ curation_audit_curation_status_check: curation_status = ANY (ARRAY['processed'::text, 'not_processed'::text])


In [10]:
# =============================
# Final version using values allowed by the CHECK constraint
# =============================
# Copies the first 100 validly-named objects from the staging bucket to the
# curated bucket and writes one audit row per copied object.

import io
import mimetypes
import os
import json
from datetime import datetime
from itertools import islice
from PIL import Image
from minio import Minio
from sqlalchemy import create_engine, MetaData, Table
from tqdm.notebook import tqdm

# Connections
# NOTE(review): credentials are hard-coded here; move them to environment
# variables or a secrets store before sharing this notebook.
minio_client = Minio("minio:9000", access_key="admin", secret_key="senhasegura", secure=False)
engine = create_engine("postgresql+psycopg2://postgres:senhasegura@database-services:5432/postgres")

# Buckets
bucket_origem = "staging-unique"
bucket_destino = "curated-unique"
if not minio_client.bucket_exists(bucket_destino):
    minio_client.make_bucket(bucket_destino)

# Reflect only the audit table; reflecting the whole database first is
# redundant because autoload_with already loads this table's definition.
metadata = MetaData()
curation_table = Table("curation_audit", metadata, autoload_with=engine)

# Select the first 100 objects with a non-blank name. The listing is consumed
# lazily so the full bucket is never materialised just to keep 100 entries.
objetos = minio_client.list_objects(bucket_origem, recursive=True)
objetos_validos = list(islice(
    (obj for obj in objetos if obj.object_name and obj.object_name.strip()),
    100,
))

copiados = 0
erros = 0

for obj in tqdm(objetos_validos, desc="üîÅ Copiando staging ‚Üí curated"):
    # Bound before the try so the error message never shows a stale name.
    object_name = obj.object_name
    try:
        # The response wraps a pooled HTTP connection; close and release it
        # explicitly, otherwise one connection leaks per object.
        response = minio_client.get_object(bucket_origem, object_name)
        try:
            raw_data = response.read()
        finally:
            response.close()
            response.release_conn()

        # Image.open alone only identifies the format lazily; verify() checks
        # the file's integrity and raises if it is broken.
        with Image.open(io.BytesIO(raw_data)) as img:
            img.verify()

        # Derive the content type from the extension instead of labelling
        # every object (including .jpg/.jpeg) as PNG.
        content_type = mimetypes.guess_type(object_name)[0] or "application/octet-stream"

        # Upload to the destination bucket
        minio_client.put_object(
            bucket_name=bucket_destino,
            object_name=object_name,
            data=io.BytesIO(raw_data),
            length=len(raw_data),
            content_type=content_type
        )

        # Audit row consistent with the table's CHECK constraint
        audit = {
            "prefix": object_name.split("/")[0],
            "full_path": f"{bucket_destino}/{object_name}",
            "filename": os.path.basename(object_name),
            "file_ext": os.path.splitext(object_name)[1].lower(),
            "curation_type": "manual_check",
            "curation_details": json.dumps({"bytes": len(raw_data)}),
            "timestamp": datetime.now(),
            "source_path": f"{bucket_origem}/{object_name}",
            "bucket_origin": bucket_origem,
            "bucket_curated": bucket_destino,
            "curation_status": "processed"  # value accepted by the CHECK constraint
        }

        with engine.begin() as conn:
            conn.execute(curation_table.insert().values(audit))
        # Count success only after the transaction has committed, so a failed
        # commit is not counted as both a copy and an error.
        copiados += 1

    except Exception as e:
        print(f"‚ùå Erro ao copiar {object_name}: {e}")
        erros += 1

# Report
print("\n‚úÖ Verifica√ß√£o finalizada.")
print(f"‚úîÔ∏è {copiados} imagens copiadas e auditadas com sucesso.")
print(f"‚ùå {erros} imagens com erro.")


üîÅ Copiando staging ‚Üí curated:   0%|          | 0/100 [00:00<?, ?it/s]


‚úÖ Verifica√ß√£o finalizada.
‚úîÔ∏è 100 imagens copiadas e auditadas com sucesso.
‚ùå 0 imagens com erro.
