## Imports

In [None]:
!pip install opencv-python seaborn

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Conv2D, AveragePooling2D, Flatten, Dense
import zipfile
from pathlib import Path

# print("TensorFlow version:", tf.__version__)
# print("GPU Available:", tf.config.list_physical_devices('GPU'))

## Config

In [None]:
# Default target size used by preprocess_image when resizing each image.
IMAGE_SIZE = (32, 32)
# Mini-batch size used by the data generators and by model.fit.
BATCH_SIZE = 32
# Upper bound on training epochs (EarlyStopping may stop sooner).
EPOCHS = 30
# Fraction of the training data held out for validation during fitting.
VALIDATION_SPLIT = 0.2
# Built with os.path.join instead of the literal "src\dataset": "\d" is an
# invalid escape sequence, and a hard-coded backslash is not a path separator
# outside Windows.
DATA_PATH = os.path.join("src", "dataset")

## Testing Dataset

In [None]:
def explore_dataset(data_path):
    """Print an indented tree of the directories and files under ``data_path``.

    Every directory is printed with a trailing ``/``, indented two spaces per
    nesting level. At most five files are listed per directory; when there
    are more, a summary line reports how many were omitted.

    Args:
        data_path: Root directory to walk.
    """
    max_listed = 5
    for current_dir, _subdirs, filenames in os.walk(data_path):
        # Depth is the number of path separators left once the root prefix
        # has been removed from the current directory.
        depth = current_dir.replace(data_path, '').count(os.sep)
        dir_pad = ' ' * 2 * depth
        file_pad = ' ' * 2 * (depth + 1)

        print(f"{dir_pad}{os.path.basename(current_dir)}/")
        for name in filenames[:max_listed]:
            print(f"{file_pad}{name}")

        omitted = len(filenames) - max_listed
        if omitted > 0:
            print(f"{file_pad}... e mais {omitted} arquivos")

# Print the directory tree under DATA_PATH (at most five files per directory).
explore_dataset(DATA_PATH)

## Preprocess Dataset

In [None]:
def preprocess_image(image_path, target_size=IMAGE_SIZE):
    """Load an image from disk and prepare it as model input.

    The image is read in colour, converted from OpenCV's BGR channel order to
    RGB, smoothed with a 5x5 Gaussian blur, resized to ``target_size`` and
    divided by 255 so pixel values lie in the 0-1 range.

    Args:
        image_path: Path (``str`` or path-like) of the image file to read.
        target_size: Size passed to ``cv2.resize``. Defaults to ``IMAGE_SIZE``.

    Returns:
        The processed image as a floating-point NumPy array.

    Raises:
        ValueError: If the file is missing or cannot be decoded as an image.
    """
    # cv2.imread does not raise on failure; it returns None. Checking here
    # gives a clear error instead of an opaque OpenCV assertion in cvtColor.
    image = cv2.imread(os.fspath(image_path), cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError(f"Não foi possível ler a imagem: {image_path}")

    # OpenCV loads channels as BGR; the rest of the pipeline works in RGB.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Noise reduction with a Gaussian filter (applied before downscaling).
    image_denoised = cv2.GaussianBlur(image_rgb, (5, 5), 0)

    # Resize to the common input size.
    image_resized = cv2.resize(image_denoised, target_size)

    # Scale the 8-bit pixel values to the 0-1 range.
    return image_resized / 255.0

## Load Samples

In [None]:
def load_sample_images(data_path, n_samples=5):
    """Show a 2 x ``n_samples`` grid of example images from both classes.

    The top row shows images from ``<data_path>/positives`` (titled
    "COM RACHADURA") and the bottom row images from ``<data_path>/negatives``
    (titled "SEM RACHADURA"). A missing class directory leaves its row empty.

    Args:
        data_path: Dataset root containing the two class directories.
        n_samples: Maximum number of images shown per class.
    """
    # NOTE(review): load_dataset looks for "Positive"/"Negative" while this
    # function uses "positives"/"negatives" -- confirm which layout is real.
    positive_path = os.path.join(data_path, "positives")
    negative_path = os.path.join(data_path, "negatives")

    print(f"Positive path: {positive_path}")
    print(f"Negative path: {negative_path}")

    # squeeze=False keeps `axes` two-dimensional even when n_samples == 1,
    # so the axes[row, col] indexing below always works.
    _, axes = plt.subplots(2, n_samples, figsize=(15, 6), squeeze=False)

    rows = (
        (positive_path, "COM RACHADURA"),  # images with cracks
        (negative_path, "SEM RACHADURA"),  # images without cracks
    )
    for row, (class_path, caption) in enumerate(rows):
        if not os.path.exists(class_path):
            continue

        # Sorted so the same samples are shown regardless of listing order.
        image_files = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )
        for col, filename in enumerate(image_files[:n_samples]):
            img = cv2.imread(os.path.join(class_path, filename))
            if img is None:
                # cv2.imread returns None for unreadable files; skip them
                # rather than crash inside cvtColor.
                print(f"Erro ao processar {filename}: não foi possível ler a imagem")
                continue

            ax = axes[row, col]
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            ax.set_title(f"{caption}\n{filename}")
            ax.axis("off")

    plt.tight_layout()
    plt.show()

## CNN Model

In [None]:
def create_cnn_model(input_shape):
    """Build and compile a LeNet-5-style binary classifier.

    The network stacks two 5x5 tanh convolutions (6 and 16 filters), each
    followed by 2x2 average pooling, then dense layers of 120 and 84 tanh
    units and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input image, without the batch dimension.

    Returns:
        A compiled Keras ``Sequential`` model using the Adam optimizer and
        binary cross-entropy loss, tracking accuracy, precision and recall.
    """
    model = models.Sequential([
        # Declare the input explicitly instead of passing `input_shape` to
        # the first layer, which newer Keras versions warn about.
        keras.Input(shape=input_shape),

        Conv2D(filters=6, kernel_size=(5, 5), padding='valid', activation='tanh'),
        AveragePooling2D(pool_size=(2, 2), strides=2, padding='valid'),

        Conv2D(filters=16, kernel_size=(5, 5), padding='valid', activation='tanh'),
        AveragePooling2D(pool_size=(2, 2), strides=2, padding='valid'),

        Flatten(),

        Dense(units=120, activation='tanh'),
        Dense(units=84, activation='tanh'),
        Dense(units=1, activation='sigmoid'),
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            # Metric objects with explicit names: the string shortcuts
            # 'precision'/'recall' are not resolved by every Keras version,
            # and fixed names keep the history keys stable
            # ('precision', 'val_precision', 'recall', 'val_recall').
            keras.metrics.Precision(name='precision'),
            keras.metrics.Recall(name='recall'),
        ],
    )

    return model

## Load Dataset

In [None]:
def _find_class_dir(data_path, default_name, keywords):
    """Return the directory holding one class of images.

    ``<data_path>/<default_name>`` is used when it exists. Otherwise the tree
    under ``data_path`` is walked and the first directory whose lowercased
    name contains any of ``keywords`` is returned. When nothing matches, the
    (non-existent) default path is returned so the caller can report it.
    """
    default_path = os.path.join(data_path, default_name)
    if os.path.exists(default_path):
        return default_path

    for root, dirs, _files in os.walk(data_path):
        # Sorted so the choice does not depend on directory listing order.
        for dirname in sorted(dirs):
            lowered = dirname.lower()
            if any(keyword in lowered for keyword in keywords):
                # Return immediately: a bare `break` would only leave the
                # inner loop and let a later match overwrite this one.
                return os.path.join(root, dirname)
    return default_path


def _load_class_images(class_path, label, sample_size, class_text, progress_text):
    """Preprocess the images in ``class_path``; return (images, labels)."""
    # Sorted so a `sample_size` subset is reproducible across runs/machines.
    filenames = sorted(
        f for f in os.listdir(class_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    if sample_size:
        # Each class contributes half of the requested sample.
        filenames = filenames[:sample_size // 2]

    print(f"Carregando {len(filenames)} imagens {class_text} rachadura...")
    images = []
    for i, filename in enumerate(filenames):
        if i % 1000 == 0:
            print(f"Processadas {i} imagens {progress_text}...")
        try:
            images.append(preprocess_image(os.path.join(class_path, filename)))
        except Exception as e:
            # Deliberate best effort: report the bad file and keep loading.
            print(f"Erro ao processar {filename}: {e}")
    return images, [label] * len(images)


def load_dataset(data_path, sample_size=None):
    """Load and preprocess the crack / no-crack image dataset.

    Images are read from ``<data_path>/Positive`` (label 1, with cracks) and
    ``<data_path>/Negative`` (label 0, without cracks). If either directory
    is missing, the tree under ``data_path`` is searched for a directory
    whose name contains "positive"/"crack" or "negative"/"no" respectively.

    Args:
        data_path: Root directory of the dataset.
        sample_size: Optional cap on the total number of images; each class
            contributes at most ``sample_size // 2``. ``None`` loads all.

    Returns:
        Tuple ``(X, y)``: ``X`` is the array of preprocessed images and ``y``
        the array of integer labels (1 = crack, 0 = no crack), positives
        first.

    Raises:
        ValueError: If a class directory cannot be found, or if no image
            could be loaded at all.
    """
    # NOTE(review): the fallback keywords are loose substrings -- "no" also
    # matches names like "annotations", and "crack" matches "no_crack".
    positive_path = _find_class_dir(data_path, "Positive", ('positive', 'crack'))
    negative_path = _find_class_dir(data_path, "Negative", ('negative', 'no'))

    print(f"Positive path found: {positive_path} (exists: {os.path.exists(positive_path)})")
    print(f"Negative path found: {negative_path} (exists: {os.path.exists(negative_path)})")

    if not os.path.exists(positive_path):
        raise ValueError(f"Diretório de imagens positivas não encontrado: {positive_path}")
    if not os.path.exists(negative_path):
        raise ValueError(f"Diretório de imagens negativas não encontrado: {negative_path}")

    # Positive images (with cracks) first, then negative ones (without).
    images, labels = _load_class_images(
        positive_path, 1, sample_size, "COM", "positivas"
    )
    negative_images, negative_labels = _load_class_images(
        negative_path, 0, sample_size, "SEM", "negativas"
    )
    images.extend(negative_images)
    labels.extend(negative_labels)

    if not images:
        raise ValueError("Nenhuma imagem foi carregada. Verifique o caminho do dataset.")

    X = np.array(images)
    y = np.array(labels)

    print(f"\nDataset carregado com sucesso!")
    print(f"Total de imagens: {len(X)}")
    print(f"Imagens com rachadura: {np.sum(y)}")
    print(f"Imagens sem rachadura: {len(y) - np.sum(y)}")
    print(f"Shape das imagens: {X.shape}")
    print(f"Shape dos labels: {y.shape}")

    return X, y

# Alternative for quick experiments: cap the load at sample_size images
# (sample_size // 2 per class).
# X, y = load_dataset(DATA_PATH, sample_size=10000)
X, y = load_dataset(DATA_PATH)  # Full dataset

## Training and Testing

In [None]:
def prepare_data(X, y, test_size=0.2):
    """Split the dataset into stratified training and test subsets.

    Uses ``train_test_split`` with a fixed ``random_state`` of 42 and
    stratification on ``y``, then prints the size and class balance of both
    subsets.

    Args:
        X: Array of images.
        y: Array of binary labels (1 = crack, 0 = no crack).
        test_size: Fraction of the data reserved for the test subset.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    split_options = {"test_size": test_size, "random_state": 42, "stratify": y}
    X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

    print(f"Conjunto de treino: {len(X_train)} imagens")
    print(f"Conjunto de teste: {len(X_test)} imagens")

    # Labels are 0/1, so the sum counts the positive (cracked) samples.
    train_cracked = np.sum(y_train)
    train_intact = len(y_train) - np.sum(y_train)
    test_cracked = np.sum(y_test)
    test_intact = len(y_test) - np.sum(y_test)
    print(f"Distribuição treino - Com rachadura: {train_cracked}, Sem rachadura: {train_intact}")
    print(f"Distribuição teste - Com rachadura: {test_cracked}, Sem rachadura: {test_intact}")

    return X_train, X_test, y_train, y_test

# Hold out the test subset (prepare_data defaults to test_size=0.2), then
# build the CNN for the per-image shape: X_train's shape minus the sample axis.
X_train, X_test, y_train, y_test = prepare_data(X, y)
model = create_cnn_model(X_train.shape[1:])

## Data Augmentation

In [None]:
def create_data_generators(X_train, y_train, batch_size=BATCH_SIZE):
    """Create the training and validation batch generators.

    The training generator applies random augmentation (rotation, shifts,
    shear, zoom, horizontal flip). The validation generator yields the
    held-out samples unmodified, so validation metrics are measured on real
    images rather than on randomly distorted ones.

    Args:
        X_train: Array of training images.
        y_train: Array of training labels.
        batch_size: Number of samples per batch. Defaults to ``BATCH_SIZE``.

    Returns:
        Tuple ``(train_generator, validation_generator)``.
    """
    augmenting_datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        validation_split=VALIDATION_SPLIT
    )

    # A generator's transforms apply to every subset it produces, so the
    # validation subset needs its own generator without augmentation. Both
    # use the same validation_split on the same arrays, so the training and
    # validation subsets stay disjoint.
    plain_datagen = ImageDataGenerator(validation_split=VALIDATION_SPLIT)

    train_generator = augmenting_datagen.flow(
        X_train, y_train,
        batch_size=batch_size,
        subset='training',
        seed=42
    )

    validation_generator = plain_datagen.flow(
        X_train, y_train,
        batch_size=batch_size,
        subset='validation',
        shuffle=False,  # order is irrelevant for evaluation
        seed=42
    )

    return train_generator, validation_generator

## Training

In [None]:
def train_model(model, X_train, y_train, use_augmentation=True):
    """Fit ``model`` on the training data and return the Keras history.

    Training runs for up to ``EPOCHS`` epochs with two callbacks watching
    ``val_loss``: early stopping (patience 10, best weights restored) and
    learning-rate halving on plateau (patience 5, floor 1e-7).

    Args:
        model: Compiled Keras model to train.
        X_train: Array of training images.
        y_train: Array of training labels.
        use_augmentation: When true, train from the generators returned by
            ``create_data_generators``; otherwise fit directly on the arrays
            with ``validation_split=VALIDATION_SPLIT``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1,
    )
    lr_schedule = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1,
    )
    # Arguments shared by both training modes.
    common_fit_args = {
        'epochs': EPOCHS,
        'callbacks': [early_stopping, lr_schedule],
        'verbose': 1,
    }

    if not use_augmentation:
        # Plain training directly on the in-memory arrays.
        return model.fit(
            X_train, y_train,
            batch_size=BATCH_SIZE,
            validation_split=VALIDATION_SPLIT,
            **common_fit_args,
        )

    # Training with data augmentation.
    train_gen, val_gen = create_data_generators(X_train, y_train)
    return model.fit(
        train_gen,
        validation_data=val_gen,
        **common_fit_args,
    )

# Train with augmentation enabled; `history` is later passed to evaluate_model.
history = train_model(model, X_train, y_train, use_augmentation=True)

## Model Review

In [None]:
def evaluate_model(model, X_test, y_test, history):
    """Evaluate ``model`` on the test set and report the results.

    Predicted probabilities above 0.5 count as class 1 (crack). The function
    prints the accuracy and whether it meets the 80% requirement, prints a
    classification report, shows the confusion matrix as a heatmap and plots
    the training curves through ``plot_training_history``.

    Args:
        model: Trained Keras model.
        X_test: Array of test images.
        y_test: Array of true binary labels.
        history: Keras ``History`` object from training.

    Returns:
        Tuple ``(accuracy, y_pred_proba)``: the test accuracy and the raw
        output of ``model.predict``.
    """
    class_ids = [0, 1]
    class_names = ['Sem Rachadura', 'Com Rachadura']

    # Predictions
    y_pred_proba = model.predict(X_test)
    y_pred = (y_pred_proba > 0.5).astype(int).flatten()

    # Metrics
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\n=== RESULTADOS DA AVALIAÇÃO ===")
    print(f"Acurácia no conjunto de teste: {accuracy:.4f} ({accuracy*100:.2f}%)")

    # Check the 80% accuracy requirement.
    if accuracy >= 0.8:
        print("✅ APROVADO: Acurácia >= 80%")
    else:
        print("❌ REPROVADO: Acurácia < 80%")

    # Detailed report. `labels` is pinned so both classes are always listed;
    # without it, data containing a single class would not match the two
    # `target_names` and the call would fail.
    print("\n=== RELATÓRIO DE CLASSIFICAÇÃO ===")
    print(classification_report(y_test, y_pred,
                                labels=class_ids,
                                target_names=class_names))

    # Confusion matrix, pinned to 2x2 for the same reason so the tick labels
    # always line up with the cells.
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Matriz de Confusão')
    plt.ylabel('Valor Real')
    plt.xlabel('Predição')
    plt.show()

    # Plot the training history.
    plot_training_history(history)

    return accuracy, y_pred_proba

def plot_training_history(history):
    """Plot training curves for accuracy, loss, precision and recall.

    Draws a 2x2 grid with one panel per metric. Each panel shows the training
    series and, when the history contains it, the matching ``val_`` series.
    A metric that is absent from the history gets a blank panel instead of
    raising ``KeyError``.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch value lists (as returned by Keras ``model.fit``).
    """
    # (history key, label used for both the title and the y axis), in
    # row-major panel order.
    panels = (
        ('accuracy', 'Acurácia'),
        ('loss', 'Loss'),
        ('precision', 'Precisão'),
        ('recall', 'Recall'),
    )

    _, axes = plt.subplots(2, 2, figsize=(12, 8))
    logs = history.history

    for ax, (metric, label) in zip(axes.flat, panels):
        if metric not in logs:
            ax.axis('off')
            continue

        ax.plot(logs[metric], label='Treino')
        # Validation series exist only when training used validation data.
        val_metric = f'val_{metric}'
        if val_metric in logs:
            ax.plot(logs[val_metric], label='Validação')

        ax.set_title(label)
        ax.set_xlabel('Época')
        ax.set_ylabel(label)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

# Evaluate on the held-out test subset; returns the test accuracy and the raw
# model.predict outputs.
accuracy, predictions = evaluate_model(model, X_test, y_test, history)