# VGG16 com Validação Cruzada
Este notebook aplica a rede **VGG16** utilizando os dados carregados pelo `DataLoader`, avaliando o desempenho via **validação cruzada (K-Fold)**.

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
import os
import cv2

from get_data_from_db import DataLoader

# Build the training arrays (X, y) from the image files listed by DataLoader.
data_loader = DataLoader()

# Each entry is unpacked below as a (filename, label) pair.
train_data_with_labels = data_loader.get_train_data()
print(f"Total de amostras de treino: {len(train_data_with_labels)}")

IMG_SIZE = (224, 224)  # default input resolution for VGG16

# Integer label -> directory that holds the training images of that class.
# The number of entries also fixes the width of the one-hot encoding below.
LABEL_DIRS = {
    0: data_loader.train_normal_path,
    1: data_loader.train_ulcerative_colitis_path,
    2: data_loader.train_polyps_path,
    3: data_loader.train_esophagitis_path,
}

X, y = [], []

# Rebuild the full path of every image from its label and load it.
for filename, label in train_data_with_labels:
    if label not in LABEL_DIRS:
        # Labels outside the mapping are skipped, with a warning so that
        # dropped samples are visible in the notebook output.
        print(f"[AVISO] Label desconhecido ({label!r}) ignorado: {filename}")
        continue

    filepath = os.path.join(LABEL_DIRS[label], filename)
    img = cv2.imread(filepath)  # None when the file cannot be read/decoded
    if img is None:
        print(f"[AVISO] Não foi possível carregar: {filepath}")
        continue

    X.append(cv2.resize(img, IMG_SIZE))
    y.append(label)

if not X:
    # Fail early with a clear message instead of an opaque error further down.
    raise RuntimeError("Nenhuma imagem de treino pôde ser carregada.")

# NOTE(review): cv2.imread returns BGR images and the pixels are only scaled
# to [0, 1] here, which differs from the documented VGG16 `preprocess_input`
# transform used with the ImageNet weights - confirm this is intentional.
X = np.array(X, dtype="float32") / 255.0

# Pass num_classes explicitly: otherwise the width is inferred from the
# largest label present, and y would shrink if the highest class had no
# successfully loaded image.
y = to_categorical(np.array(y), num_classes=len(LABEL_DIRS))

print(f"Formato de X: {X.shape}")
print(f"Formato de y: {y.shape}")


In [None]:

def create_vgg_model(input_shape=(224, 224, 3), num_classes=4):
    """Build and compile a VGG16-based classifier with a frozen backbone.

    Args:
        input_shape: Shape forwarded to VGG16 as ``input_shape``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    backbone = VGG16(include_top=False, weights="imagenet", input_shape=input_shape)
    # Transfer learning: the ImageNet-pretrained backbone stays frozen, so
    # only the layers stacked on top of it are trained.
    backbone.trainable = False

    head = (
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    )

    classifier = models.Sequential()
    classifier.add(backbone)
    for layer in head:
        classifier.add(layer)

    classifier.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return classifier


In [None]:

# 3-fold cross-validation (shuffled, fixed seed): a fresh model is trained on
# each fold and its accuracy on the held-out split is recorded.
kf = KFold(n_splits=3, shuffle=True, random_state=42)

accuracies = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
    print(f"Treinando fold {fold+1}...")

    # A new VGG16 model is created on every iteration; release the Keras
    # global state of the previous one so memory does not grow fold by fold.
    tf.keras.backend.clear_session()

    x_train_cv, x_val_cv = X[train_idx], X[val_idx]
    y_train_cv, y_val_cv = y[train_idx], y[val_idx]

    # Output width follows the one-hot encoding of y.
    model = create_vgg_model(input_shape=(224, 224, 3), num_classes=y.shape[1])
    model.fit(
        x_train_cv,
        y_train_cv,
        epochs=5,
        batch_size=32,
        validation_data=(x_val_cv, y_val_cv),
        verbose=1,
    )

    # evaluate() returns [loss, accuracy] given the metrics set at compile time.
    _, acc = model.evaluate(x_val_cv, y_val_cv, verbose=0)
    accuracies.append(acc)

print("Acurácias por fold:", accuracies)
print("Acurácia média:", np.mean(accuracies))
