In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Request the 'alessiocorrado99/animals10' dataset through kagglehub and keep the returned value.
# NOTE(review): the same dataset is requested again in a later cell (via the `kh` alias), and
# that later result is the one used as the data directory; this variable is not read anywhere
# else in the visible notebook.
alessiocorrado99_animals10_path = kagglehub.dataset_download('alessiocorrado99/animals10')

print('Data source import complete.')


In [None]:
# Importando bibliotecas necessárias
import kagglehub as kh

# Import libraries
import os  # For interacting with the file system
import shutil  # For managing files and directories in a cross-platform manner
import keras  # For building deep learning models
import numpy as np  # For numerical operations on arrays
import pandas as pd # For data manipulation
from glob import glob  # For finding file paths
from tqdm import tqdm  # For progress bars

# Data visualization
import seaborn as sns  # For statistical visualizations
import plotly.graph_objs as go  # For interactive visualizations
import matplotlib.pyplot as plt  # For creating static plots

# Tensorflow
import tensorflow as tf

# Model architecture
from keras import Sequential  # For building sequential models
from keras.models import load_model  # For loading pre-trained models
from keras.layers import Dense, GlobalAvgPool2D as GAP, Dropout  # For defining model layers

# Training callbacks
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Pre-trained models
from tensorflow.keras.applications import InceptionV3, ResNet152V2, Xception

# Models
from tensorflow.keras import layers, models

# Model visualization
from tensorflow.keras.utils import plot_model

# Data preprocessing
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Ignore warnings
import warnings

# Image preprocessing for K-Means Model
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from skimage import color
from skimage import io
from skimage import img_as_ubyte
from skimage.transform import resize
from scipy.stats import mode
from skimage.feature import hog
from sklearn.decomposition import PCA


In [None]:
# Silence library warnings for the rest of the notebook
warnings.filterwarnings('ignore')

# Fetch the Animals-10 dataset and remember where it was stored
path = kh.dataset_download("alessiocorrado99/animals10")

print("Caminho para os arquivos:", path)

# The per-class folders are read from the "raw-img" sub-directory
data_path = f"{path}/raw-img"

# Alphabetically ordered class folder names, and how many there are
class_names = sorted(os.listdir(data_path))
num_classes = len(class_names)

print("Nomes das classes: \n", class_names)
print("Número de classes:", num_classes)

# Number of entries in each class folder, in the same order as class_names
class_sizes = [len(os.listdir(data_path + "/" + name)) for name in class_names]

print("Distribuição das classes:\n", class_sizes)

# Lookup table: class folder name -> number of entries in that folder
class_name_size = {name: size for name, size in zip(class_names, class_sizes)}


In [None]:
# Pie trace: one slice per class, sized by the number of images in it
data = go.Pie(values=class_sizes, labels=class_names)

# Layout with a horizontally centered title
layout = go.Layout(title=dict(text="Class Distribution", x=0.5))

# Assemble the figure from trace and layout, then render it
fig = go.Figure(layout=layout, data=data)
fig.show()


In [None]:
# Bar chart of the number of images per class, with the mean shown as a reference line

# New 10x5 inch figure; seaborn draws on its current axes and returns them
plt.figure(figsize=(10, 5))
axis = sns.barplot(x=class_names, y=class_sizes)

# Grid lines behind the bars
axis.grid()

# Dotted horizontal line at the mean class size
mean_size = np.mean(class_sizes)
axis.axhline(mean_size, color='black', linestyle=':', label="Average number of images per class")

# Legend for the reference line, then render
axis.legend()
plt.show()


In [None]:
# Directory that receives the down-sampled copy of the dataset (created if missing)
sampled_data_path = './sampled-data'
os.makedirs(sampled_data_path, exist_ok=True)

# NOTE: this value is not used by the sampling below, which applies a fixed per-class cap
# instead. It is kept so that any other cell referencing it keeps working.
sample_percent = 0.1

# Upper bound on the number of images copied for each class
max_images_per_class = 2000

# Maps the original (Italian) class folder names to their English names
class_names_dict = {
    'cane': 'dog',
    'cavallo': 'horse',
    'elefante': 'elephant',
    'farfalla': 'butterfly',
    'gallina': 'chicken',
    'gatto': 'cat',
    'mucca': 'cow',
    'pecora': 'sheep',
    'ragno': 'spider',
    'scoiattolo': 'squirrel'
}

# Seeded generator so the drawn sample is the same on every fresh run
sampling_rng = np.random.default_rng(42)

# Copy at most `max_images_per_class` randomly chosen images of every known class.
# Directory listings are sorted so the seeded draw does not depend on file-system order.
for class_name in sorted(os.listdir(data_path)):
    class_path = os.path.join(data_path, class_name)

    # Skip stray files and folders without an English translation instead of
    # failing with a KeyError on the dictionary lookup below
    if class_name not in class_names_dict or not os.path.isdir(class_path):
        continue

    class_name_en = class_names_dict[class_name]
    sampled_class_path = os.path.join(sampled_data_path, class_name_en)

    # A class directory that already exists is left untouched (same as before)
    if os.path.exists(sampled_class_path):
        continue
    os.mkdir(sampled_class_path)

    # Size the sample from the listing itself rather than from a dictionary built in another cell
    image_files = sorted(os.listdir(class_path))
    num_images = min(len(image_files), max_images_per_class)

    # Draw without replacement and copy the chosen files
    sampled_images = sampling_rng.choice(image_files, size=num_images, replace=False)
    for image_name in sampled_images:
        src_path = os.path.join(class_path, image_name)
        dst_path = os.path.join(sampled_class_path, image_name)
        shutil.copyfile(src_path, dst_path)


In [None]:
# Class names are now the (English) folder names of the sampled dataset, alphabetically ordered
class_names = sorted(os.listdir(sampled_data_path))

# Number of entries in each sampled class folder, in class_names order
class_sizes = [
    len(os.listdir(os.path.join(sampled_data_path, name)))
    for name in class_names
]

# Report the resulting class distribution
print("Class Distribution:\n", class_sizes)


In [None]:
# Pie trace for the sampled dataset: one slice per class, sized by its image count
data = go.Pie(values=class_sizes, labels=class_names)

# Layout with a horizontally centered title
layout = go.Layout(title=dict(text="Class Distribution", x=0.5))

# Build the figure and show it
fig = go.Figure(layout=layout, data=data)
fig.show()


In [None]:
# Training-time generator.
# rescale: maps pixel values from 0-255 to 0-1
# horizontal_flip / vertical_flip: random flips
# rotation_range: random rotations of up to 20 degrees
# validation_split: reserves 20% of the files of each class for validation
data_generator = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=20,
    validation_split=0.2
)

# Validation-time generator: same rescaling and same split fraction, but NO random
# augmentation, so validation metrics are measured on unmodified images.
# NOTE(review): this relies on flow_from_directory choosing the validation files
# deterministically from the directory listing and the split fraction, so both generator
# objects should agree on which files are held out; confirm against the Keras documentation.
valid_data_generator = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Training data for the CNN models.
# target_size: every image is resized to 256x256
# class_mode='sparse': labels are integer class indices, which is what the
#   sparse categorical cross-entropy loss of the CNN models consumes
#   (the previous 'binary' mode is meant for two-class problems)
# batch_size: number of samples per batch
# shuffle: training samples are shuffled
# subset: selects the training part of the split
train_data_cnn = data_generator.flow_from_directory(
    sampled_data_path,
    target_size=(256,256),
    class_mode='sparse',
    batch_size=32,
    shuffle=True,
    subset='training'
)

# Validation data for the CNN models.
# shuffle=False keeps the batches in the same order as the generator's `.classes`
# attribute, which the evaluation cells compare against the predictions.
valid_data_cnn = valid_data_generator.flow_from_directory(
    sampled_data_path,
    target_size=(256,256),
    class_mode='sparse',
    batch_size=32,
    shuffle=False,
    subset='validation'
)

# Training data for the ANN (one-hot labels for categorical cross-entropy)
train_data_ann = data_generator.flow_from_directory(
    sampled_data_path,
    target_size=(256, 256),
    batch_size=32,
    shuffle=True,
    subset='training',
    class_mode='categorical'
)

# Validation data for the ANN (one-hot labels, fixed order, no augmentation)
valid_data_ann = valid_data_generator.flow_from_directory(
    sampled_data_path,
    target_size=(256, 256),
    batch_size=32,
    shuffle=False,
    class_mode='categorical',
    subset='validation'
)


In [None]:
def show_image(image, image_title=None):
    """Draw `image` on the current Matplotlib axes without axis decorations.

    Args:
        image: Image data accepted by `plt.imshow`.
        image_title: Optional title shown above the image.
    """
    # Render the image, then work on the axes it was drawn on
    plt.imshow(image)
    axes = plt.gca()

    # Title (passed through as-is, also when it is None) and hidden axis ticks/frame
    axes.set_title(image_title)
    axes.axis('off')

def get_random_data(data_tuple):
    """Pick one random (image, label) pair out of a batch.

    Args:
        data_tuple: Two-element sequence `(images, labels)`; both elements are
            indexable and `images` supports `len()`.

    Returns:
        The image and the label stored at the same randomly drawn position.
    """
    batch_images, batch_labels = data_tuple

    # One position drawn uniformly from 0 .. len(batch_images) - 1
    chosen = np.random.randint(len(batch_images))

    return batch_images[chosen], batch_labels[chosen]


In [None]:
# 5x5 grid showing one randomly picked image from each of the first 25 training batches
plt.figure(figsize=(20,20))

# `range` is listed first so that no 26th batch is pulled from the generator
for counter, (images, labels) in zip(range(1, 26), train_data_cnn):

    # One random image of this batch together with its label
    image, label = get_random_data([images, labels])

    # Draw it in the next grid cell, titled with its class name
    plt.subplot(5,5,counter)
    show_image(image, image_title=f"Class : {class_names[int(label)]}")

# Tidy up the spacing and render the grid
plt.tight_layout()
plt.show()


In [None]:
# Training/validation helper for the CNN models
def fit_model_cnn(model, name, epochs=5):
    """Train `model` on the CNN generators and return the Keras training history.

    Args:
        model: Compiled Keras model to train.
        name: Base name of the checkpoint file; ".keras" is appended to it.
        epochs: Maximum number of epochs to train for.

    Returns:
        The object returned by `model.fit`.
    """
    # Stop after 3 epochs without improvement (restoring the best weights) and
    # keep only the best model seen so far on disk
    early_stop = EarlyStopping(restore_best_weights=True, patience=3)
    checkpoint = ModelCheckpoint(name + ".keras", save_best_only=True)

    return model.fit(
        train_data_cnn,
        epochs=epochs,
        validation_data=valid_data_cnn,
        callbacks=[early_stop, checkpoint],
    )

def fit_model_ann(model, epochs=5):
    """Train `model` on the ANN generators and return the Keras training history.

    The best model is checkpointed to "AnnAnimalImageClassifier.keras".

    Args:
        model: Compiled Keras model to train.
        epochs: Maximum number of epochs to train for.

    Returns:
        The object returned by `model.fit`.
    """
    # Early stopping with a patience of 3 epochs plus best-only checkpointing
    training_callbacks = [
        EarlyStopping(restore_best_weights=True, patience=3),
        ModelCheckpoint("AnnAnimalImageClassifier.keras", save_best_only=True),
    ]

    fit_kwargs = dict(
        validation_data=valid_data_ann,
        epochs=epochs,
        callbacks=training_callbacks,
    )
    return model.fit(train_data_ann, **fit_kwargs)


In [None]:
# Build the fully connected (ANN) baseline layer by layer
model = models.Sequential()

# Flatten each 256x256x3 image into a one-dimensional vector
model.add(layers.Flatten(input_shape=(256, 256, 3)))

# Three dense blocks of decreasing width, each followed by dropout against overfitting
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.5))

model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))

model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.3))

# Output layer: one softmax unit per class
model.add(layers.Dense(num_classes, activation='softmax'))

# Print the layer overview
model.summary()

# Categorical cross-entropy matches the one-hot labels of the ANN generators
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy']
)

# Train with the ANN helper and keep the history for the plots further down
model_history = fit_model_ann(model)


In [None]:
# Loads one image and brings it to a fixed size with three (RGB) channels
def process_image(image_path, target_size=(128, 128)):
    """Read an image file and return it resized, as an unsigned 8-bit array.

    Two-dimensional (grayscale) images are expanded to RGB and images whose last
    axis has length 4 are treated as RGBA and converted to RGB.

    Args:
        image_path: Path of the image file to read.
        target_size: Output size passed to `resize`.

    Returns:
        The processed image, or None if any step raised an exception (the error
        is printed together with the offending path).
    """
    try:
        image = io.imread(image_path)

        # Normalise the channel layout to RGB where needed
        if image.ndim == 2:
            image = color.gray2rgb(image)
        elif image.shape[-1] == 4:
            image = color.rgba2rgb(image)

        # Resize with anti-aliasing, then convert the result to uint8
        resized = resize(image, target_size, anti_aliasing=True)
        return img_as_ubyte(resized)
    except Exception as e:
        print(f"Erro ao processar {image_path}: {e}")
        return None

# Process every image of the sampled dataset and remember its class name
processed_images = []
labels = []

for class_name in class_names:
    class_path = os.path.join(sampled_data_path, class_name)
    for image_file in os.listdir(class_path):
        image_path = os.path.join(class_path, image_file)

        image = process_image(image_path)

        # Keep only images that were read successfully AND ended up with three channels;
        # anything else could not be stacked into one array below
        if image is not None and image.ndim == 3 and image.shape[-1] == 3:
            processed_images.append(image)
            labels.append(class_name)

print(f"Total de imagens processadas: {len(processed_images)}")

# Stack into NumPy arrays
processed_images = np.array(processed_images)
labels = np.array(labels)

# Scale pixel values to the 0-1 range
processed_images = processed_images / 255.0

# Encode the class names as integers
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Stratified train/test split on the flattened images
X_train, X_test, y_train, y_test = train_test_split(
    processed_images.reshape(len(processed_images), -1),
    labels_encoded,
    test_size=0.2,
    random_state=42,
    stratify=labels_encoded
)

print(f"Formato do conjunto de treinamento: {X_train.shape}, {y_train.shape}")
print(f"Formato do conjunto de teste: {X_test.shape}, {y_test.shape}")

# Cluster with MiniBatchKMeans, using one cluster per class
n_clusters = len(class_names)
kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=42, batch_size=100)
kmeans.fit(X_train)

# Cluster assignment of every test sample
y_pred = kmeans.predict(X_test)

# Map every cluster to the most frequent true label among its training samples.
# A cluster that received no training sample falls back to the most frequent
# training label overall, so that looking up a test prediction can never fail.
fallback_label = mode(y_train, keepdims=True).mode[0]
labels_map = {}
for cluster_idx in range(n_clusters):
    mask = (kmeans.labels_ == cluster_idx)
    true_labels = y_train[mask]
    if len(true_labels) > 0:
        labels_map[cluster_idx] = mode(true_labels, keepdims=True).mode[0]
    else:
        labels_map[cluster_idx] = fallback_label

# Translate the predicted clusters into class labels
y_pred_mapped = np.array([labels_map[cluster] for cluster in y_pred])

# Decode the integer labels back to class names
y_pred_mapped = label_encoder.inverse_transform(y_pred_mapped)
y_test_decoded = label_encoder.inverse_transform(y_test)

# Evaluate
accuracy = accuracy_score(y_test_decoded, y_pred_mapped)
print(f"Acurácia do MiniBatchKMeans: {accuracy:.2f}")


In [None]:
# Feature extraction with HOG (histogram of oriented gradients)
def extract_features(images):
    """Compute one HOG descriptor per image.

    Args:
        images: Iterable of images whose channels are on the last axis.

    Returns:
        NumPy array built from the list of per-image HOG descriptors.
    """
    # channel_axis=-1 tells `hog` that the channels are stored on the last axis
    hog_params = dict(
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        channel_axis=-1,
    )
    return np.array([hog(image, **hog_params) for image in images])

# Dimensionality reduction with PCA
def apply_pca(features, n_components=50):
    """Fit a PCA on `features` and project them onto its components.

    Args:
        features: Feature matrix to fit and transform.
        n_components: Number of principal components to keep.

    Returns:
        Tuple `(reduced_features, pca)` of the projected features and the fitted PCA object.
    """
    reducer = PCA(n_components=n_components, random_state=42)
    return reducer.fit_transform(features), reducer

# Pre-processing: HOG descriptors for every processed image
print("Extraindo recursos com HOG...")
hog_features = extract_features(processed_images)

# Split BEFORE fitting the PCA so that the projection is learned from the training
# samples only; fitting it on all samples would leak test information into the features.
hog_train, hog_test, y_train, y_test = train_test_split(
    hog_features,
    labels_encoded,
    test_size=0.2,
    random_state=42,
    stratify=labels_encoded
)

print("Reduzindo dimensionalidade com PCA...")
X_train, pca = apply_pca(hog_train)
X_test = pca.transform(hog_test)

# Projection of the full feature matrix, kept for any cell that still refers to X_reduced
X_reduced = pca.transform(hog_features)

print(f"Formato do conjunto de treinamento: {X_train.shape}, {y_train.shape}")
print(f"Formato do conjunto de teste: {X_test.shape}, {y_test.shape}")

# Cluster with MiniBatchKMeans, using one cluster per class
n_clusters = len(class_names)
kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=42, batch_size=100, max_iter=500)
kmeans.fit(X_train)

# Map every cluster to the most frequent true label among its training samples.
# A cluster without training samples falls back to the most frequent training label
# overall, so that looking up a test prediction can never raise a KeyError.
fallback_label = mode(y_train, keepdims=True).mode[0]
labels_map = {}
for cluster_idx in range(n_clusters):
    mask = (kmeans.labels_ == cluster_idx)
    true_labels = y_train[mask]
    if len(true_labels) > 0:
        labels_map[cluster_idx] = mode(true_labels, keepdims=True).mode[0]
    else:
        labels_map[cluster_idx] = fallback_label

# Predict the clusters of the test samples and translate them into class labels
y_pred = kmeans.predict(X_test)
y_pred_mapped = np.array([labels_map[cluster] for cluster in y_pred])

# Decode the integer labels back to class names
y_pred_mapped = label_encoder.inverse_transform(y_pred_mapped)
y_test_decoded = label_encoder.inverse_transform(y_test)

# Evaluate
accuracy = accuracy_score(y_test_decoded, y_pred_mapped)
print(f"Acurácia do MiniBatchKMeans após otimização: {accuracy:.2f}")


In [None]:
# Name used for the Keras model and, through fit_model_cnn, for its checkpoint file
name = "Inception"

# ImageNet-pretrained InceptionV3 backbone without its classification top; weights are frozen
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(256,256,3))
base_model.trainable = False

# Stack the classification head on the frozen backbone:
# global average pooling -> Dense(256, relu) -> Dropout(0.2) -> softmax over the classes
inception = Sequential(name=name)
inception.add(base_model)
inception.add(GAP())
inception.add(Dense(256, activation='relu'))
inception.add(Dropout(0.2))
inception.add(Dense(num_classes, activation='softmax'))

# Sparse categorical cross-entropy (integer labels), Adam optimizer, accuracy metric
inception.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train with fit_model_cnn, using its default number of epochs and its callbacks
history_inception = fit_model_cnn(inception, name)


In [None]:
# Name used for the Keras model and, through fit_model_cnn, for its checkpoint file
name = "Xception"

# ImageNet-pretrained Xception backbone without its classification top; weights are frozen
base_model = Xception(weights='imagenet', include_top=False, input_shape=(256,256,3))
base_model.trainable = False

# Stack the classification head on the frozen backbone:
# global average pooling -> Dense(256, relu) -> Dropout(0.2) -> softmax over the classes
xception = Sequential(name=name)
xception.add(base_model)
xception.add(GAP())
xception.add(Dense(256, activation='relu'))
xception.add(Dropout(0.2))
xception.add(Dense(num_classes, activation='softmax'))

# Sparse categorical cross-entropy (integer labels), Adam optimizer, accuracy metric
xception.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train with fit_model_cnn, using its default number of epochs and its callbacks
history_xception = fit_model_cnn(xception, name)


In [None]:
# Architecture diagram of the ANN, including layer input/output shapes
plot_model(model, show_shapes=True)


In [None]:
# Architecture diagram of the InceptionV3-based model, including layer input/output shapes
plot_model(inception, show_shapes=True)


In [None]:
# Architecture diagram of the Xception-based model, including layer input/output shapes
plot_model(xception, show_shapes=True)


In [None]:
plt.figure(figsize=(12, 6))
plt.style.use('fivethirtyeight')

# Per-epoch training and validation loss recorded while training the ANN
ann_curves = model_history.history
train_loss, val_loss = ann_curves['loss'], ann_curves['val_loss']
epoch = range(1, 1 + len(train_loss))

# One line per curve, drawn on the same axes
for _values, _legend in ((train_loss, 'Training Loss'), (val_loss, 'Validation Loss')):
    sns.lineplot(x=epoch, y=_values, label=_legend)

# Title, axis labels and legend
plt.title('Training and Validation Loss\n')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()


In [None]:
plt.figure(figsize=(12, 6))
plt.style.use('fivethirtyeight')

# Per-epoch training and validation loss recorded while training the InceptionV3 model
inception_curves = history_inception.history
train_loss, val_loss = inception_curves['loss'], inception_curves['val_loss']
epoch = range(1, 1 + len(train_loss))

# One line per curve, drawn on the same axes
for _values, _legend in ((train_loss, 'Training Loss'), (val_loss, 'Validation Loss')):
    sns.lineplot(x=epoch, y=_values, label=_legend)

# Title, axis labels and legend
plt.title('Training and Validation Loss\n')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()


In [None]:
plt.figure(figsize=(12, 6))
plt.style.use('fivethirtyeight')

# Per-epoch training and validation loss recorded while training the Xception model
xception_curves = history_xception.history
train_loss, val_loss = xception_curves['loss'], xception_curves['val_loss']
epoch = range(1, 1 + len(train_loss))

# One line per curve, drawn on the same axes
for _values, _legend in ((train_loss, 'Training Loss'), (val_loss, 'Validation Loss')):
    sns.lineplot(x=epoch, y=_values, label=_legend)

# Title, axis labels and legend
plt.title('Training and Validation Loss\n')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()


In [None]:
plt.figure(figsize=(12, 6))
plt.style.use('fivethirtyeight')

# Per-epoch training and validation accuracy recorded while training the ANN
train_acc = model_history.history['accuracy']
val_acc = model_history.history['val_accuracy']
# The x-axis is derived from the accuracy series itself, not from a loss list
# that happens to be left over from whichever cell ran last
epoch = range(1, len(train_acc) + 1)

# Plot the ACCURACY curves (the loss curves were plotted here by mistake before)
sns.lineplot(x=epoch, y=train_acc, label='Training accuracy')
sns.lineplot(x=epoch, y=val_acc, label='Validation accuracy')

# Title, axis labels and legend
plt.title('Training and Validation Accuracy\n')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='best')
plt.show()


In [None]:
plt.figure(figsize=(12, 6))
plt.style.use('fivethirtyeight')

# Per-epoch training and validation accuracy recorded while training the InceptionV3 model
train_acc = history_inception.history['accuracy']
val_acc = history_inception.history['val_accuracy']
# The x-axis is derived from the accuracy series itself, not from a loss list
# that happens to be left over from whichever cell ran last
epoch = range(1, len(train_acc) + 1)

# Plot the ACCURACY curves (the loss curves were plotted here by mistake before)
sns.lineplot(x=epoch, y=train_acc, label='Training accuracy')
sns.lineplot(x=epoch, y=val_acc, label='Validation accuracy')

# Title, axis labels and legend
plt.title('Training and Validation Accuracy\n')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='best')
plt.show()


In [None]:
plt.figure(figsize=(12, 6))
plt.style.use('fivethirtyeight')

# Per-epoch training and validation accuracy recorded while training the Xception model
train_acc = history_xception.history['accuracy']
val_acc = history_xception.history['val_accuracy']
# The x-axis is derived from the accuracy series itself, not from a loss list
# that happens to be left over from whichever cell ran last
epoch = range(1, len(train_acc) + 1)

# Plot the ACCURACY curves (the loss curves were plotted here by mistake before)
sns.lineplot(x=epoch, y=train_acc, label='Training accuracy')
sns.lineplot(x=epoch, y=val_acc, label='Validation accuracy')

# Title, axis labels and legend
plt.title('Training and Validation Accuracy\n')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='best')
plt.show()


In [None]:
# Confusion matrix of the K-Means predictions, with rows/columns in the encoder's class order
encoder_classes = label_encoder.classes_
conf_matrix = metrics.confusion_matrix(y_test_decoded, y_pred_mapped, labels=encoder_classes)

# Render the matrix as an annotated heatmap
plt.figure(figsize=(12, 8))
heatmap_options = dict(
    annot=True, fmt="d", cmap="Blues",
    xticklabels=encoder_classes,
    yticklabels=encoder_classes,
)
sns.heatmap(conf_matrix, **heatmap_options)
plt.title("Matriz de Confusão")
plt.xlabel("Classes Preditas")
plt.ylabel("Classes Verdadeiras")
plt.show()

# Per-class precision / recall / F1 report
class_report = metrics.classification_report(y_test_decoded, y_pred_mapped, target_names=encoder_classes)
print("Relatório de Classificação:\n")
print(class_report)


In [None]:
# Collect true labels and predictions on the validation set batch by batch.
# Labels and predictions are taken from the SAME batch, so they stay aligned even if
# the generator shuffles its samples. Comparing `predict(generator)` with
# `generator.classes` is only valid for an unshuffled generator.
valid_data_ann.reset()
true_batches, proba_batches = [], []
for batch_idx in range(len(valid_data_ann)):
    batch_images, batch_labels = valid_data_ann[batch_idx]
    proba_batches.append(model.predict_on_batch(batch_images))

    # One-hot labels are reduced to class indices; integer labels pass through
    batch_labels = np.asarray(batch_labels)
    if batch_labels.ndim > 1:
        batch_labels = np.argmax(batch_labels, axis=1)
    true_batches.append(batch_labels.astype(int))

y_true = np.concatenate(true_batches)           # true class indices
y_pred_proba = np.concatenate(proba_batches)    # predicted class probabilities
y_pred = np.argmax(y_pred_proba, axis=1)        # predicted class indices

# Class names in the order of the generator's class indices
class_names = list(valid_data_ann.class_indices.keys())
y_true_decoded = [class_names[idx] for idx in y_true]
y_pred_decoded = [class_names[idx] for idx in y_pred]

# Fix the label set so the matrix always has one row/column per class,
# matching the tick labels even if a class is never predicted
label_ids = list(range(len(class_names)))
conf_matrix = metrics.confusion_matrix(y_true, y_pred, labels=label_ids)

# Confusion matrix as an annotated heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names
)
plt.title("Matriz de Confusão - Modelo ANN")
plt.xlabel("Classes Preditas")
plt.ylabel("Classes Verdadeiras")
plt.show()

# Classification report
class_report = metrics.classification_report(y_true, y_pred, labels=label_ids, target_names=class_names)
print("Relatório de Classificação - Modelo ANN:\n")
print(class_report)


In [None]:
# Preparation for collecting true labels and predictions on the validation set
valid_data_cnn.reset()  # Resets the generator (original author's note: to avoid indexing problems)


In [None]:
# Collect true labels and predictions on the validation set batch by batch.
# Labels and predictions are taken from the SAME batch, so they stay aligned even if
# the generator shuffles its samples. Comparing `predict(generator)` with
# `generator.classes` is only valid for an unshuffled generator.
true_batches, proba_batches = [], []
for batch_idx in range(len(valid_data_cnn)):
    batch_images, batch_labels = valid_data_cnn[batch_idx]
    proba_batches.append(inception.predict_on_batch(batch_images))

    # One-hot labels are reduced to class indices; integer labels pass through
    batch_labels = np.asarray(batch_labels)
    if batch_labels.ndim > 1:
        batch_labels = np.argmax(batch_labels, axis=1)
    true_batches.append(batch_labels.astype(int))

y_true = np.concatenate(true_batches)           # true class indices
y_pred_proba = np.concatenate(proba_batches)    # predicted class probabilities
y_pred = np.argmax(y_pred_proba, axis=1)        # predicted class indices

# Class names in the order of the generator's class indices
class_names = list(valid_data_cnn.class_indices.keys())
y_true_decoded = [class_names[idx] for idx in y_true]
y_pred_decoded = [class_names[idx] for idx in y_pred]

# Fix the label set so the matrix always has one row/column per class,
# matching the tick labels even if a class is never predicted
label_ids = list(range(len(class_names)))
conf_matrix = metrics.confusion_matrix(y_true, y_pred, labels=label_ids)

# Confusion matrix as an annotated heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names
)
plt.title("Matriz de Confusão - Modelo CNN InceptionV3")
plt.xlabel("Classes Preditas")
plt.ylabel("Classes Verdadeiras")
plt.show()

# Classification report
class_report = metrics.classification_report(y_true, y_pred, labels=label_ids, target_names=class_names)
print("Relatório de Classificação - Modelo CNN InceptionV3:\n")
print(class_report)


In [None]:
# Collect true labels and predictions on the validation set batch by batch.
# Labels and predictions are taken from the SAME batch, so they stay aligned even if
# the generator shuffles its samples. Comparing `predict(generator)` with
# `generator.classes` is only valid for an unshuffled generator.
true_batches, proba_batches = [], []
for batch_idx in range(len(valid_data_cnn)):
    batch_images, batch_labels = valid_data_cnn[batch_idx]
    proba_batches.append(xception.predict_on_batch(batch_images))

    # One-hot labels are reduced to class indices; integer labels pass through
    batch_labels = np.asarray(batch_labels)
    if batch_labels.ndim > 1:
        batch_labels = np.argmax(batch_labels, axis=1)
    true_batches.append(batch_labels.astype(int))

y_true = np.concatenate(true_batches)           # true class indices
y_pred_proba = np.concatenate(proba_batches)    # predicted class probabilities
y_pred = np.argmax(y_pred_proba, axis=1)        # predicted class indices

# Class names in the order of the generator's class indices
class_names = list(valid_data_cnn.class_indices.keys())
y_true_decoded = [class_names[idx] for idx in y_true]
y_pred_decoded = [class_names[idx] for idx in y_pred]

# Fix the label set so the matrix always has one row/column per class,
# matching the tick labels even if a class is never predicted
label_ids = list(range(len(class_names)))
conf_matrix = metrics.confusion_matrix(y_true, y_pred, labels=label_ids)

# Confusion matrix as an annotated heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names
)
plt.title("Matriz de Confusão - Modelo CNN Xception")
plt.xlabel("Classes Preditas")
plt.ylabel("Classes Verdadeiras")
plt.show()

# Classification report
class_report = metrics.classification_report(y_true, y_pred, labels=label_ids, target_names=class_names)
print("Relatório de Classificação - Modelo CNN Xception:\n")
print(class_report)


In [None]:
# Independent two-sample t-test on the per-epoch validation accuracies of the two CNN
# models, at a significance level of 5%:
# H0: mean accuracy (Inception) = mean accuracy (Xception)
# H1: mean accuracy (Inception) < mean accuracy (Xception)

# Validation accuracies recorded during training
acc_inception = history_inception.history['val_accuracy']
acc_xception = history_xception.history['val_accuracy']

from scipy.stats import ttest_ind

# Significance level of the test
alpha = 0.05

# H1 is one-sided (Xception better than Inception), so with Xception as the first sample
# the alternative is 'greater'; the default would run a two-sided test instead
t_stat, p_value = ttest_ind(acc_xception, acc_inception, alternative='greater')

# The statistic is a float and has to be converted before string concatenation
print("estatística do teste: " + str(t_stat) + ", p-valor:", p_value)

# Decimal point, not comma: `0,05` is not a valid Python number
if p_value < alpha:
    print("O modelo Xception possui maior acurácia média.")
else:
    print("Não é possível concluir que existem diferenças significativas nas acurácias médias dos modelos de CNN.")


In [None]:
# Web application that classifies one image with the best of the trained models.
# NOTE(review): the path below is a placeholder and points to the Xception checkpoint;
# replace it with the real location of whichever model is finally chosen.
from tensorflow.keras.models import load_model
import streamlit as st

st.header('Image Classification Model')
best_model = load_model('caminho/ate/o/arquivo/do/modelo/salvo/Xception.keras')

# Class names as an indexable list, in the order of the training class indices.
# The training generators read the English-named folders of the sampled dataset;
# sorting assumes they number those folders alphabetically (confirm in the Keras docs).
data_cat = sorted(class_names_dict.values())

# Must match the input size the models were built with (256x256)
img_height = 256
img_width = 256
image = st.text_input('Enter Image name','Exemplo_Animal.jpg')

# Load and resize the image, convert it to a float array, apply the same 1/255 rescaling
# that was used during training, and add the batch dimension
image_load = tf.keras.utils.load_img(image, target_size=(img_height,img_width))
img_arr = tf.keras.utils.img_to_array(image_load) / 255.0
img_bat=tf.expand_dims(img_arr,0)

predict = best_model.predict(img_bat)

# The model already ends in a softmax layer, so its output is used directly;
# applying softmax a second time would flatten the probabilities
score = predict[0]
st.image(image, width=200)
st.write('Animal in image is ' + data_cat[np.argmax(score)])
st.write('With confidence of ' + str(np.max(score)*100))
