<a href="https://colab.research.google.com/github/andrehochuli/teaching/blob/main/ComputerVision/Lecture%2005%20-%20Feature%20Extraction/Lecture_05_Avalia%C3%A7%C3%A3o_Formativa_Template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Auxiliary Functions

In [None]:
import numpy as np
import cv2, math
import matplotlib.pyplot as plt
#Keras to import datasets, not for deep learning (yet)
from tensorflow import keras
from sklearn.decomposition import PCA
from sklearn import metrics, preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.manifold import TSNE
import seaborn as sns
import pandas as pd
import skimage.feature as feature
from sklearn.model_selection import train_test_split
import os
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [None]:
# Auxiliary function to plot images side by side
# @Author: Prof. André Hochuli
# Shows a list of images side by side, easing qualitative comparison
def plot_sidebyside(img_list, titles=None, colormap=None, figsize=(12, 6)):
    """Display the images of ``img_list`` in a single row of subplots.

    Args:
        img_list: Sequence of images; each one is passed to ``imshow``.
        titles: Optional sequence with one title per image. When omitted,
            consecutive characters starting at "A" are used.
        colormap: Value forwarded to ``imshow`` as ``cmap``.
        figsize: Figure size forwarded to ``plt.subplots``.

    Raises:
        ValueError: If ``img_list`` is empty.
        IndexError: If ``titles`` has fewer entries than ``img_list``.
    """
    n = len(img_list)
    if n == 0:
        raise ValueError("img_list must contain at least one image")

    # squeeze=False always returns a 2-D array of axes. With the default, a
    # single image yields a bare Axes object that cannot be indexed.
    _, axes = plt.subplots(1, n, figsize=figsize, squeeze=False)
    axes = axes[0]

    if titles is None:
        titles = [chr(ord('A') + i) for i in range(n)]

    for i, img in enumerate(img_list):
        axes[i].imshow(img, cmap=colormap)
        axes[i].set_title(titles[i])
        axes[i].axis('off')

    # Render the assembled figure
    plt.show()

In [None]:
def _show_image_row(images, titles, heading, text_color, background):
    """Draw ``images`` in a single row under a highlighted ``heading``."""
    n_cols = len(images)
    plt.figure(figsize=(22, 4))
    for position, (img, title) in enumerate(zip(images, titles), start=1):
        plt.subplot(1, n_cols, position)
        plt.imshow(img, cmap='gray', interpolation='nearest')
        plt.axis('off')
        plt.title(title)
    plt.suptitle(heading, fontsize=16, fontweight='bold', color=text_color, backgroundcolor=background)
    plt.show()


# @Author: Prof. André Hochuli
# Compiles the results for qualitative and quantitative analysis
def performance_evaluation(x_test, y_test, predictions, class_names, info_message):
    """Print classification metrics and plot example predictions.

    Prints the classification report, shows the confusion matrix, then shows
    up to 10 correctly classified images, up to 10 misclassified images and
    the first test image of every class present in ``y_test``.

    Args:
        x_test: Test images, indexable by integer position.
        y_test: Ground-truth labels (any array-like).
        predictions: Predicted labels, aligned with ``y_test``.
        class_names: Mapping indexed by class label that gives the class name.
        info_message: Text appended to the "Evaluation of" header line.
    """
    # Plain Python lists compare as a single boolean with ``==``; arrays are
    # needed for the element-wise comparisons below.
    y_test = np.asarray(y_test)
    predictions = np.asarray(predictions)

    print(f"Evaluation of {info_message}")
    print(metrics.classification_report(y_test, predictions))

    # Confusion matrix
    disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test, predictions)
    disp.figure_.suptitle("Confusion Matrix")
    plt.show()

    # Correctly and incorrectly classified images (at most 10 of each)
    galleries = (
        (np.where(y_test == predictions)[0][:10], "Correct Predictions", 'white', 'green'),
        (np.where(y_test != predictions)[0][:10], "Wrong Predictions", 'white', 'red'),
    )
    for idxs, heading, text_color, background in galleries:
        if len(idxs) > 0:
            _show_image_row(
                [x_test[idx] for idx in idxs],
                [f"Lbl:{y_test[idx]} Pred:{predictions[idx]}" for idx in idxs],
                heading, text_color, background,
            )

    # One example per class: the first test sample carrying each label
    unique_classes = np.unique(y_test)
    first_idx = [np.where(y_test == cls)[0][0] for cls in unique_classes]
    _show_image_row(
        [x_test[idx] for idx in first_idx],
        [f"{i}-{class_names[cls]}" for i, cls in enumerate(unique_classes)],
        "Example of each class", 'black', 'yellow',
    )


In [None]:
def random_shuffle(X_train, y_train, X_test, y_test, random_state=42):
    """Shuffle the train and test sets independently, keeping X/y aligned.

    NumPy's global random generator is seeded with ``random_state``; the
    train permutation is drawn first, then the test permutation.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` with shuffled copies.
    """
    np.random.seed(random_state)

    shuffled = []
    for features, labels in ((X_train, y_train), (X_test, y_test)):
        # One permutation per split, applied to both features and labels
        order = np.random.permutation(len(labels))
        shuffled.append(features[order])
        shuffled.append(labels[order])

    return tuple(shuffled)

In [None]:
import numpy as np
from collections import Counter

def random_undersampling(X, y, random_state=42):
    """Balance a dataset by randomly discarding samples of larger classes.

    Every class is reduced to the size of the smallest class, then the
    selected samples are shuffled.

    Args:
        X: Samples (array-like), indexed along the first axis.
        y: Labels (1-D array-like) aligned with ``X``.
        random_state: Seed passed to ``np.random.seed``.

    Returns:
        Tuple ``(X_resampled, y_resampled)`` of NumPy arrays.

    Raises:
        ValueError: If ``y`` is empty or ``X`` and ``y`` differ in length.
    """
    # Lists would break the element-wise ``y == cls`` comparison and the
    # fancy indexing below, so work on arrays.
    X = np.asarray(X)
    y = np.asarray(y)

    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )
    if len(y) == 0:
        raise ValueError("Cannot undersample an empty dataset")

    # NOTE: this reseeds NumPy's global random generator.
    np.random.seed(random_state)

    # Count samples per class; Counter keeps classes in first-seen order
    class_counts = Counter(y.tolist())
    min_count = min(class_counts.values())

    # Randomly pick min_count indices from every class
    selected = [
        np.random.choice(np.flatnonzero(y == cls), size=min_count, replace=False)
        for cls in class_counts
    ]
    keep = np.concatenate(selected, axis=0)

    # Shuffle so the classes are no longer grouped together
    keep = keep[np.random.permutation(len(keep))]

    return X[keep], y[keep]


In [None]:
def retrieve_swedish_leaf(base_dir):
    """Download and unpack the Swedish Leaf archive unless ``base_dir`` exists.

    The archive is fetched with ``wget`` and unpacked with ``unzip`` into the
    current working directory.

    Args:
        base_dir: Path whose existence marks the dataset as available.
            NOTE(review): the archive is presumed to unpack into a directory
            with this name — confirm.

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with an error.
        FileNotFoundError: If ``wget`` or ``unzip`` is not installed.
        RuntimeError: If ``base_dir`` is still missing after extraction.
    """
    import subprocess  # local import keeps this cell self-contained

    dataset_url = "http://www.ppgia.pucpr.br/~aghochuli/swedish_leaf.zip"
    zip_path = "swedish_leaf.zip"

    if os.path.exists(base_dir):
        print(f"Dataset already available at: {base_dir}")
        return

    print("Downloading...")
    # Argument lists avoid the shell; check=True surfaces download failures
    # instead of silently continuing with a broken archive.
    subprocess.run(["wget", "-O", zip_path, dataset_url], check=True)

    # Success is judged by the extracted directory rather than the exit code.
    # NOTE(review): unzip reportedly exits non-zero for mere warnings — confirm.
    unzip_result = subprocess.run(["unzip", zip_path])
    if not os.path.exists(base_dir):
        raise RuntimeError(
            f"Extraction of {zip_path} did not create {base_dir} "
            f"(unzip exit code {unzip_result.returncode})"
        )

    # The downloaded archive is left on disk.
    print(f"Dataset extracted to: {base_dir}")

def load_swedish_leaf():
    """Load the Swedish Leaf images and split them 70/30 into train/test.

    Images are read from ``swedish_leaf/leaf1`` .. ``swedish_leaf/leaf8`` in
    grayscale and resized to 128x128. Files that cannot be read are reported
    and skipped.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, class_names)`` where the
        first four items are NumPy arrays and ``class_names`` maps the labels
        0..7 to species names.
    """
    base_dir = "swedish_leaf"

    retrieve_swedish_leaf(base_dir)

    images = []
    labels = []

    for i in range(1, 9):
        leaf_dir = os.path.join(base_dir, f'leaf{i}')
        if not os.path.isdir(leaf_dir):
            continue

        # os.listdir order is arbitrary; sorting keeps the seeded split below
        # reproducible across machines.
        for filename in sorted(os.listdir(leaf_dir)):
            if not filename.endswith('.png'):
                continue

            img_path = os.path.join(leaf_dir, filename)
            # Load as grayscale; cv2.imread returns None instead of raising
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print(f"Error loading image {img_path}: file could not be read")
                continue

            try:
                # Resize to 128x128
                img_resized = cv2.resize(img, (128, 128))
            except cv2.error as e:
                print(f"Error loading image {img_path}: {e}")
                continue

            images.append(img_resized)
            labels.append(i - 1)  # Labels from 0 to 7

    # Stratified 70/30 holdout
    X_train, X_test, y_train_split, y_test_split = train_test_split(
        images, labels,
        test_size=0.3,
        random_state=42,
        stratify=labels
    )

    class_names = {
      0: 'Ulmus carpinifolia',
      1: 'Acer',
      2: 'Salix aurita',
      3: 'Quercus',
      4: 'Alnus incana',
      5: 'Betula pubescens',
      6: 'Salix alba \'Sericea\'',
      7: 'Populus tremula'
    }

    return np.array(X_train),np.array(y_train_split),np.array(X_test),np.array(y_test_split), class_names

In [None]:
def retrieve_paper_rock_scissors(base_dir):
    """Download and unpack the paper-rock-scissors archive if needed.

    Nothing is downloaded when ``base_dir`` already exists. Otherwise the
    archive is fetched with ``wget`` and unpacked with ``unzip`` into the
    current working directory.

    Args:
        base_dir: Path whose existence marks the dataset as available.
            NOTE(review): the archive is presumed to unpack into a directory
            with this name — confirm.

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with an error.
        FileNotFoundError: If ``wget`` or ``unzip`` is not installed.
        RuntimeError: If ``base_dir`` is still missing after extraction.
    """
    import subprocess  # local import keeps this cell self-contained

    dataset_url = "http://www.ppgia.pucpr.br/~aghochuli/paper-rock-scissors.zip"
    zip_path = "paper-rock-scissors.zip"

    if os.path.exists(base_dir):
        print(f"Dataset already available at: {base_dir}")
        return

    print("Downloading...")
    # Argument lists avoid the shell; check=True surfaces download failures
    # instead of silently continuing with a broken archive.
    subprocess.run(["wget", "-O", zip_path, dataset_url], check=True)

    # Success is judged by the extracted directory rather than the exit code.
    # NOTE(review): unzip reportedly exits non-zero for mere warnings — confirm.
    unzip_result = subprocess.run(["unzip", zip_path])
    if not os.path.exists(base_dir):
        raise RuntimeError(
            f"Extraction of {zip_path} did not create {base_dir} "
            f"(unzip exit code {unzip_result.returncode})"
        )

    # The downloaded archive is left on disk.
    print(f"Dataset extracted to: {base_dir}")

def load_paper_rock_scissors():
    """Load the paper-rock-scissors images and split them 70/30.

    Images are read from one sub-directory per class in grayscale and resized
    to 128x128. Files that cannot be read are reported and skipped.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, class_names)`` where the
        first four items are NumPy arrays and ``class_names`` maps each
        integer label to its class name.
    """
    base_dir = "paper-rock-scissors"

    retrieve_paper_rock_scissors(base_dir)

    x_data = []
    y_data = []

    # Label -> class directory mapping
    class_names = {
        0: 'paper',
        1: 'rock',
        2: 'scissors'
    }

    for label, cls in class_names.items():
        cls_dir = os.path.join(base_dir, cls)
        if not os.path.isdir(cls_dir):
            continue

        # os.listdir order is arbitrary; sorting keeps the seeded split below
        # reproducible across machines.
        for filename in sorted(os.listdir(cls_dir)):
            if not filename.endswith(('.png', '.jpg', '.jpeg')):
                continue

            img_path = os.path.join(cls_dir, filename)
            # Load as grayscale; cv2.imread returns None instead of raising
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print(f"Error loading image {img_path}: file could not be read")
                continue

            try:
                # Resize to 128x128
                img_resized = cv2.resize(img, (128, 128))
            except cv2.error as e:
                print(f"Error loading image {img_path}: {e}")
                continue

            x_data.append(img_resized)
            y_data.append(label)

    # Stratified 70/30 holdout
    X_train, X_test, y_train_split, y_test_split = train_test_split(
        x_data, y_data,
        test_size=0.3,
        random_state=42,
        stratify=y_data
    )

    return (
        np.array(X_train),
        np.array(y_train_split),
        np.array(X_test),
        np.array(y_test_split),
        class_names
    )

In [None]:
import os
import cv2
import numpy as np

def retrieve_vehicle(base_dir):
    """Download and unpack the vehicle archive unless ``base_dir`` exists.

    The archive is fetched with ``wget`` and unpacked with ``unzip`` into the
    current working directory.

    Args:
        base_dir: Path whose existence marks the dataset as available.
            NOTE(review): the archive is presumed to unpack into a directory
            with this name — confirm.

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with an error.
        FileNotFoundError: If ``wget`` or ``unzip`` is not installed.
        RuntimeError: If ``base_dir`` is still missing after extraction.
    """
    import subprocess  # local import keeps this cell self-contained

    dataset_url = "http://www.ppgia.pucpr.br/~aghochuli/vehicle.zip"
    zip_path = "vehicle.zip"

    if os.path.exists(base_dir):
        print(f"Dataset already available at: {base_dir}")
        return

    print("Downloading...")
    # Argument lists avoid the shell; check=True surfaces download failures
    # instead of silently continuing with a broken archive.
    subprocess.run(["wget", "-O", zip_path, dataset_url], check=True)

    # Success is judged by the extracted directory rather than the exit code.
    # NOTE(review): unzip reportedly exits non-zero for mere warnings — confirm.
    unzip_result = subprocess.run(["unzip", zip_path])
    if not os.path.exists(base_dir):
        raise RuntimeError(
            f"Extraction of {zip_path} did not create {base_dir} "
            f"(unzip exit code {unzip_result.returncode})"
        )

    # The downloaded archive is left on disk.
    print(f"Dataset extracted to: {base_dir}")

def load_vehicle():
    """Load the vehicle dataset from its ``train`` and ``test`` directories.

    Each sub-directory of a subset is treated as one class. Images are read in
    grayscale and resized to 128x128; unreadable files are reported and
    skipped. A missing subset directory yields empty arrays for that subset.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, class_names)`` where the
        first four items are NumPy arrays and ``class_names`` maps each
        integer label to its class directory name.
    """
    base_dir = "vehicle"
    retrieve_vehicle(base_dir)

    # Expected layout: vehicle/<subset>/<class>/<image>
    subsets = ['train', 'test']
    class_names = {}
    data = {}

    for subset in subsets:
        subset_dir = os.path.join(base_dir, subset)
        x_data = []
        y_data = []
        if os.path.isdir(subset_dir):
            # Sorted so label ids do not depend on the arbitrary listdir order
            classes = sorted(
                d for d in os.listdir(subset_dir)
                if os.path.isdir(os.path.join(subset_dir, d))
            )
            # Give unseen classes the next free label. Using the position in
            # this subset's listing could overwrite a label assigned earlier.
            for cls in classes:
                if cls not in class_names.values():
                    class_names[len(class_names)] = cls

            # Load images
            for label, cls in class_names.items():
                cls_dir = os.path.join(subset_dir, cls)
                if not os.path.isdir(cls_dir):
                    continue
                for filename in sorted(os.listdir(cls_dir)):
                    if not filename.endswith(('.png', '.jpg', '.jpeg')):
                        continue
                    img_path = os.path.join(cls_dir, filename)
                    # cv2.imread returns None instead of raising
                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                    if img is None:
                        print(f"Error loading image {img_path}: file could not be read")
                        continue
                    try:
                        img_resized = cv2.resize(img, (128, 128))
                    except cv2.error as e:
                        print(f"Error loading image {img_path}: {e}")
                        continue
                    x_data.append(img_resized)
                    y_data.append(label)
        data[subset] = (np.array(x_data), np.array(y_data))

    return data['train'][0], data['train'][1], data['test'][0], data['test'][1], class_names

# Descriptors

# Load Data

In [None]:
# Pick the dataset for the experiments: keep exactly one loader call enabled.
# Every loader returns the train/test images and labels plus a dict that maps
# each integer label to its class name.
x_train, y_train, x_test, y_test, class_names = load_swedish_leaf()
#x_train, y_train, x_test, y_test, class_names = load_paper_rock_scissors()
#x_train, y_train, x_test, y_test, class_names = load_vehicle()




In [None]:
# Show up to `samples_per_class` randomly chosen training images per class.
samples_per_class = 10
for cls in np.unique(y_train):
    idxs = np.where(y_train == cls)[0]
    # Cap the request: sampling without replacement raises ValueError when
    # more items are asked for than the class contains.
    n_samples = min(samples_per_class, len(idxs))
    idxs = np.random.choice(idxs, n_samples, replace=False)
    imgs = [x_train[i] for i in idxs]
    titles = [f"{class_names[cls]} #{i+1}" for i in range(n_samples)]
    plot_sidebyside(imgs, titles=titles, colormap="gray", figsize=(20, 10))