In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, FastICA

In [None]:
# Settings consumed by the dataset-loading cells below.
DATASET_FOLDER = 'ethnicity/data/fairface-ethnicity/'

# Height and width are passed together as `image_size`; both are 224 (square images).
img_height = img_width = 224

# Number of images per batch requested from the batched loader.
batch_size = 100

In [None]:
# Load the image folder once and unpack the result into the training and
# validation datasets (`subset='both'` together with `validation_split=0.2`).
training_dataset, validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_FOLDER,
    # Labels: taken from the directory structure, encoded as categorical vectors.
    labels='inferred',
    label_mode='categorical',
    # Images: single-channel, sized (img_height, img_width), grouped in batches.
    color_mode='grayscale',
    image_size=(img_height, img_width),
    batch_size=batch_size,
    # Split and ordering: a fixed seed keeps the shuffle and the split repeatable.
    shuffle=True,
    seed=97623,
    validation_split=0.2,
    subset='both',
)

# Displaying the first 100 images

In [None]:
# Show the first training batch on a 10x10 grid.
# The number of tiles drawn follows the actual batch length instead of a
# hard-coded 100, so a smaller batch no longer indexes out of bounds.
grid_side = 10
fig, axes = plt.subplots(grid_side, grid_side, figsize=(10, 10))

for images, labels in training_dataset.take(1):
    # Convert the whole batch once rather than once per image.
    batch_pixels = images.numpy().astype("uint8")
    # zip() stops at the shorter of (grid cells, images in the batch).
    for ax, image in zip(axes.flat, batch_pixels):
        # The loader uses color_mode='grayscale', so channel 0 is the only channel;
        # selecting it gives imshow a plain 2-D array.
        ax.imshow(image[..., 0], cmap='gray')
        # To label a tile, a title can be built from training_dataset.class_names
        # and the arg-max of the matching row of `labels`.

# Hide the axes of every cell, including cells left empty by a short batch.
for ax in axes.flat:
    ax.axis("off")

plt.show()


# PCA

In [None]:
# PCA over one batch of images, with the resulting components shown as images.
batch = next(training_dataset.as_numpy_iterator())
batch_images = batch[0]

print('Dimensão de um Batch de dados', batch_images.shape)

# Read the sizes from the batch itself so the cell keeps working when
# batch_size or the image size is changed in the configuration cell.
n_images, height, width = batch_images.shape[:3]
data = batch_images.reshape(n_images, -1)

print('Dimensão de um Batch de dados após reshape', data.shape)

# PCA cannot extract more components than there are images in the batch.
n_pca_components = min(40, n_images)
# A fixed random_state keeps the result repeatable if a randomized solver is selected.
pca = PCA(n_components=n_pca_components, random_state=97623)

# The data is transposed before fitting: pixels are the samples and images the
# features, so each output row (after transposing back) is one image-shaped map.
pca_data = pca.fit_transform(data.T).T
pca_data = pca_data.reshape(n_pca_components, height, width)

columns = 8
rows = -(-n_pca_components // columns)  # ceiling division: enough rows for every component

fig = plt.figure(figsize=(30, 30))

for i, component_image in enumerate(pca_data, start=1):
    ax = fig.add_subplot(rows, columns, i)
    ax.imshow(component_image, cmap='gray')
    ax.axis('off')
    ax.set_title(str(i))

plt.show()

# ICA

In [None]:
# FastICA over one batch of images, with the resulting components shown as images.
batch = next(training_dataset.as_numpy_iterator())
batch_images = batch[0]

print('Dimensão de um Batch de dados', batch_images.shape)

# Read the sizes from the batch itself so the cell keeps working when
# batch_size or the image size is changed in the configuration cell.
n_images, height, width = batch_images.shape[:3]
data = batch_images.reshape(n_images, -1)

print('Dimensão de um Batch de dados após reshape', data.shape)

# Never request more components than there are images in the batch.
n_ica_components = min(40, n_images)
# FastICA starts from a random unmixing matrix; a fixed random_state makes
# the components (and therefore the figure) repeatable between runs.
ica = FastICA(n_components=n_ica_components, random_state=97623)

# The data is transposed before fitting: pixels are the samples and images the
# features, so each output row (after transposing back) is one image-shaped map.
ica_data = ica.fit_transform(data.T).T
ica_data = ica_data.reshape(n_ica_components, height, width)

columns = 8
rows = -(-n_ica_components // columns)  # ceiling division: enough rows for every component

fig = plt.figure(figsize=(30, 30))

for i, component_image in enumerate(ica_data, start=1):
    ax = fig.add_subplot(rows, columns, i)
    ax.imshow(component_image, cmap='gray')
    ax.axis('off')
    ax.set_title(str(i))

plt.show()

# Scikit-Learn Gaussian Mixture Models

In [None]:
from sklearn.mixture import GaussianMixture
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import random

In [None]:
# Reload the same folder with `batch_size=None`, rebinding `training_dataset` and
# `validation_dataset`; the Gaussian-mixture cells below iterate it one image at a time.
training_dataset, validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_FOLDER,
    # Labels: taken from the directory structure, encoded as categorical vectors.
    labels='inferred',
    label_mode='categorical',
    # Images: single-channel, sized (img_height, img_width), no batching.
    color_mode='grayscale',
    image_size=(img_height, img_width),
    batch_size=None,
    # Split and ordering: same seed and split fraction as the batched load.
    shuffle=True,
    seed=97623,
    validation_split=0.2,
    subset='both',
)

In [None]:
# Size of the Gaussian-mixture experiment:
#   n_samples    - how many training images are taken to fit the model
#   n_components - how many mixture components the model is given
n_samples, n_components = 2000, 7

In [None]:
# Flatten some images to fit the model.
# Iterating through as_numpy_iterator() yields numpy arrays directly, and np.stack
# joins them into a single (n_images, ...) array.
sample_images = np.stack(
    [img for img, _ in training_dataset.take(n_samples).as_numpy_iterator()]
)

# Reshape by the number of images actually collected. If the dataset holds fewer
# than n_samples images, reshaping to (n_samples, -1) could still succeed whenever
# the element count divides evenly, silently mixing pixels of different images.
data = sample_images.reshape(len(sample_images), -1)

print('Data shape:', data.shape)

In [None]:
# Fit a Gaussian mixture directly on the flattened pixels.
#
# covariance_type='diag': the default ('full') stores one n_features x n_features
# covariance matrix per component. With 224 x 224 grayscale images that is
# 50,176 features, i.e. roughly 20 GB of float64 per component, which does not fit
# in memory. A diagonal covariance stores one variance per pixel instead.
#
# random_state: the fit starts from a random initialisation, so a fixed seed is
# needed for the fitted model to be repeatable between runs.
gaussian_mixture_model = GaussianMixture(
    n_components=n_components,
    covariance_type='diag',
    random_state=97623,
)

gaussian_mixture_model.fit(data)


# Próximos testes

1. PCA para cada etnia
Assim sabemos o que mais varia em cada etnia

Podemos usar essa informação para criar uma mistura?


a. Modelar a mistura em nível baixo (com os próprios pixels das imagens)

b. Modelar mistura com as features de uma rede neural

c. PCA Supervisionado
- https://www.sciencedirect.com/science/article/pii/S0031320310005819


PRÓXIMA SEMANA

- APRESENTAÇÃO DO PCA SUPERVISIONADO