In [20]:
# Fine-tuning de Vision Transformer (ViT) com TensorFlow

## 1. Instalar bibliotecas necessárias
!pip install -q tensorflow tensorflow_hub tensorflow_datasets scikit-learn

## 2. Importações
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import os

## 3. Carregar o dataset de imagens
data_dir = "/content/drive/MyDrive/Base1"  # altere se necessário
batch_size = 32
img_size = (224, 224)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.3,
    subset="training",
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.3,
    subset="validation",
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

class_names = train_ds.class_names
num_classes = len(class_names)

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

vit_url = "https://tfhub.dev/sayakpaul/vit_s16_fe/1"
vit_layer = hub.KerasLayer(vit_url, trainable=True)

class ViTModel(tf.keras.Model):
    def __init__(self, vit_layer, num_classes):
        super(ViTModel, self).__init__()
        self.rescale = tf.keras.layers.Rescaling(1./255)
        self.vit = vit_layer
        self.dense1 = tf.keras.layers.Dense(128, activation='relu')
        self.dropout = tf.keras.layers.Dropout(0.3)
        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.rescale(inputs)
        vit_outputs = self.vit(x)
#        x = vit_outputs['pooled_output']
        x = vit_outputs
        x = self.dense1(x)
        x = self.dropout(x)
        return self.classifier(x)

# instanciar modelo
model = ViTModel(vit_layer, num_classes)

## 6. Compilar e treinar
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(train_ds, validation_data=val_ds, epochs=10)

## 7. Avaliação
# Obter dados para métricas detalhadas
y_true = []
y_pred = []

for images, labels in val_ds:
    preds = model.predict(images)
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(preds, axis=1))

# Relatório
print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=class_names))



Found 1000 files belonging to 10 classes.
Using 700 files for training.
Found 1000 files belonging to 10 classes.
Using 300 files for validation.
Epoch 1/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 10s/step - accuracy: 0.1673 - loss: 3.5079 - val_accuracy: 0.3600 - val_loss: 2.0197
Epoch 2/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m158s[0m 6s/step - accuracy: 0.5152 - loss: 1.5818 - val_accuracy: 0.7200 - val_loss: 0.9760
Epoch 3/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 6s/step - accuracy: 0.7808 - loss: 0.7024 - val_accuracy: 0.8300 - val_loss: 0.5659
Epoch 4/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 6s/step - accuracy: 0.8887 - loss: 0.3765 - val_accuracy: 0.8867 - val_loss: 0.3851
Epoch 5/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 6s/step - accuracy: 0.9527 - loss: 0.2312 - val_accuracy: 0.9233 - val_loss: 0.2979
Epoch 6/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━