<a href="https://colab.research.google.com/github/danielsoy/ADer/blob/main/Copia_de_train_vit_colab_funka.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

GPU Setup and Verification

In [None]:
import tensorflow as tf
# Print the GPUs TensorFlow can see; an empty list means no GPU is visible.
print("GPU Available: ", tf.config.list_physical_devices('GPU'))

GPU Available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# Let TensorFlow allocate GPU memory on demand instead of reserving it all
# up front. The setting has to be identical on every visible GPU, so apply it
# to each device rather than only the first one.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as err:
        # Memory growth can only be changed before the GPUs are initialised
        # (e.g. this cell was re-run after other TensorFlow work); report it
        # instead of aborting the notebook.
        print(f"Could not enable memory growth on {gpu_device}: {err}")

GPU Optimizations

In [None]:
# Turn on XLA just-in-time compilation for TensorFlow graphs.
tf.config.optimizer.set_jit(True)
# Make 'mixed_float16' the default dtype policy for Keras layers created
# from here on.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [None]:
import numpy as np
from tensorflow.keras import layers
import os
import cv2
from google.colab import drive
import pickle
from tensorflow.data.experimental import AUTOTUNE

Patch extraction and patch encoding layers

In [None]:
class Patches(layers.Layer):
    """Split a batch of images into non-overlapping, flattened square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch. It is used
            both as the window size and as the stride, so patches never
            overlap.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            Accepting them is what allows Keras to rebuild the layer from its
            saved config.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return patches shaped ``(batch, num_patches, patch_dims)``.

        ``images`` is passed to ``tf.image.extract_patches``, which expects a
        4-D ``(batch, rows, cols, channels)`` tensor. With ``"VALID"``
        padding, rows/columns that do not fill a whole patch are dropped.
        """
        # Dynamic batch size so the layer works with an unknown batch dim.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # Static size of one flattened patch (patch_size * patch_size * channels).
        patch_dims = patches.shape[-1]
        # Collapse the 2-D patch grid into a single sequence axis.
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config

In [None]:
class PatchEncoder(layers.Layer):
    """Linearly project flattened patches and add a learned position embedding.

    Args:
        num_patches: Number of patches per image; also the size of the
            position-embedding table.
        projection_dim: Width of the projected patch vectors and of the
            position embeddings.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            Accepting them is what allows Keras to rebuild the layer from its
            saved config.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        # Kept so get_config() can report it.
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Return ``projection(patch)`` plus the embedding of each position.

        The position embeddings have shape ``(num_patches, projection_dim)``
        and are broadcast over the leading (batch) axis of ``patch``.
        """
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {
                "num_patches": self.num_patches,
                "projection_dim": self.projection_dim,
            }
        )
        return config

ViT autoencoder model

In [None]:
def create_vit_autoencoder(input_shape, patch_size, projection_dim, num_heads, transformer_layers):
    """Build a ViT-style autoencoder that reconstructs its input image.

    The image is cut into non-overlapping patches, each patch is embedded,
    the sequence passes through ``transformer_layers`` pre-norm Transformer
    blocks, and every token is projected back to one patch's worth of pixels.
    The patches are then stitched back into an image of the input size.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
        patch_size: Side length of the square patches. Must divide both
            ``height`` and ``width``.
        projection_dim: Width of the patch embeddings.
        num_heads: Number of attention heads per Transformer block.
        transformer_layers: Number of Transformer blocks.

    Returns:
        A ``tf.keras.Model`` mapping images of ``input_shape`` to float32
        reconstructions of the same shape.

    Raises:
        ValueError: If ``patch_size`` does not evenly divide the image height
            and width (the patch grid could not be reassembled).
    """
    height, width, channels = input_shape
    if height % patch_size != 0 or width % patch_size != 0:
        raise ValueError(
            f"patch_size={patch_size} must evenly divide the image height "
            f"({height}) and width ({width})."
        )
    grid_rows = height // patch_size
    grid_cols = width // patch_size
    # Rows and columns are counted separately so non-square inputs work too.
    num_patches = grid_rows * grid_cols

    inputs = layers.Input(shape=input_shape)
    patches = Patches(patch_size)(inputs)
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)
    for _ in range(transformer_layers):
        # Self-attention sub-block with a residual connection.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        x2 = layers.Add()([attention_output, encoded_patches])
        # Feed-forward sub-block with a residual connection.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        x4 = layers.Dense(projection_dim * 2, activation="gelu")(x3)
        x4 = layers.Dropout(0.1)(x4)
        x4 = layers.Dense(projection_dim)(x4)
        encoded_patches = layers.Add()([x4, x2])

    # Decode every token into the pixels of one patch.
    x = layers.Dense(patch_size * patch_size * channels)(encoded_patches)
    # Un-patchify: a direct reshape to (height, width, channels) would place
    # each patch's pixels along image rows and scramble the spatial layout.
    # Restore the (grid_row, grid_col, patch_row, patch_col, channel) axes,
    # interleave the grid and in-patch axes, then merge them.
    x = layers.Reshape((grid_rows, grid_cols, patch_size, patch_size, channels))(x)
    x = layers.Permute((1, 3, 2, 4, 5))(x)
    x = layers.Reshape((height, width, channels))(x)
    # Keep the model output in float32 so the loss is computed in full
    # precision even when a mixed-precision policy is active.
    x = layers.Activation("linear", dtype="float32")(x)

    return tf.keras.Model(inputs, x)

Mount Drive

In [None]:
# Mount Google Drive at /content/drive; the dataset is read from there and
# the trained model and history are written back to it.
drive.mount('/content/drive')

Mounted at /content/drive


Parameters

In [None]:
# Upper bound on how many image paths are taken from the data folder.
max_images = 500 # small subset for quick experiments
# Side length, in pixels, of each square patch.
patch_size = 8
# Width of the patch embeddings.
projection_dim = 128
# Attention heads per Transformer block.
num_heads = 8
# Number of Transformer blocks.
transformer_layers = 8
# Model input shape as (height, width, channels).
input_shape = (224, 224, 3)
# Images per batch in the training and validation pipelines.
batch_size = 32

Image loading and preprocessing

In [None]:
def load_and_preprocess_image(path, target_size=(224, 224)):
    """Read an image file and return it as a float32 tensor scaled to [0, 1].

    Args:
        path: Path of the image file (a string or scalar string tensor).
        target_size: ``(height, width)`` to resize the decoded image to.
            Defaults to ``(224, 224)``; pass the model's spatial input size
            if it differs.

    Returns:
        A ``(height, width, 3)`` float32 tensor with values in [0, 1].
    """
    raw_bytes = tf.io.read_file(path)
    # channels=3 forces an RGB result.
    img = tf.image.decode_jpeg(raw_bytes, channels=3)
    img = tf.image.resize(img, target_size)
    # Decoded pixels are in 0..255; rescale them to 0..1.
    img = tf.cast(img, tf.float32) / 255.0
    return img

Create and process datasets

In [None]:
data_folder = '/content/drive/MyDrive/good'
# list_files() shuffles by default and Dataset.shuffle() reshuffles on every
# pass. A take()/skip() split taken from such a dataset selects different
# files each time it is iterated, so validation images can leak into the
# training set. List the files in a fixed order and shuffle exactly once with
# a fixed seed so every pass sees the same ordering.
image_paths = tf.data.Dataset.list_files(f"{data_folder}/*.*", shuffle=False)
image_paths = image_paths.shuffle(
    buffer_size=1000, seed=42, reshuffle_each_iteration=False
).take(max_images)

Calculate split sizes

In [None]:
# Size the split from the number of paths actually available: the folder may
# hold fewer than max_images files, in which case sizing from max_images
# would leave the validation set short or empty.
num_images = int(image_paths.cardinality().numpy())
if num_images < 0:
    # Cardinality is unknown or infinite; fall back to the configured cap.
    num_images = max_images
# Hold out 10% of the images for validation.
val_size = int(num_images * 0.1)
train_size = num_images - val_size

Split and process datasets

In [None]:
# The first train_size paths form the training set ...
train_ds = image_paths.take(train_size)
# ... and everything after them forms the validation set.
# NOTE(review): the two sets are disjoint only if image_paths yields the same
# order on every pass - confirm the upstream shuffle is not re-randomised.
val_ds = image_paths.skip(train_size)

In [None]:
# Training input pipeline: decode the images in parallel, cache the decoded
# tensors, then shuffle, batch and prefetch.
train_ds = (
    train_ds
    .map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
    .cache()
    .shuffle(1000)
    .batch(batch_size)
    .prefetch(AUTOTUNE)
)

In [None]:
# Validation input pipeline: same as training but without shuffling.
val_ds = (
    val_ds
    .map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
    .cache()
    .batch(batch_size)
    .prefetch(AUTOTUNE)
)

Create and compile model

In [None]:
# Build the autoencoder from the hyperparameters defined in the Parameters cell.
model = create_vit_autoencoder(
    input_shape=input_shape,
    patch_size=patch_size,
    projection_dim=projection_dim,
    num_heads=num_heads,
    transformer_layers=transformer_layers,
)

In [None]:
# Adam wrapped in a loss-scaling optimizer, to go with the mixed-precision
# policy set earlier in the notebook.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.Adam(learning_rate=0.001)
)

In [None]:
# Train on mean squared reconstruction error and also track mean absolute error.
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

Process datasets to create input-output pairs

In [None]:
def prepare_dataset(ds):
    """Map every element ``x`` of ``ds`` to the pair ``(x, x)``.

    An autoencoder is trained to reproduce its input, so the element serves
    as both the model input and the training target.
    """
    def _as_input_target_pair(element):
        return element, element

    return ds.map(_as_input_target_pair)

In [None]:
# Turn both pipelines into (input, target) pairs for model.fit().
train_ds, val_ds = prepare_dataset(train_ds), prepare_dataset(val_ds)

Train the model

In [None]:
# Train for 30 epochs, validating after each one.
# NOTE(review): Keras documents `shuffle` as ignored when the input is a
# tf.data.Dataset; shuffling is done inside the training pipeline instead.
fit_options = dict(epochs=30, validation_data=val_ds, shuffle=True, verbose=1)
history = model.fit(train_ds, **fit_options)

Epoch 1/30


Save results

In [None]:
# Write the trained model to Google Drive.
model.save('/content/drive/MyDrive/pasta_vit_model.keras')
# Pickle the per-epoch metrics dictionary next to it for later inspection.
with open('/content/drive/MyDrive/training_history.pkl', 'wb') as history_file:
    pickle.dump(history.history, history_file)