In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, applications, optimizers, losses, metrics

# Shared settings
IMG_SIZE = (224, 224)  # target size passed as `image_size` to the dataset loaders
BATCH_SIZE = 32  # passed as `batch_size` to the dataset loaders
NUM_CLASSES = 38  # number of units in the softmax output layer of the CNN builders

# Préparation des données
def prepare_data(data_dir, shuffle_buffer=1000):
    """Load an image folder as cached, prefetched training/validation datasets.

    Both subsets are read from ``data_dir`` with the same ``validation_split``
    (0.2) and ``seed`` (123), so the two loader calls agree on the partition.

    Args:
        data_dir: Directory handed to ``image_dataset_from_directory``.
        shuffle_buffer: Number of cached batches kept in the shuffle buffer of
            the training pipeline.

    Returns:
        A ``(train_ds, val_ds)`` tuple of ``tf.data.Dataset`` objects.
    """
    # Arguments shared by the two loader calls; only `subset` differs.
    loader_options = dict(
        validation_split=0.2,
        seed=123,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
    )

    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
        data_dir, subset="training", **loader_options
    )
    val_ds = tf.keras.preprocessing.image_dataset_from_directory(
        data_dir, subset="validation", **loader_options
    )

    # Performance tuning
    autotune = tf.data.AUTOTUNE
    # cache() stores the elements in the order of the first pass, so without a
    # shuffle placed after it every epoch would replay the same batch order.
    train_ds = train_ds.cache().shuffle(shuffle_buffer).prefetch(buffer_size=autotune)
    val_ds = val_ds.cache().prefetch(buffer_size=autotune)

    return train_ds, val_ds

# Modèle VGG19
def create_vgg19_model():
    """Build and compile a VGG19 classifier with randomly initialised weights.

    The network takes 224x224x3 images, rescales them by 1/255, runs them
    through a VGG19 convolutional base (``weights=None``) and a dense head
    ending in a ``NUM_CLASSES``-way softmax.

    Returns:
        A compiled ``keras.Model`` (Adam, lr=1e-4, sparse categorical
        cross-entropy, accuracy metric).
    """
    inputs = layers.Input(shape=(224, 224, 3))

    # Rescale pixels by 1/255, as the ViT builders in this notebook do.
    # Assumes the datasets deliver unscaled pixel values (prepare_data applies
    # no normalisation) -- TODO confirm if another input pipeline is used.
    scaled = layers.Rescaling(1.0 / 255)(inputs)

    base_model = applications.VGG19(
        include_top=False,
        weights=None,
        input_shape=(224, 224, 3)
    )
    x = base_model(scaled)

    # Classification head
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    predictions = layers.Dense(NUM_CLASSES, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=predictions)

    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )

    return model

# Modèle ResNet34
def create_resnet34_model():
    """Build and compile a simplified ResNet34-style classifier.

    The network is a 7x7 stride-2 stem with max pooling, four stages of basic
    residual blocks (3, 4, 6 and 3 blocks with 64, 128, 256 and 512 filters),
    global average pooling and a dense head ending in a ``NUM_CLASSES``-way
    softmax.

    Returns:
        A compiled ``keras.Model`` (Adam, lr=1e-4, sparse categorical
        cross-entropy, accuracy metric).
    """

    def residual_block(tensor, filters, downsample=False):
        """Two 3x3 conv/BN layers plus a shortcut, followed by ReLU."""
        step = 2 if downsample else 1

        # Main branch
        branch = layers.Conv2D(
            filters, kernel_size=3, strides=(step, step), padding='same'
        )(tensor)
        branch = layers.BatchNormalization()(branch)
        branch = layers.Activation('relu')(branch)
        branch = layers.Conv2D(filters, kernel_size=3, padding='same')(branch)
        branch = layers.BatchNormalization()(branch)

        # Shortcut: identity, or a strided 1x1 projection when downsampling
        shortcut = tensor
        if downsample:
            shortcut = layers.Conv2D(
                filters, kernel_size=1, strides=(step, step), padding='same'
            )(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)

        merged = layers.add([shortcut, branch])
        return layers.Activation('relu')(merged)

    # Stem
    image_input = layers.Input(shape=(224, 224, 3))
    features = layers.Conv2D(64, kernel_size=7, strides=2, padding='same')(image_input)
    features = layers.BatchNormalization()(features)
    features = layers.Activation('relu')(features)
    features = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(features)

    # Residual stages: (filters, number of blocks, first block downsamples)
    stage_plan = (
        (64, 3, False),
        (128, 4, True),
        (256, 6, True),
        (512, 3, True),
    )
    for filters, block_count, shrink in stage_plan:
        for block_index in range(block_count):
            if shrink and block_index == 0:
                features = residual_block(features, filters, downsample=True)
            else:
                features = residual_block(features, filters)

    # Classification head
    features = layers.GlobalAveragePooling2D()(features)
    for units, drop_rate in ((512, 0.5), (256, 0.3)):
        features = layers.Dense(units, activation='relu')(features)
        features = layers.Dropout(drop_rate)(features)
    predictions = layers.Dense(NUM_CLASSES, activation='softmax')(features)

    network = models.Model(inputs=image_input, outputs=predictions)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return network

# Modèle DenseNet121
def create_densenet121_model():
    """Build and compile a DenseNet121 classifier with randomly initialised weights.

    A DenseNet121 convolutional base (``weights=None``, 224x224x3 input) is
    followed by global average pooling and a dense head ending in a
    ``NUM_CLASSES``-way softmax.

    Returns:
        A compiled ``keras.Model`` (Adam, lr=1e-4, sparse categorical
        cross-entropy, accuracy metric).
    """
    backbone = applications.DenseNet121(
        include_top=False, weights=None, input_shape=(224, 224, 3)
    )

    # Classification head: pooled features -> (Dense, Dropout) x 2 -> softmax
    features = layers.GlobalAveragePooling2D()(backbone.output)
    for units, drop_rate in ((512, 0.5), (256, 0.3)):
        features = layers.Dense(units, activation='relu')(features)
        features = layers.Dropout(drop_rate)(features)
    class_probs = layers.Dense(NUM_CLASSES, activation='softmax')(features)

    network = models.Model(inputs=backbone.input, outputs=class_probs)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return network

In [2]:
# Modèle VGG19 avec poids pré-entraînés
def create_vgg19_pretrained():
    """Build and compile a classifier on a frozen ImageNet-pretrained VGG19.

    The model takes 224x224x3 images, applies the VGG19 ``preprocess_input``
    transform inside the graph, runs the frozen convolutional base and a new
    dense head ending in a ``NUM_CLASSES``-way softmax.

    Returns:
        A compiled ``keras.Model`` (Adam, lr=1e-4, sparse categorical
        cross-entropy, accuracy metric).
    """
    base_model = applications.VGG19(
        include_top=False,
        weights='imagenet',
        input_shape=(224, 224, 3)
    )

    # Freeze the pretrained base
    base_model.trainable = False

    inputs = layers.Input(shape=(224, 224, 3))

    # The ImageNet weights were trained on inputs transformed by
    # vgg19.preprocess_input; apply it here so the frozen features see the
    # input format they expect.
    # Assumes the datasets deliver unscaled RGB pixels -- TODO confirm.
    x = applications.vgg19.preprocess_input(inputs)

    # training=False keeps the frozen base in inference mode.
    x = base_model(x, training=False)

    # New classification head
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    predictions = layers.Dense(NUM_CLASSES, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=predictions)

    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )

    return model

# Similar builders with pre-trained weights for ResNet34 and DenseNet121 would follow the same pattern (none is defined in this cell).

In [8]:
def create_efficientnet_model():
    """Build and compile a classifier on a frozen ImageNet-pretrained EfficientNetB0.

    The convolutional base (224x224x3 input) is marked non-trainable and is
    followed by global average pooling and a dense head ending in a
    ``NUM_CLASSES``-way softmax.

    Returns:
        A compiled ``keras.Model`` (Adam, lr=1e-4, sparse categorical
        cross-entropy, accuracy metric).
    """
    backbone = applications.EfficientNetB0(
        include_top=False, weights='imagenet', input_shape=(224, 224, 3)
    )
    # Only the new head is trained.
    backbone.trainable = False

    # Classification head: pooled features -> (Dense, Dropout) x 2 -> softmax
    features = layers.GlobalAveragePooling2D()(backbone.output)
    for units, drop_rate in ((512, 0.5), (256, 0.3)):
        features = layers.Dense(units, activation='relu')(features)
        features = layers.Dropout(drop_rate)(features)
    class_probs = layers.Dense(NUM_CLASSES, activation='softmax')(features)

    network = models.Model(inputs=backbone.input, outputs=class_probs)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return network

In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, losses

class PatchEmbedding(layers.Layer):
    """Cut images into non-overlapping patches and project each patch.

    A ``Conv2D`` whose kernel size and stride both equal ``patch_size``
    produces one ``projection_dim``-wide vector per patch; the resulting
    spatial grid is then flattened into a sequence.

    Args:
        patch_size: Kernel size and stride of the patch convolution.
        projection_dim: Number of convolution filters (embedding width).
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, patch_size, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size
        self.projection_dim = projection_dim
        self.projection = layers.Conv2D(
            filters=projection_dim,
            kernel_size=patch_size,
            strides=patch_size,
            padding='valid'
        )
        self.reshape = layers.Reshape((-1, projection_dim))

    def call(self, images):
        """Return patch embeddings reshaped to (batch, -1, projection_dim)."""
        patches = self.projection(images)
        patches = self.reshape(patches)
        return patches

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super().get_config()
        config.update({
            'patch_size': self.patch_size,
            'projection_dim': self.projection_dim,
        })
        return config

class AddCLSToken(layers.Layer):
    """Prepend a trainable [CLS] token to every sequence in the batch.

    Args:
        projection_dim: Width of the token; the weight has shape
            ``(1, 1, projection_dim)``.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.projection_dim = projection_dim
        self.cls_token = self.add_weight(
            shape=(1, 1, projection_dim),
            initializer='random_normal',
            trainable=True,
            name='cls_token'
        )

    def call(self, patches):
        """Return ``patches`` with the [CLS] token concatenated first on axis 1."""
        # The batch size is read at run time so the single token can be tiled
        # once per example.
        batch_size = tf.shape(patches)[0]
        cls_tokens = tf.tile(self.cls_token, [batch_size, 1, 1])
        return tf.concat([cls_tokens, patches], axis=1)

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super().get_config()
        config.update({'projection_dim': self.projection_dim})
        return config

class PositionalEmbedding(layers.Layer):
    """Add a learned position embedding to a token sequence.

    The embedding table has ``num_patches + 1`` rows, i.e. one extra position
    besides the patches (the ViT builder in this notebook uses it for the
    prepended [CLS] token).

    Args:
        num_patches: Number of patch tokens, not counting the extra position.
        projection_dim: Width of each position vector.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.projection_dim = projection_dim
        self.position_embedding = layers.Embedding(
            input_dim=num_patches + 1,
            output_dim=projection_dim
        )

    def call(self, tokens):
        """Return ``tokens`` plus the embedding of positions 0..num_patches."""
        positions = tf.range(start=0, limit=self.num_patches + 1, delta=1)
        encoded_positions = self.position_embedding(positions)
        # Broadcasts the position vectors over the batch axis.
        return tokens + encoded_positions

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super().get_config()
        config.update({
            'num_patches': self.num_patches,
            'projection_dim': self.projection_dim,
        })
        return config

class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention and a feed-forward network.

    Each sub-block adds a residual connection and applies layer normalisation
    after the addition.

    Args:
        projection_dim: Token width; the feed-forward hidden layer is twice
            this size and each attention head uses
            ``projection_dim // num_heads`` as ``key_dim``.
        num_heads: Number of attention heads.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, projection_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.projection_dim = projection_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=projection_dim // num_heads
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dense1 = layers.Dense(projection_dim * 2, activation='relu')
        self.dense2 = layers.Dense(projection_dim)
        self.dropout1 = layers.Dropout(0.1)
        self.dropout2 = layers.Dropout(0.1)

    def call(self, inputs, training=False):
        """Apply the block to ``inputs``; ``training`` toggles the dropouts."""
        # Self-attention (query and value are both the input sequence)
        attention_output = self.attention(inputs, inputs)
        attention_output = self.dropout1(attention_output, training=training)
        x = self.layernorm1(inputs + attention_output)

        # Feed-forward network
        ffn_output = self.dense1(x)
        ffn_output = self.dense2(ffn_output)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(x + ffn_output)

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super().get_config()
        config.update({
            'projection_dim': self.projection_dim,
            'num_heads': self.num_heads,
        })
        return config

def create_simple_vit_model(input_shape=(224, 224, 3), num_classes=38):
    """Build and compile a small Vision Transformer classifier.

    Pipeline: rescale by 1/255 -> patch embedding -> prepend [CLS] token ->
    add positional embedding -> 4 transformer blocks -> dense head applied to
    the [CLS] token.

    Args:
        input_shape: Shape of one input image; indices 0 and 1 are the two
            spatial sizes used to count patches.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``keras.Model`` named ``'vision_transformer'`` (Adam,
        lr=1e-4, sparse categorical cross-entropy, accuracy metric).
    """
    # Reduced ViT hyper-parameters
    patch_size = 32
    projection_dim = 32
    num_heads = 2
    transformer_layers = 4

    # Count patches along both spatial axes so non-square inputs get a
    # positional table that matches the patch sequence produced by the
    # 'valid' patch convolution.
    num_patches = (input_shape[0] // patch_size) * (input_shape[1] // patch_size)

    # Input
    inputs = layers.Input(shape=input_shape)

    # Normalisation
    x = layers.Rescaling(1./255)(inputs)

    # Patch embedding
    patches = PatchEmbedding(patch_size, projection_dim, name='patch_embedding')(x)

    # Prepend the [CLS] token
    tokens = AddCLSToken(projection_dim, name='add_cls_token')(patches)

    # Add the positional encoding
    tokens = PositionalEmbedding(num_patches, projection_dim, name='positional_embedding')(tokens)

    # Transformer layers
    for i in range(transformer_layers):
        tokens = TransformerBlock(
            projection_dim,
            num_heads,
            name=f'transformer_block_{i}'
        )(tokens)

    # Classification: use the [CLS] token, which sits at sequence index 0
    cls_token = layers.Lambda(lambda x: x[:, 0], name='extract_cls_token')(tokens)
    x = layers.Dense(256, activation='relu', name='classifier_1')(cls_token)
    x = layers.Dropout(0.5, name='dropout_1')(x)
    x = layers.Dense(128, activation='relu', name='classifier_2')(x)
    x = layers.Dropout(0.3, name='dropout_2')(x)
    outputs = layers.Dense(num_classes, activation='softmax', name='output')(x)

    model = models.Model(inputs=inputs, outputs=outputs, name='vision_transformer')

    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )

    return model

# Version encore plus simple pour débuter
class _PatchPositionEmbedding(layers.Layer):
    """Add a learned position embedding to a sequence of patch tokens."""

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.projection_dim = projection_dim
        self.position_embedding = layers.Embedding(
            input_dim=num_patches,
            output_dim=projection_dim
        )

    def call(self, tokens):
        """Return ``tokens`` plus the embedding of positions 0..num_patches-1."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        return tokens + self.position_embedding(positions)

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super().get_config()
        config.update({
            'num_patches': self.num_patches,
            'projection_dim': self.projection_dim,
        })
        return config


def create_minimal_vit_model(input_shape=(224, 224, 3), num_classes=38):
    """Build and compile a minimal single-attention-layer ViT classifier.

    Pipeline: rescale by 1/255 -> strided-convolution patch embedding ->
    learned positional embedding -> one multi-head self-attention layer with
    residual connection and layer normalisation -> average pooling over the
    patches -> dense head.

    Args:
        input_shape: Shape of one input image; indices 0 and 1 are the two
            spatial sizes used to count patches.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``keras.Model`` (Adam, lr=1e-4, sparse categorical
        cross-entropy, accuracy metric).
    """
    patch_size = 32
    projection_dim = 32
    # Count patches along both spatial axes so the Reshape below also matches
    # non-square inputs.
    num_patches = (input_shape[0] // patch_size) * (input_shape[1] // patch_size)

    inputs = layers.Input(shape=input_shape)

    # Normalisation and patch embedding
    x = layers.Rescaling(1./255)(inputs)
    x = layers.Conv2D(projection_dim, patch_size, patch_size, padding='valid')(x)
    x = layers.Reshape((num_patches, projection_dim))(x)

    # Positional encoding. The embedding lives inside a layer that is part of
    # the model graph, so its weights are tracked and trained; calling an
    # Embedding on an eager tensor here would only add a fixed constant.
    x = _PatchPositionEmbedding(num_patches, projection_dim)(x)

    # Single self-attention layer with residual connection
    attention_output = layers.MultiHeadAttention(
        num_heads=2,
        key_dim=projection_dim // 2
    )(x, x)
    x = layers.Add()([x, attention_output])
    x = layers.LayerNormalization(epsilon=1e-6)(x)

    # Global average pooling and classification
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )

    return model

# Usage: build each ViT variant with its default arguments and print its
# layer summary. The models are kept in module-level names for later cells.
print("Création du modèle ViT minimal...")
vit_model = create_minimal_vit_model()
vit_model.summary()

print("\nCréation du modèle ViT simple...")
vit_model_simple = create_simple_vit_model()
vit_model_simple.summary()

Création du modèle ViT minimal...



Création du modèle ViT simple...

