<a href="https://colab.research.google.com/github/ap5967ap/Coursera-DeepLearning.AI-Stanford-University-Machine-Learning-Specialization/blob/main/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Flatten, Dropout, LayerNormalization
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
import numpy as np


In [31]:
class PatchEmbedding(Layer):
    """Cut images into non-overlapping square patches and project each one linearly.

    Args:
        patch_size: Side length, in pixels, of every square patch.
        embed_dim: Number of units of the dense projection applied to each
            flattened patch.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
            Accepting them is required for ``from_config`` to rebuild the layer
            when a saved model is loaded.
    """

    def __init__(self, patch_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size
        self.embed_dim = embed_dim
        self.projection = Dense(embed_dim)

    def call(self, images):
        """Return patch embeddings of shape ``[batch, num_patches, embed_dim]``."""
        batch_size = tf.shape(images)[0]
        # Kernel size == stride, so patches tile the image without overlap;
        # 'VALID' padding drops any border that does not fill a whole patch.
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding='VALID'
        )
        # Last axis holds the flattened pixels of one patch.
        patch_dims = patches.shape[-1]
        # Collapse the 2-D grid of patches into a single sequence axis.
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return self.projection(patches)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            "patch_size": self.patch_size,
            "embed_dim": self.embed_dim,
        })
        return config


In [32]:
class PositionalEncoding(Layer):
    """Add a fixed (non-trainable) sinusoidal position signal to a token sequence.

    The table has shape ``[num_patches, embed_dim]``. Sine terms fill the first
    half of the feature axis and cosine terms the second half: the two halves
    are concatenated, not interleaved.

    Args:
        num_patches: Sequence length the table is built for.
        embed_dim: Feature width of the tokens the table is added to.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, num_patches, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can reproduce the constructor call.
        self.num_patches = num_patches
        self.embed_dim = embed_dim
        self.pos_encoding = self.positional_encoding(num_patches, embed_dim)

    def positional_encoding(self, num_patches, embed_dim):
        """Build the ``[num_patches, embed_dim]`` float32 sinusoid table."""
        # Column vector of positions: [num_patches, 1]
        positions = tf.range(num_patches, dtype=tf.float32)[:, tf.newaxis]
        # One frequency per pair of feature channels: 10000^(-2i / embed_dim)
        div_term = tf.exp(tf.range(0, embed_dim, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / embed_dim))

        angles = positions * div_term

        # Sine block followed by cosine block along the feature axis
        pos_encoding = tf.concat([tf.sin(angles), tf.cos(angles)], axis=1)

        # For odd embed_dim the concatenation is one column too wide; trim it
        return pos_encoding[:, :embed_dim]

    def call(self, x):
        """Return ``x`` with the position table broadcast-added over the batch."""
        # The table is float32; cast so the addition also works when x has
        # another float dtype (e.g. under a mixed-precision policy).
        return x + tf.cast(self.pos_encoding, x.dtype)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            "num_patches": self.num_patches,
            "embed_dim": self.embed_dim,
        })
        return config


In [68]:
class TransformerEncoderBlock(Layer):
    """Post-norm Transformer encoder block: self-attention then a feed-forward net.

    Each sub-layer is followed by dropout, a residual connection and layer
    normalization.

    Args:
        embed_dim: Feature width of the input tokens (and of the block output).
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        dropout_rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout_rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can reproduce the constructor call.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate

        # NOTE(review): key_dim is the size of *each* head, so every head here is
        # embed_dim wide rather than embed_dim // num_heads. Left unchanged to
        # keep weight shapes compatible with already-trained models.
        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(dropout_rate)
        self.dropout2 = Dropout(dropout_rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``; ``training`` toggles dropout."""
        # Self-attention: the same tensor serves as query and value (and key).
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "dropout_rate": self.dropout_rate,
        })
        return config


In [34]:
def create_vit_model(input_shape, patch_size, embed_dim, num_heads, ff_dim, num_layers, num_classes):
    """Assemble a Vision-Transformer classifier as a functional Keras model.

    Pipeline: patch embedding -> positional encoding -> ``num_layers`` encoder
    blocks -> layer norm -> flatten -> dense(ff_dim, relu) -> dropout ->
    softmax over ``num_classes``.

    Args:
        input_shape: Image shape; the first two entries are used as height and width.
        patch_size: Side length of the square patches.
        embed_dim: Token width.
        num_heads: Attention heads per encoder block.
        ff_dim: Hidden width of the feed-forward nets and of the classifier head.
        num_layers: Number of stacked encoder blocks.
        num_classes: Size of the softmax output.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    image_input = tf.keras.Input(shape=input_shape)

    # Tokenize the image, then tag every token with its position.
    tokens = PatchEmbedding(patch_size, embed_dim)(image_input)
    rows = input_shape[0] // patch_size
    cols = input_shape[1] // patch_size
    tokens = PositionalEncoding(rows * cols, embed_dim)(tokens)

    # Encoder stack; Keras supplies the training flag to each block itself.
    for _layer_index in range(num_layers):
        encoder_block = TransformerEncoderBlock(embed_dim, num_heads, ff_dim)
        tokens = encoder_block(tokens)

    # Classification head operating on all tokens at once.
    features = LayerNormalization(epsilon=1e-6)(tokens)
    features = Flatten()(features)
    features = Dense(ff_dim, activation='relu')(features)
    features = Dropout(0.1)(features)
    class_probs = Dense(num_classes, activation='softmax')(features)

    return tf.keras.Model(inputs=image_input, outputs=class_probs)


In [35]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Scale pixels to [0, 1]. Cast to float32 *before* dividing: an int32 array
# divided by a Python float is promoted to float64, which doubles the memory
# footprint of the dataset for no benefit.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encode labels (10 classes)
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)


In [36]:
# ViT for 32x32 RGB inputs: 4x4 patches give an 8x8 grid (64 tokens) of width 64,
# processed by 8 encoder blocks with 4 heads each.
vit_model = create_vit_model(
    (32, 32, 3),
    patch_size=4, embed_dim=64,
    num_heads=4, ff_dim=128, num_layers=8,
    num_classes=10,
)

In [41]:
# Labels are one-hot vectors, hence categorical (not sparse) cross-entropy.
vit_model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),
)

In [47]:
# Train for 100 epochs in mini-batches of 64, with 20% of the training data
# held out for validation; `history` keeps the per-epoch metrics.
history = vit_model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    verbose=1,
)


In [44]:
# Persist the model to ./vit.keras.
# NOTE(review): the execution counts show this cell (In [44]) was run before the
# training cell above (In [47]), so the file on disk may predate the last fit --
# re-run this cell after training to save the final weights.
vit_model.save("./vit.keras")

In [45]:
# Alias the trained ViT as `model`. The name is rebound further down, when the
# ByteFormer is instantiated.
model = vit_model

In [53]:
from tensorflow.keras.utils import to_categorical
import numpy as np

# Load the CIFAR-10 dataset again. This rebinds x_train / y_train / x_test / y_test
# to the arrays returned by load_data(), which the byte-vector conversion below uses.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Function to convert images into byte vectors
def images_to_byte_vectors(images):
    """Flatten every image of a batch into a 1-D vector of ``uint8`` bytes.

    Args:
        images: Array-like whose first axis indexes the images, e.g. a
            ``(N, H, W, C)`` batch. Values are cast to ``uint8`` as-is, so they
            are expected to already lie in the 0-255 range.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` with shape ``(N, H * W * C)`` (more
        generally ``(N, product of the remaining dims)``).
    """
    images = np.asarray(images)
    # Compute the row width explicitly: reshape(n, -1) cannot infer the -1
    # dimension when the batch is empty and would raise a ValueError.
    bytes_per_image = int(np.prod(images.shape[1:]))
    return images.reshape(images.shape[0], bytes_per_image).astype(np.uint8)

# Flatten both image splits into per-image byte sequences
x_train_bytes, x_test_bytes = (images_to_byte_vectors(split) for split in (x_train, x_test))

# Turn the integer labels into one-hot rows over the 10 classes
y_train, y_test = (to_categorical(labels, 10) for labels in (y_train, y_test))

# Sanity check: one row per image, 32 * 32 * 3 = 3072 bytes each
print(f"x_train_bytes shape: {x_train_bytes.shape}")  # Expected: (50000, 3072)
print(f"x_test_bytes shape: {x_test_bytes.shape}")    # Expected: (10000, 3072)


x_train_bytes shape: (50000, 3072)
x_test_bytes shape: (10000, 3072)


In [61]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Conv1D, LayerNormalization, Dropout, GlobalAveragePooling1D
from tensorflow.keras import Model
from tensorflow.keras.utils import to_categorical

In [81]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

class ByteEmbedding(Layer):
    """Trainable lookup table mapping each byte value (0-255) to a vector.

    Args:
        embed_dim: Width of the embedding vector stored for every byte value.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        # Create the table through add_weight so that Keras registers it as a
        # trainable weight of this layer (optimizer updates, saving, summary).
        self.embedding_weights = self.add_weight(
            name="byte_embedding_weights",
            shape=(256, embed_dim),
            initializer=tf.keras.initializers.RandomNormal(stddev=0.02),
            trainable=True,
        )

    def call(self, inputs):
        """Return the embeddings of ``inputs``; output shape is ``inputs.shape + [embed_dim]``."""
        # tf.gather only accepts signed integer index dtypes, while byte data
        # arrives as uint8 -- cast before the lookup.
        indices = tf.cast(inputs, tf.int32)
        return tf.gather(self.embedding_weights, indices)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({"embed_dim": self.embed_dim})
        return config


In [82]:
class ByteFormer(Model):
    """Transformer classifier that operates directly on byte sequences.

    Pipeline: byte embedding -> strided Conv1D (shortens the sequence) ->
    positional encoding -> ``num_blocks`` encoder blocks -> global average
    pooling -> softmax classifier.

    Args:
        num_patches: Sequence length *after* the strided convolution; must equal
            the Conv1D output length because the positional table is added to it.
        embed_dim: Token width (also the number of Conv1D filters).
        num_heads: Attention heads per encoder block.
        ff_dim: Hidden width of each block's feed-forward network.
        num_blocks: Number of stacked encoder blocks.
        kernel_size: Conv1D kernel size.
        stride: Conv1D stride.
        dropout_rate: Dropout rate used inside the encoder blocks.
        num_classes: Size of the softmax output. Defaults to 10 (CIFAR-10).
    """

    def __init__(self, num_patches, embed_dim, num_heads, ff_dim, num_blocks, kernel_size, stride,
                 dropout_rate=0.1, num_classes=10):
        super(ByteFormer, self).__init__()

        self.byte_embedding = ByteEmbedding(embed_dim)  # Byte embedding layer
        self.conv1d = Conv1D(filters=embed_dim, kernel_size=kernel_size, strides=stride, padding="valid", activation="relu")
        self.pos_encoding = PositionalEncoding(num_patches, embed_dim)  # Positional Encoding
        self.transformer_blocks = [TransformerEncoderBlock(embed_dim, num_heads, ff_dim, dropout_rate) for _ in range(num_blocks)]
        self.pooling = GlobalAveragePooling1D()  # Pooling after the transformer blocks
        self.classifier = Dense(num_classes, activation='softmax')  # Final classifier layer

    def call(self, x, training=False):
        """Map a batch of byte sequences to class probabilities."""
        # Step 1: Byte embedding
        x = self.byte_embedding(x)

        # Step 2: Strided convolution to reduce sequence length
        x = self.conv1d(x)

        # Step 3: Add positional encoding
        x = self.pos_encoding(x)

        # Step 4: Pass through transformer encoder blocks
        for block in self.transformer_blocks:
            x = block(x, training=training)

        # Step 5: Global Average Pooling over the sequence axis
        x = self.pooling(x)

        # Step 6: Final classification layer
        return self.classifier(x)



In [83]:
# Model Parameters
# Each CIFAR-10 image flattens to 32 * 32 * 3 = 3072 bytes. The positional table
# is sized from this value, so it must match the real input length or the
# addition inside the model fails with a shape mismatch.
sequence_length = 32 * 32 * 3  # Input sequence length before convolution
embed_dim = 128         # Embedding dimension
num_heads = 8           # Number of attention heads
ff_dim = 512            # Feed-forward dimension
num_blocks = 4          # Number of transformer encoder blocks
dropout_rate = 0.1      # Dropout rate for regularization
kernel_size = 3         # Kernel size for Conv1D
stride = 2              # Stride for Conv1D

# Sequence length after the 'valid' strided convolution
num_patches = (sequence_length - kernel_size) // stride + 1

# Instantiate the model
model = ByteFormer(num_patches=num_patches, embed_dim=embed_dim, num_heads=num_heads,
                   ff_dim=ff_dim, num_blocks=num_blocks, kernel_size=kernel_size, stride=stride,
                   dropout_rate=dropout_rate)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# A subclassed model creates its weights on the first call; run one dummy batch
# of byte indices so that summary() can report layers and parameter counts.
model(tf.zeros((1, sequence_length), dtype=tf.int32))

# Print model summary
model.summary()

AttributeError: 'ByteEmbedding' object has no attribute 'compile'

In [78]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

class ByteEmbedding(Model):
    """Stand-alone Keras model that embeds byte ids with an ``Embedding`` table.

    NOTE(review): this reuses the name of the ``ByteEmbedding`` layer defined
    earlier in the notebook, so whichever of the two cells ran last decides what
    ``ByteEmbedding`` refers to.
    """

    def __init__(self, embed_dim):
        super().__init__()
        self.embed_dim = embed_dim
        # 256 rows: one embedding vector per possible byte value.
        self.byte_embedding = tf.keras.layers.Embedding(256, embed_dim)

    def call(self, inputs):
        """Look up the embedding vector of every byte id in ``inputs``."""
        return self.byte_embedding(inputs)


In [79]:
# Build the stand-alone embedding model and give it the same optimizer, loss
# and metric as the classifiers above.
model11 = ByteEmbedding(embed_dim)
model11.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [80]:
# Show the layer / parameter overview of the embedding model.
# NOTE(review): model11 is never called on data before this point in the
# notebook -- confirm that summary() works on an unbuilt subclassed model in
# the Keras version in use.
model11.summary()