In [1]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Generate synthetic data
def create_synthetic_data(samples=1000, time_steps=128, features=10, classes=2):
    X = np.random.rand(samples, time_steps, features).astype(np.float32)  # Random sequences
    y = np.random.randint(classes, size=(samples, 1))  # Random labels (binary)
    return X, y

# Create dataset
X, y = create_synthetic_data()
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
import tensorflow as tf

class MultiHeadSelfAttention(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.projection_dim = embed_dim // num_heads
        self.query_dense = tf.keras.layers.Dense(embed_dim)
        self.key_dense = tf.keras.layers.Dense(embed_dim)
        self.value_dense = tf.keras.layers.Dense(embed_dim)
        self.combine_heads = tf.keras.layers.Dense(embed_dim)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        attention, _ = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        output = self.combine_heads(concat_attention)
        return output

class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(ff_dim, activation="relu"),
            tf.keras.layers.Dense(embed_dim),
        ])
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)


def positional_encoding(position, d_model):
    def get_angles(pos, i, d_model):
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return pos * angle_rates

    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                            np.arange(d_model)[np.newaxis, :],
                            d_model)

    # apply sin to even indices in the array; 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])

    # apply cos to odd indices in the array; 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    pos_encoding = angle_rads[np.newaxis, ...]

    return tf.cast(pos_encoding, dtype=tf.float32)

def build_transformer_model(input_shape, embed_dim, num_heads, ff_dim, num_classes):
    inputs = tf.keras.Input(shape=input_shape)

    # Positional Encoding with embed_dim equal to the number of input features
    position_encoding = positional_encoding(input_shape[0], embed_dim)
    x = inputs + position_encoding  # This will now match the input dimension

    # Transformer Block
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    x = transformer_block(x, training=True)  # Pass the training flag

    # Global Average Pooling
    x = tf.keras.layers.GlobalAveragePooling1D()(x)

    # Fully Connected Layers
    x = tf.keras.layers.Dense(64, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.1)(x, training=True)  # Also ensure dropout receives the training flag

    # Output Layer
    outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model


# Hyperparameters
embed_dim = 10  # Embedding size for each token
num_heads = 4   # Number of attention heads
ff_dim = 128    # Hidden layer size in the feed-forward network
num_classes = 2 # Binary classification

# Build the model
input_shape = (X_train.shape[1], X_train.shape[2])  # (time_steps, features)
model = build_transformer_model(input_shape, embed_dim, num_heads, ff_dim, num_classes)

# Compile the model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Model summary
model.summary()


In [6]:
# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 112ms/step - accuracy: 0.5042 - loss: 0.6981 - val_accuracy: 0.4700 - val_loss: 0.6952
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.5194 - loss: 0.6926 - val_accuracy: 0.4700 - val_loss: 0.6979
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5219 - loss: 0.6930 - val_accuracy: 0.5300 - val_loss: 0.6921
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4785 - loss: 0.6963 - val_accuracy: 0.4700 - val_loss: 0.6995
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5081 - loss: 0.6958 - val_accuracy: 0.4800 - val_loss: 0.6938
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5015 - loss: 0.6932 - val_accuracy: 0.4700 - val_loss: 0.7024
Epoch 7/10
[1m25/25[0m [32m━━━━━━━

In [7]:
# Evaluate the model
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4467 - loss: 0.6968 
Validation Loss: 0.6953997611999512
Validation Accuracy: 0.4749999940395355


In [8]:
import torch
from transformers import BertTokenizer, BertModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_text_embedding(text):
    inputs = tokenizer(text, return_tensors='pt')
    outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1)  # Extract the embedding

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [9]:
import torch.nn as nn

class Generator(nn.Module):
    def __init__(self, z_dim, text_embedding_dim):
        super(Generator, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(z_dim + text_embedding_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 64 * 64 * 3),
            nn.Tanh()  # Generate an image of 64x64 with 3 channels (RGB)
        )

    def forward(self, noise, text_embedding):
        x = torch.cat((noise, text_embedding), dim=1)
        x = self.fc(x)
        return x.view(-1, 3, 64, 64)

class Discriminator(nn.Module):
    def __init__(self, text_embedding_dim):
        super(Discriminator, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Flatten()
        )
        self.fc = nn.Sequential(
            nn.Linear(256 * 8 * 8 + text_embedding_dim, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 1),
            nn.Sigmoid()  # Binary classification for real vs fake
        )

    def forward(self, image, text_embedding):
        x = self.conv(image)
        x = torch.cat((x, text_embedding), dim=1)
        return self.fc(x)

In [11]:
import torch.optim as optim

def train_gan(generator, discriminator, dataloader, epochs, z_dim, text_embedding_dim, device):
    criterion = nn.BCELoss()
    g_optimizer = optim.Adam(generator.parameters(), lr=0.0002)
    d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002)

    for epoch in range(epochs):
        for real_images, captions in dataloader:
            real_images = real_images.to(device)
            text_embeddings = torch.stack([get_text_embedding(caption) for caption in captions]).to(device)
            batch_size = real_images.size(0)

            # Create labels
            real_labels = torch.ones(batch_size, 1).to(device)
            fake_labels = torch.zeros(batch_size, 1).to(device)

            # Train Discriminator
            noise = torch.randn(batch_size, z_dim).to(device)
            fake_images = generator(noise, text_embeddings)

            d_optimizer.zero_grad()
            real_loss = criterion(discriminator(real_images, text_embeddings), real_labels)
            fake_loss = criterion(discriminator(fake_images.detach(), text_embeddings), fake_labels)
            d_loss = real_loss + fake_loss
            d_loss.backward()
            d_optimizer.step()

            # Train Generator
            g_optimizer.zero_grad()
            g_loss = criterion(discriminator(fake_images, text_embeddings), real_labels)
            g_loss.backward()
            g_optimizer.step()

        print(f'Epoch [{epoch+1}/{epochs}], d_loss: {d_loss.item()}, g_loss: {g_loss.item()}')

# Example usage
z_dim = 100
text_embedding_dim = 768  # Using BERT embeddings
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

generator = Generator(z_dim, text_embedding_dim).to(device)
discriminator = Discriminator(text_embedding_dim).to(device)

# Assume you have a dataloader that provides real_images and captions
train_gan(generator, discriminator, dataloader, epochs=100, z_dim=z_dim, text_embedding_dim=text_embedding_dim, device=device)


NameError: name 'dataloader' is not defined