In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Reshape
from tensorflow.keras.models import Model
import numpy as np
import matplotlib.pyplot as plt
import os
import urllib.request
import tarfile

# Download STL-10 dataset (only when the archive is not already on disk)
url = 'http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz'
file_name = 'stl10_binary.tar.gz'

if not os.path.exists(file_name):
    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer never leaves a truncated archive that looks complete.
    partial_name = file_name + '.part'
    urllib.request.urlretrieve(url, partial_name)
    os.replace(partial_name, file_name)

# Locations of the four binary files this cell reads
data_dir = 'stl10_binary'
file_names = ['train_X.bin', 'train_y.bin', 'test_X.bin', 'test_y.bin']

x_train_path = os.path.join(data_dir, file_names[0])
y_train_path = os.path.join(data_dir, file_names[1])
x_test_path = os.path.join(data_dir, file_names[2])
y_test_path = os.path.join(data_dir, file_names[3])

# Extract the dataset only when one of the expected files is missing; the
# context manager closes the archive even if extraction fails.
if not all(os.path.exists(path) for path in (x_train_path, y_train_path, x_test_path, y_test_path)):
    with tarfile.open(file_name, "r:gz") as tar:
        tar.extractall()

# Load the dataset. Images are stored as flat uint8 values; they are reshaped
# to (N, 3, 96, 96) and then transposed so channels are the last axis.
x_train = np.fromfile(x_train_path, dtype=np.uint8).reshape(-1, 3, 96, 96).transpose(0, 2, 3, 1)
y_train = np.fromfile(y_train_path, dtype=np.uint8) - 1  # Class labels range from 1 to 10, so subtract 1
x_test = np.fromfile(x_test_path, dtype=np.uint8).reshape(-1, 3, 96, 96).transpose(0, 2, 3, 1)
y_test = np.fromfile(y_test_path, dtype=np.uint8) - 1  # Class labels range from 1 to 10, so subtract 1


# Preprocessing: scale pixel values from [0, 255] to [0, 1]
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Define the VAE architecture
latent_dim = 128

# Encoder: two stride-2 convolutions (96 -> 48 -> 24), then a dense bottleneck
inputs = Input(shape=(96, 96, 3))
x = Conv2D(32, (3, 3), activation='relu', strides=(2, 2), padding='same')(inputs)
x = Conv2D(64, (3, 3), activation='relu', strides=(2, 2), padding='same')(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)

# Latent space: mean and log-variance of the approximate posterior
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)

# Reparameterization trick

def sampling(args):
    """Sample a latent vector with the reparameterization trick.

    Args:
        args: Pair ``(z_mean, z_log_var)`` of equally shaped tensors.

    Returns:
        ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is
        standard-normal noise with the same shape as ``z_mean``.
    """
    z_mean, z_log_var = args
    # Take the noise shape from z_mean itself instead of the notebook-level
    # `latent_dim`, so the layer keeps working if that global is changed later.
    epsilon = tf.random.normal(shape=tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

z = tf.keras.layers.Lambda(sampling)([z_mean, z_log_var])

# Decoder: 6x6 feature map upsampled by two stride-2 transposed convolutions
# (6 -> 12 -> 24); the last layer keeps the size, so the output is 24x24x3.
decoder_inputs = Input(shape=(latent_dim,))
x = Dense(6 * 6 * 64, activation='relu')(decoder_inputs)
x = Reshape((6, 6, 64))(x)
x = Conv2DTranspose(64, (3, 3), activation='relu', strides=(2, 2), padding='same')(x)
x = Conv2DTranspose(32, (3, 3), activation='relu', strides=(2, 2), padding='same')(x)
outputs = Conv2DTranspose(3, (3, 3), activation='sigmoid', padding='same')(x)

# VAE model. `encoder` maps an image to the posterior mean only.
encoder = Model(inputs, z_mean)
decoder = Model(decoder_inputs, outputs)
vae_output = decoder(z)
vae = Model(inputs, vae_output)

# The decoder emits 24x24 images, so the 96x96 input is downsampled to the
# same resolution before the two are compared.
inputs_reshaped = tf.image.resize(inputs, (24, 24))
vae_output_reshaped = tf.image.resize(vae_output, (24, 24))

# Reconstruction term: binary_crossentropy averages over the last axis, so
# multiplying by the pixel count turns it into a per-image sum.
reconstruction_loss = tf.keras.losses.binary_crossentropy(tf.reshape(inputs_reshaped, (-1, 24 * 24 * 3)),
                                                          tf.reshape(vae_output_reshaped, (-1, 24 * 24 * 3)))
reconstruction_loss *= 24 * 24 * 3

# KL term: summed (not averaged) over the latent dimensions so that it is on
# the same per-image scale as the summed reconstruction term.
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_sum(kl_loss, axis=-1)
kl_loss *= -0.5

vae_loss = tf.reduce_mean(reconstruction_loss + kl_loss)

# Compile the model; the loss is attached to the graph, so no targets are needed
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

# Train the model
epochs = 100
batch_size = 128
history = vae.fit(x_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, None))

# Keep the legacy alias keys. Note that they hold the total VAE loss
# (reconstruction + KL) reported by Keras, not the reconstruction term alone.
history.history['train_reconstruction_loss'] = history.history['loss']
history.history['val_reconstruction_loss'] = history.history['val_loss']


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt

# Step 1: Data Preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32, 32)),  # Resize the images to 32x32
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # Map [0, 1] to [-1, 1]
])

stl10_dataset = torchvision.datasets.STL10(root='./data', split='train', download=True, transform=transform)

# Divide data into training (80%) and validation (remaining) sets.
# A dedicated seeded generator makes the split identical on every run.
train_size = int(0.8 * len(stl10_dataset))
val_size = len(stl10_dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(stl10_dataset, [train_size, val_size], generator=split_generator)

trainloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valloader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Step 2: Model Architecture
class VAE(nn.Module):
    """Convolutional variational autoencoder for 3-channel 32x32 images.

    The encoder halves the spatial size four times (32 -> 2) and ends in a
    linear layer producing ``2 * latent_dim`` values: the first half is the
    posterior mean, the second half its log-variance. The decoder mirrors the
    encoder and ends in ``tanh``, so reconstructions lie in [-1, 1].

    Args:
        latent_dim: Size of the latent vector.
    """

    def __init__(self, latent_dim=64):
        super().__init__()
        self.latent_dim = latent_dim

        # Encoder layers
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=4, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1), nn.ReLU(),
            nn.Flatten(),
            nn.Linear(256 * 2 * 2, 1024),
            nn.ReLU(),
            nn.Linear(1024, self.latent_dim * 2),
        )

        # Decoder layers
        self.decoder = nn.Sequential(
            nn.Linear(self.latent_dim, 1024),
            nn.ReLU(),
            nn.Linear(1024, 256 * 2 * 2),
            nn.ReLU(),
            nn.Unflatten(1, (256, 2, 2)),
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),  # To map output to [-1, 1] range for images with normalized data
        )

    def encode(self, x):
        """Return ``(mu, logvar)``, each of shape ``(batch, latent_dim)``."""
        stats = self.encoder(x)
        mu = stats[:, :self.latent_dim]
        logvar = stats[:, self.latent_dim:]
        return mu, logvar

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + eps * exp(0.5 * logvar)`` with ``eps ~ N(0, I)``."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        """Map latent vectors to images through the decoder."""
        return self.decoder(z)

    def forward(self, x):
        """Encode, sample and decode; returns ``(reconstruction, mu, logvar)``."""
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

# Step 3: Loss Function with Regularizer (KL Divergence)
def vae_loss(recon_x, x, mu, logvar):
    """Return the VAE loss averaged per sample.

    Both terms are summed over all of their elements and the total is divided
    by the batch size, so reconstruction and KL share one scale. Averaging the
    MSE over every pixel while summing the KL over the whole batch would let
    the KL term swamp the reconstruction term.

    Args:
        recon_x: Reconstructed batch, same shape as ``x``.
        x: Input batch; the first dimension is the batch size.
        mu: Posterior means.
        logvar: Posterior log-variances, same shape as ``mu``.

    Returns:
        Scalar tensor: (summed squared error + summed KL divergence) / batch size.
    """
    batch_size = x.size(0)
    # Reconstruction Loss (squared error summed over all pixels)
    reconstruction_loss = nn.functional.mse_loss(recon_x, x, reduction='sum')
    # KL Divergence Loss (Regularizer), summed over batch and latent dimensions
    kl_divergence_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return (reconstruction_loss + kl_divergence_loss) / batch_size

# Step 4: Training with Weight Decay and Learning Rate Scheduler
def train_vae(model, trainloader, valloader, optimizer, num_epochs=10, scheduler=None):
    """Train ``model`` and record the mean per-batch loss of every epoch.

    Args:
        model: VAE whose forward pass returns ``(reconstruction, mu, logvar)``.
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        valloader: Iterable of ``(inputs, labels)`` validation batches.
        optimizer: Optimizer updating ``model``'s parameters.
        num_epochs: Number of passes over ``trainloader``.
        scheduler: Optional learning-rate scheduler; stepped once per epoch,
            after that epoch's optimizer updates.

    Returns:
        ``(losses, val_losses)``: two lists with one entry per epoch.
    """
    # Use the device the model lives on instead of relying on a global.
    device = next(model.parameters()).device
    losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, _ in trainloader:
            inputs = inputs.to(device)

            optimizer.zero_grad()
            recon_batch, mu, logvar = model(inputs)
            loss = vae_loss(recon_batch, inputs, mu, logvar)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        epoch_loss = running_loss / len(trainloader)
        losses.append(epoch_loss)

        # Validation loss
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, _ in valloader:
                inputs = inputs.to(device)
                recon_batch, mu, logvar = model(inputs)
                val_loss += vae_loss(recon_batch, inputs, mu, logvar).item()

        val_loss /= len(valloader)
        val_losses.append(val_loss)

        if scheduler is not None:
            scheduler.step()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss}, Val Loss: {val_loss}")

    # Leave the model in training mode, as callers of the loop expect.
    model.train()
    return losses, val_losses


# Step 5: Sampling and Generation
def generate_images(model, num_images=10):
    """Decode ``num_images`` standard-normal latent vectors into images.

    Returns:
        CPU tensor holding the decoder output for the sampled latents.
    """
    device = next(model.parameters()).device
    model.eval()
    with torch.no_grad():
        z = torch.randn(num_images, model.latent_dim, device=device)
        return model.decode(z).cpu()


def _to_display_image(img):
    """Convert a (C, H, W) tensor in [-1, 1] to an (H, W, C) array in [0, 1]."""
    img = img.detach().cpu().permute(1, 2, 0)  # Transpose to (H, W, C)
    img = (img + 1) / 2.0  # De-normalize from [-1, 1] to [0, 1]
    return img.clamp(0, 1).numpy()


def plot_generated_vs_original(generated_images, original_images):
    """Show generated images (top row) above original images (bottom row).

    Args:
        generated_images: Sequence of (C, H, W) tensors in [-1, 1].
        original_images: Sequence of (C, H, W) tensors in [-1, 1]; at most
            ``len(generated_images)`` of them are shown.
    """
    num_cols = len(generated_images)
    # squeeze=False keeps `axes` two-dimensional even for a single column.
    fig, axes = plt.subplots(2, num_cols, figsize=(15, 5), squeeze=False)
    for ax in axes.ravel():
        ax.axis('off')

    for i, img in enumerate(generated_images):
        axes[0, i].imshow(_to_display_image(img))

    for i, img in enumerate(original_images[:num_cols]):
        axes[1, i].imshow(_to_display_image(img))

    axes[0, 0].set_title('Generated Images')
    axes[1, 0].set_title('Original Images')
    plt.show()


# Main
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)  # Reproducible weight initialisation and sampling

vae_model = VAE(latent_dim=64).to(device)
optimizer = optim.Adam(vae_model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Halve the learning rate every 5 epochs

# Train VAE with losses recorded; the scheduler is stepped inside the loop
train_losses, val_losses = train_vae(vae_model, trainloader, valloader, optimizer,
                                     num_epochs=10, scheduler=scheduler)

# Plot the learning curve
def plot_learning_curve(train_losses, val_losses):
    """Plot per-epoch training and validation losses on one figure.

    Args:
        train_losses: Sequence with one training loss per epoch.
        val_losses: Sequence with one validation loss per epoch; must have the
            same length as ``train_losses``.
    """
    epochs = range(1, len(train_losses) + 1)
    plt.figure()
    plt.plot(epochs, train_losses, '-o', label='Train Loss')
    plt.plot(epochs, val_losses, '-o', label='Val Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Variational Autoencoder Learning Curve')
    plt.show()

plot_learning_curve(train_losses, val_losses)

# Generate and display images
generated_images = generate_images(vae_model, num_images=10)

# Collect the first image of each of the first 10 validation batches
original_images = []
for inputs, _ in valloader:
    original_images.append(inputs[0])
    if len(original_images) >= 10:  # Display 10 original images
        break

plot_generated_vs_original(generated_images, original_images)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from vit_keras import vit, utils

# Build the ImageNet-pretrained ViT-B/16 classifier and its class-name table
image_size = 384
classes = utils.get_imagenet_classes()
model = vit.vit_b16(
    image_size=image_size,
    pretrained=True,
    include_top=True,
    pretrained_top=True,
)

# Fetch the sample image at the model's input resolution
url = 'https://upload.wikimedia.org/wikipedia/commons/d/d7/Granny_smith_and_cross_section.jpg'
image = utils.read(url, image_size)

# Preprocess and add a leading batch axis of size one
preprocessed = vit.preprocess_inputs(image)
X = preprocessed.reshape(1, image_size, image_size, 3)

# Run the classifier on the single-image batch
y = model.predict(X)

# Report the name of the highest-scoring class
predicted_index = int(np.argmax(y[0]))
print(classes[predicted_index])


In [None]:
!pip install tensorflow-addons
!pip install vit_keras

# Load the dataset
x_train_path = os.path.join(data_dir, file_names[0])
y_train_path = os.path.join(data_dir, file_names[1])
x_test_path = os.path.join(data_dir, file_names[2])
y_test_path = os.path.join(data_dir, file_names[3])

x_train = np.fromfile(x_train_path, dtype=np.uint8).reshape(-1, 3, 96, 96).transpose(0, 2, 3, 1)
y_train = np.fromfile(y_train_path, dtype=np.uint8) - 1
x_test = np.fromfile(x_test_path, dtype=np.uint8).reshape(-1, 3, 96, 96).transpose(0, 2, 3, 1)
y_test = np.fromfile(y_test_path, dtype=np.uint8) - 1

# Preprocessing: normalize the data
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# ViT autoencoder: transformer encoder, transposed-convolution decoder
latent_dim = 128

# Encoder (Vision Transformer, randomly initialised, no classification head)
inputs = Input(shape=(96, 96, 3))
vit_backbone = vit.vit_l32(
    image_size=96,
    activation='gelu',
    pretrained=False,
    include_top=False,
    pretrained_top=False,
)
x = vit_backbone(inputs)
# Regroup the backbone output as (sequence, features) and average over the
# sequence axis to obtain one feature vector per image.
x = Reshape((-1, x.shape[-1]))(x)
x = tf.keras.layers.GlobalAveragePooling1D()(x)
latent_space = Dense(latent_dim, activation='relu')(x)  # Latent representation

# Decoder: 6x6 map upsampled twice (6 -> 12 -> 24); output is 24x24x3
decoder_inputs = Input(shape=(latent_dim,))
x = Dense(6 * 6 * 32, activation='relu')(decoder_inputs)
x = Reshape((6, 6, 32))(x)
x = Conv2DTranspose(32, 3, strides=2, activation='relu', padding='same')(x)
x = Conv2DTranspose(16, 3, strides=2, activation='relu', padding='same')(x)
outputs = Conv2DTranspose(3, 3, activation='sigmoid', padding='same')(x)

# Encoder and decoder as stand-alone models (plain autoencoder, no sampling)
encoder = Model(inputs, latent_space)
decoder = Model(decoder_inputs, outputs)

# Chain encoder and decoder into the end-to-end autoencoder
autoencoder_output = decoder(encoder(inputs))
autoencoder = Model(inputs, autoencoder_output)

# Bring input and reconstruction to the same 24x24 resolution
inputs_reshaped = tf.image.resize(inputs, (24, 24))
autoencoder_output_reshaped = tf.image.resize(autoencoder_output, (24, 24))

# Autoencoder loss: mean squared error over the flattened 24x24x3 images
flat_inputs = tf.reshape(inputs_reshaped, (-1, 24 * 24 * 3))
flat_outputs = tf.reshape(autoencoder_output_reshaped, (-1, 24 * 24 * 3))
reconstruction_loss = tf.keras.losses.mean_squared_error(flat_inputs, flat_outputs)

autoencoder_loss = tf.reduce_mean(reconstruction_loss)

# The loss is attached to the graph, so compile needs only an optimizer
autoencoder.add_loss(autoencoder_loss)
autoencoder.compile(optimizer='adam')

# Train on the images alone; validation data likewise carries no targets
epochs = 100
batch_size = 128
history = autoencoder.fit(
    x_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, None),
)

# Loss curves for training and validation
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history.history['loss'], label='Train Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss')
ax.legend()
ax.grid()
plt.show()

# Reconstruct a few randomly chosen test images with the ViT autoencoder
num_samples = 5
random_indices = np.random.randint(0, len(x_test), num_samples)
sample_images = x_test[random_indices]
reconstructed_images = autoencoder.predict(sample_images)

# Top row: originals; bottom row: the matching reconstructions
fig, axes = plt.subplots(2, num_samples, figsize=(10, 4))
for column in range(num_samples):
    top_ax, bottom_ax = axes[0, column], axes[1, column]

    top_ax.imshow(sample_images[column])
    top_ax.set_title("Original")
    top_ax.axis('off')

    bottom_ax.imshow(reconstructed_images[column])
    bottom_ax.set_title("Reconstructed")
    bottom_ax.axis('off')

plt.show()


In [None]:
# Variational Autoencoder (VAE) using a Vision Transformer (ViT) as the encoder
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Reshape, LayerNormalization
from tensorflow.keras.models import Model
import numpy as np
import matplotlib.pyplot as plt
import os
import urllib.request
import tarfile
import shutil
from vit_keras import vit

# Download STL-10 dataset (if not already downloaded)
url = 'http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz'
file_name = 'stl10_binary.tar.gz'

if not os.path.exists(file_name):
    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer never leaves a truncated archive that looks complete.
    partial_name = file_name + '.part'
    urllib.request.urlretrieve(url, partial_name)
    os.replace(partial_name, file_name)

# Extract the dataset (if not already extracted)
data_dir = 'stl10_binary'
file_names = ['train_X.bin', 'train_y.bin', 'test_X.bin', 'test_y.bin']

x_train_path = os.path.join(data_dir, file_names[0])
y_train_path = os.path.join(data_dir, file_names[1])
x_test_path = os.path.join(data_dir, file_names[2])
y_test_path = os.path.join(data_dir, file_names[3])

# Re-extract only when one of the expected files is missing, instead of
# deleting and unpacking the whole dataset on every run. The context manager
# closes the archive even if extraction fails.
if not all(os.path.exists(path) for path in (x_train_path, y_train_path, x_test_path, y_test_path)):
    with tarfile.open(file_name, "r:gz") as tar:
        tar.extractall()

# Load the dataset. Flat uint8 values are reshaped to (N, 3, 96, 96), then
# moved to channels-last. Labels are shifted down by one.
x_train = np.fromfile(x_train_path, dtype=np.uint8).reshape(-1, 3, 96, 96).transpose(0, 2, 3, 1)
y_train = np.fromfile(y_train_path, dtype=np.uint8) - 1
x_test = np.fromfile(x_test_path, dtype=np.uint8).reshape(-1, 3, 96, 96).transpose(0, 2, 3, 1)
y_test = np.fromfile(y_test_path, dtype=np.uint8) - 1

# Preprocessing: scale pixel values from [0, 255] to [0, 1]
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# VAE whose encoder is a Vision Transformer
latent_dim = 128

# File name of a pre-trained ViT-L/32 checkpoint.
# NOTE(review): nothing in this cell reads it; the backbone below is built
# with pretrained=False, so it starts from random weights.
vit_weights_path = 'ViT-L_32_imagenet21k+imagenet2012.npz'

# Encoder: ViT backbone without its classification head, then a dense bottleneck
inputs = Input(shape=(96, 96, 3))
vit_backbone = vit.vit_l32(
    image_size=96,
    activation='relu',
    pretrained=False,
    include_top=False,
    pretrained_top=False,
)
x = vit_backbone(inputs)
x = LayerNormalization(epsilon=1e-6)(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)

# Latent space: mean and log-variance of the approximate posterior
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)
# Reparameterization trick
def sampling(args):
    """Sample a latent vector with the reparameterization trick.

    Args:
        args: Pair ``(z_mean, z_log_var)`` of equally shaped tensors.

    Returns:
        ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is
        standard-normal noise with the same shape as ``z_mean``.
    """
    z_mean, z_log_var = args
    # Take the noise shape from z_mean itself instead of the notebook-level
    # `latent_dim`, so the layer keeps working if that global is changed later.
    epsilon = tf.random.normal(shape=tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

z = tf.keras.layers.Lambda(sampling)([z_mean, z_log_var])

# Decoder: 6x6 feature map upsampled by two stride-2 transposed convolutions
# (6 -> 12 -> 24); the last layer keeps the size, so the output is 24x24x3.
decoder_inputs = Input(shape=(latent_dim,))
x = Dense(6 * 6 * 32, activation='relu')(decoder_inputs)
x = Reshape((6, 6, 32))(x)
x = Conv2DTranspose(32, (3, 3), activation='relu', strides=(2, 2), padding='same')(x)
x = Conv2DTranspose(16, (3, 3), activation='relu', strides=(2, 2), padding='same')(x)
outputs = Conv2DTranspose(3, (3, 3), activation='sigmoid', padding='same')(x)

# VAE model. `encoder` maps an image to the posterior mean only.
encoder = Model(inputs, z_mean)
decoder = Model(decoder_inputs, outputs)
vae_output = decoder(z)
vae = Model(inputs, vae_output)

# The decoder emits 24x24 images, so the 96x96 input is downsampled to the
# same resolution before the two are compared.
inputs_reshaped = tf.image.resize(inputs, (24, 24))
vae_output_reshaped = tf.image.resize(vae_output, (24, 24))

# Reconstruction term: binary_crossentropy averages over the last axis, so
# multiplying by the pixel count turns it into a per-image sum.
reconstruction_loss = tf.keras.losses.binary_crossentropy(tf.reshape(inputs_reshaped, (-1, 24 * 24 * 3)),
                                                          tf.reshape(vae_output_reshaped, (-1, 24 * 24 * 3)))
reconstruction_loss *= 24 * 24 * 3

# KL term: summed (not averaged) over the latent dimensions so that it is on
# the same per-image scale as the summed reconstruction term.
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_sum(kl_loss, axis=-1)
kl_loss *= -0.5

vae_loss = tf.reduce_mean(reconstruction_loss + kl_loss)

# Compile the model; the loss is attached to the graph, so no targets are needed
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

# Train the model
epochs = 100
batch_size = 128
history = vae.fit(x_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, None))

# Keep the legacy alias keys. Note that they hold the total VAE loss
# (reconstruction + KL) reported by Keras, not the reconstruction term alone.
history.history['train_reconstruction_loss'] = history.history['loss']
history.history['val_reconstruction_loss'] = history.history['val_loss']

# Plot Training and Validation Loss
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history so the plot still works if
# training stops before `epochs` epochs.
epoch_axis = range(1, len(train_loss) + 1)

plt.figure(figsize=(10, 6))
plt.plot(epoch_axis, train_loss, label='Train Loss')
plt.plot(epoch_axis, val_loss, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid(True)
plt.show()
