**GAN**

In [1]:
!git clone https://github.com/AvonYangXX1/AMPLify-Feedback.git
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

Cloning into 'AMPLify-Feedback'...
remote: Enumerating objects: 335, done.[K
remote: Counting objects: 100% (335/335), done.[K
remote: Compressing objects: 100% (209/209), done.[K
remote: Total 335 (delta 155), reused 293 (delta 121), pack-reused 0[K
Receiving objects: 100% (335/335), 12.30 MiB | 1.93 MiB/s, done.
Resolving deltas: 100% (155/155), done.
Updating files: 100% (56/56), done.


In [2]:
# Load the pre-split arrays shipped with the cloned repository.
# NOTE(review): the "train" arrays are read from the *_cv files and the "val"
# arrays from the *_test files, all under processed_data/test -- confirm this
# is the intended split.
seq_train, state_train, label_train = (
    np.load(path)
    for path in (
        "AMPLify-Feedback/processed_data/test/seq_cv.npy",
        "AMPLify-Feedback/processed_data/test/state_cv.npy",
        "AMPLify-Feedback/processed_data/test/label_cv.npy",
    )
)
seq_val, state_val, label_val = (
    np.load(path)
    for path in (
        "AMPLify-Feedback/processed_data/test/seq_test.npy",
        "AMPLify-Feedback/processed_data/test/state_test.npy",
        "AMPLify-Feedback/processed_data/test/label_test.npy",
    )
)
# Replace the integer token arrays with their one-hot encoding (43 classes),
# after dropping any singleton axes.
seq_train, seq_val = (
    tf.one_hot(tokens.squeeze(), depth=43) for tokens in (seq_train, seq_val)
)

In [39]:
# Generator
def build_generator(seq_length, depth, latent_dim, state_dim=326):
    """Build the conditional sequence generator.

    The model takes two inputs -- a noise vector and a conditioning vector --
    and emits, for every sequence position, a softmax distribution over
    ``depth`` token classes.

    Args:
        seq_length: Number of positions in each generated sequence.
        depth: Number of token classes (size of the softmax at each position).
        latent_dim: Width of the noise input; the conditioning vector is also
            projected down to this width before the two are concatenated.
        state_dim: Width of the conditioning input. Defaults to 326, the value
            that was previously hard-coded.

    Returns:
        A ``tf.keras`` functional model mapping ``[noise, state]`` with shapes
        ``(batch, latent_dim)`` and ``(batch, state_dim)`` to an output of
        shape ``(batch, seq_length, depth)``.
    """
    noise_in = layers.Input(shape=(latent_dim,), name="Input0")
    state_in = layers.Input(shape=(state_dim,), name="Input1")

    # Squash the conditioning vector to the same width as the noise.
    state_proj = layers.Dense(latent_dim, activation='tanh', name="Input1Transform")(state_in)
    x = layers.Concatenate(name="Concat")([noise_in, state_proj])

    # No `input_dim` here: in a functional graph the layer infers its input
    # width from the tensor it is called on (2 * latent_dim after the concat),
    # so the old `input_dim=latent_dim` was both ignored and misleading.
    x = layers.Dense(256, activation='relu', name="Dense0")(x)
    x = layers.BatchNormalization(name="BatchNorm0")(x)

    # Expand to seq_length * 10 features and fold them into a
    # (seq_length, 10) sequence for the recurrent layer.
    x = layers.Dense(seq_length*10, activation='tanh', name="DenseResize")(x)
    x = layers.Reshape((seq_length, 10), name="Reshape")(x)
    x = layers.GRU(256, return_sequences=True, name="GRU0")(x)

    # Per-position distribution over the token classes.
    x = layers.Dense(depth, activation="softmax", name="Output")(x)
    model = tf.keras.models.Model(inputs=[noise_in, state_in], outputs=x)
    return model

In [22]:
# Discriminator
def build_discriminator(seq_length, depth):
    """Build the real-vs-generated sequence classifier.

    Args:
        seq_length: Number of positions in each input sequence.
        depth: Size of the per-position vector (one-hot / softmax width).

    Returns:
        A ``tf.keras.Sequential`` named ``"discriminator"`` that flattens a
        ``(seq_length, depth)`` input and outputs a single sigmoid score.
    """
    stack = [
        layers.Flatten(input_shape=(seq_length, depth), name="Flatten"),
        layers.Dense(512, activation='relu', name="Dense0"),
        layers.Dense(256, activation='relu', name="Dense1"),
        layers.Dropout(0.3, name="Dropout"),
        layers.Dense(1, activation='sigmoid', name="Output"),
    ]
    return tf.keras.Sequential(stack, name="discriminator")

In [34]:
# GAN
def compile_gan(generator, discriminator):
    """Compile the discriminator and build the stacked generator->discriminator model.

    Args:
        generator: Model whose inputs are fed through to the stacked model and
            whose output is passed to ``discriminator``.
        discriminator: Model scoring sequences; compiled here with binary
            cross-entropy, Adam and an accuracy metric.

    Returns:
        The compiled stacked model taking the same inputs as ``generator`` and
        returning the discriminator's score for the generated output.
    """
    discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Mark the discriminator as non-trainable before it is wired into the
    # stacked model, so that training the stack is aimed at the generator.
    discriminator.trainable = False

    # Mirror the generator's own input signature (dropping the batch axis)
    # instead of reading a notebook-global `latent_dim` and a hard-coded
    # conditioning width; this keeps the function usable regardless of the
    # order in which cells were executed.
    gan_inputs = [layers.Input(shape=tuple(tensor.shape[1:])) for tensor in generator.inputs]
    gan_output = discriminator(generator(gan_inputs))
    gan = tf.keras.Model(gan_inputs, gan_output)
    gan.compile(loss='binary_crossentropy', optimizer='adam')
    return gan

In [35]:
def train_gan(generator, discriminator, gan, seq_train, state_train, epochs, batch_size, latent_dim):
    """Alternate discriminator and generator updates over mini-batches.

    For every mini-batch the discriminator is trained on the real sequences
    (label 1) and on freshly generated ones (label 0); then the stacked model
    is trained on new noise with label 1. One progress line is printed per
    batch.

    Args:
        generator: Callable model mapping ``[noise, state]`` to sequences.
        discriminator: Compiled model exposing ``train_on_batch`` and a
            ``trainable`` attribute.
        gan: Compiled stacked model exposing ``train_on_batch``.
        seq_train: Real sequences; must support slicing along axis 0 and
            expose ``.shape``.
        state_train: Conditioning vectors aligned with ``seq_train`` on axis 0.
        epochs: Number of passes over the data.
        batch_size: Number of samples per mini-batch (the last batch of an
            epoch may be smaller).
        latent_dim: Width of the noise vectors fed to the generator.

    Returns:
        None.
    """
    num_samples = seq_train.shape[0]
    for epoch in range(epochs):
        for i in range(0, num_samples, batch_size):
            real_sequences = seq_train[i:i + batch_size]
            state_train_batch = state_train[i:i + batch_size]
            # The trailing batch can be shorter than batch_size.
            current_batch_size = real_sequences.shape[0]

            # Generate fake sequences. The model is called directly in
            # inference mode rather than through predict(): for a single
            # batch inside a loop this avoids predict()'s per-call overhead.
            noise = np.random.normal(0, 1, (current_batch_size, latent_dim))
            generated_sequences = generator([noise, state_train_batch], training=False)

            # Labels for real and fake data
            real_labels = np.ones((current_batch_size, 1))
            fake_labels = np.zeros((current_batch_size, 1))

            # Train discriminator
            # NOTE(review): how a post-compile `trainable` toggle is honoured
            # differs between Keras versions; the toggling is kept as-is.
            discriminator.trainable = True
            d_loss_real = discriminator.train_on_batch(real_sequences, real_labels)
            d_loss_fake = discriminator.train_on_batch(generated_sequences, fake_labels)
            # Element-wise mean of the two results returned by train_on_batch.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            discriminator.trainable = False

            # Train generator: fresh noise, target label "real".
            noise = np.random.normal(0, 1, (current_batch_size, latent_dim))
            g_loss = gan.train_on_batch([noise, state_train_batch], np.ones((current_batch_size, 1)))

            # Print the progress
            print(f"Epoch {epoch+1}/{epochs}, Batch {i//batch_size+1}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")

In [40]:
# Seed Python, NumPy and TensorFlow before any weights are initialised or any
# noise is drawn, so that re-running the notebook starts from the same state.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

latent_dim = 10  # Example latent dimension
seq_length = seq_train.shape[1]
# Read the token-class count from the one-hot tensor instead of repeating the
# literal used when encoding, so the two can never drift apart.
depth = seq_train.shape[-1]

generator = build_generator(seq_length, depth, latent_dim)
discriminator = build_discriminator(seq_length,depth)
gan = compile_gan(generator, discriminator)

# Train GAN
train_gan(generator, discriminator, gan, seq_train, state_train, epochs=5, batch_size=32, latent_dim=latent_dim)






[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 2/5, Batch 573, Discriminator Loss: [0.21926222 0.875     ], Generator Loss: 4.992016315460205
Epoch 2/5, Batch 574, Discriminator Loss: [0.03279048 1.        ], Generator Loss: 5.042312145233154
Epoch 2/5, Batch 575, Discriminator Loss: [0.19320994 0.953125  ], Generator Loss: 4.913176536560059
Epoch 2/5, Batch 576, Discriminator Loss: [0.23422568 0.9375    ], Generator Loss: 4.365344047546387
Epoch 2/5, Batch 577, Discriminator Loss: [0.53825763 0.84375   ], Generator Loss: 2.9277520179748535
Epoch 2/5, Batch 578, Discriminator Loss: [0.37536642 0.796875  ], Generator Loss: 2.6810436248779297
Epoch 2/5, Batch 579, Discriminator Loss: [0.20698365 0.859375  ], Generator Loss: 3.1898720264434814
Epoch 2/5, Batch 580, Discriminator Loss: [0.21851708 0.9375    ], Generator Loss: 3.677459239959717
Epoch 2/5, Batch 581, Discriminator Loss: [0.29830949 0.90625   ], Generator Loss: 4.303670883178711
Epoch 2/5, Batch 582, D

In [48]:
# After GAN is trained

def generate_sequences(generator, latent_dim, num_sequences, state_dim=326):
    """Sample sequences from a trained generator with random one-hot conditioning.

    Args:
        generator: Model exposing ``predict([noise, state])``.
        latent_dim: Width of the noise vectors.
        num_sequences: How many sequences to generate (0 yields empty inputs
            instead of failing while the conditioning matrix is built).
        state_dim: Width of the one-hot conditioning vector. Defaults to 326,
            the value that was previously hard-coded.
            NOTE(review): presumably one slot per bacterium/state -- confirm.

    Returns:
        Whatever ``generator.predict`` returns for the sampled inputs.
    """
    noise = np.random.normal(0, 1, (num_sequences, latent_dim))

    # Draw one random "hot" position per sequence and build the whole
    # (num_sequences, state_dim) one-hot matrix in a single indexed write,
    # rather than allocating and concatenating one row at a time.
    hot_index = np.random.randint(0, state_dim, size=num_sequences)
    bacteria = np.zeros((num_sequences, state_dim))
    bacteria[np.arange(num_sequences), hot_index] = 1

    generated_sequences = generator.predict([noise, bacteria])
    return generated_sequences


In [49]:
# Sample 10 sequences from the trained generator; generate_sequences pairs each
# noise vector with a randomly chosen one-hot conditioning vector.
generated_seqs = generate_sequences(generator, latent_dim, num_sequences=10)



In [50]:
# Raw generator output: for each sequence position, a softmax probability
# vector over the token classes (not yet decoded to token indices).
generated_seqs

array([[[1.23037478e-06, 5.83203359e-07, 8.68971467e-01, ...,
         5.97243229e-07, 5.98900670e-07, 6.35690469e-07],
        [2.41132773e-11, 3.43180588e-11, 9.89544019e-02, ...,
         6.72063655e-11, 6.62012875e-11, 3.73573499e-11],
        [1.11367187e-12, 3.66225774e-14, 2.38808021e-02, ...,
         9.34872771e-14, 4.81747380e-14, 2.39406493e-14],
        ...,
        [1.00000000e+00, 1.84903184e-15, 1.22987176e-21, ...,
         9.32774810e-16, 1.07761363e-15, 1.07854304e-15],
        [1.00000000e+00, 1.79245089e-15, 4.78420233e-22, ...,
         8.98417354e-16, 1.09806313e-15, 1.11484465e-15],
        [1.00000000e+00, 1.73996905e-15, 1.21332431e-21, ...,
         9.10555548e-16, 9.89594627e-16, 1.03314938e-15]],

       [[1.77938227e-06, 6.25152597e-06, 7.24894226e-01, ...,
         6.50827542e-06, 6.30421755e-06, 6.06587673e-06],
        [2.25469117e-15, 5.84417172e-13, 7.66347162e-03, ...,
         1.09522005e-12, 6.32549514e-13, 6.38562174e-13],
        [1.73543853e-11, 

In [52]:
# Save the trained generator to a .keras file inside the cloned repository.
# NOTE(review): assumes the model_weights directory already exists in the
# clone -- confirm, otherwise create it before saving.
generator.save("AMPLify-Feedback/model_weights/PeptideGenerator.keras")

In [None]:
# np.save("/content/AMPLify-Feedback/processed_data/GAN_seq/generated_seqs_10",generated_seqs)

In [None]:
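# Convert to one-hot (disabled). generated_seqs holds softmax probabilities, so take argmax(axis=-1) to get token indices before calling tf.one_hot.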
# generated_seqs_one_hot = tf.one_hot(generated_seqs.squeeze(), depth=43)
# np.save("/content/AMPLify-Feedback/processed_data/GAN_seq/generated_seqs_one_hot_10",generated_seqs_one_hot)