**GAN**

In [1]:
!git clone https://github.com/AvonYangXX1/AMPLify-Feedback.git
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

Cloning into 'AMPLify-Feedback'...
remote: Enumerating objects: 335, done.[K
remote: Counting objects: 100% (335/335), done.[K
remote: Compressing objects: 100% (209/209), done.[K
remote: Total 335 (delta 155), reused 293 (delta 121), pack-reused 0[K
Receiving objects: 100% (335/335), 12.30 MiB | 1.93 MiB/s, done.
Resolving deltas: 100% (155/155), done.
Updating files: 100% (56/56), done.


In [2]:
# Load the pre-processed sequence / state / label arrays from the cloned repo.
# NOTE(review): every file is read from processed_data/test/ — the "train"
# variables come from the *_cv.npy files and the "val" variables from the
# *_test.npy files. Confirm this split is intentional.
seq_train = np.load("AMPLify-Feedback/processed_data/test/seq_cv.npy")
state_train = np.load("AMPLify-Feedback/processed_data/test/state_cv.npy")
label_train = np.load("AMPLify-Feedback/processed_data/test/label_cv.npy")
seq_val = np.load("AMPLify-Feedback/processed_data/test/seq_test.npy")
state_val = np.load("AMPLify-Feedback/processed_data/test/state_test.npy")
label_val = np.load("AMPLify-Feedback/processed_data/test/label_test.npy")
# One-hot encode the sequences with 43 classes. squeeze() drops every size-1
# axis first — presumably a trailing singleton axis; TODO confirm, since it
# would also drop the sample axis if an array held a single sequence.
seq_train = tf.one_hot(seq_train.squeeze(), depth=43)
seq_val = tf.one_hot(seq_val.squeeze(), depth=43)

In [73]:
# Generator
def build_generator(seq_length, depth, latent_dim, state_dim=326):
    """Build the conditional generator network.

    The model takes a noise vector and a condition ("state") vector and
    emits, for every sequence position, a softmax distribution over
    ``depth`` classes.

    Args:
        seq_length: Number of positions in each generated sequence.
        depth: Number of classes per position (size of the softmax output).
        latent_dim: Width of the noise input; also the width the condition
            vector is projected to before concatenation.
        state_dim: Width of the condition input. Defaults to 326, the value
            that was previously hard-coded.

    Returns:
        A ``tf.keras.Model`` with inputs ``[noise, state]`` of shapes
        ``(batch, latent_dim)`` and ``(batch, state_dim)`` and an output of
        shape ``(batch, seq_length, depth)``.
    """
    inputs0 = layers.Input(shape=(latent_dim,), name="Input0")
    inputs1 = layers.Input(shape=(state_dim,), name="Input1")
    # Project the condition vector to the same width as the noise vector.
    x1 = layers.Dense(latent_dim, activation='tanh', name="Input1Transform")(inputs1)
    x = layers.Concatenate(name="Concat")([inputs0, x1])
    # No `input_dim` here: in the functional API the input width is inferred
    # from the incoming tensor (2 * latent_dim after the concatenation).
    x = layers.Dense(256, activation='relu', name="Dense0")(x)
    x = layers.BatchNormalization(name="BatchNorm0")(x)
    # Expand to seq_length steps of 10 features each so the GRU has a
    # sequence to run over.
    x = layers.Dense(seq_length*10, activation='tanh', name="DenseResize")(x)
    x = layers.Reshape((seq_length, 10), name="Reshape")(x)
    x = layers.GRU(256, return_sequences=True, name="GRU0")(x)
    # Per-position class distribution.
    x = layers.Dense(depth, activation="softmax", name="Output")(x)
    model = tf.keras.models.Model(inputs=[inputs0, inputs1], outputs=x)
    return model

In [74]:
# Discriminator
def build_discriminator(seq_length, depth):
    """Build the discriminator network.

    A feed-forward classifier that flattens a ``(seq_length, depth)``
    sequence and outputs a single sigmoid value.

    Args:
        seq_length: Number of positions in each input sequence.
        depth: Number of classes per position (last axis of the input).

    Returns:
        A ``tf.keras.Sequential`` model mapping ``(batch, seq_length, depth)``
        to ``(batch, 1)``.
    """
    model = tf.keras.Sequential(name="discriminator")
    # Declare the input shape up front so the model is built immediately
    # (weights exist, summary() works) and seq_length / depth are actually used.
    model.add(layers.Input(shape=(seq_length, depth), name="Input"))
    model.add(layers.Flatten(name="Flatten"))
    model.add(layers.Dense(512, activation='relu', name="Dense0"))
    model.add(layers.Dropout(0.3, name="Dropout"))
    model.add(layers.Dense(256, activation='relu', name="Dense1"))
    model.add(layers.Dense(1, activation='sigmoid', name="Output"))
    return model

In [75]:
# GAN
def compile_gan(generator, discriminator):
    """Compile the discriminator and build the stacked generator->discriminator model.

    The discriminator is compiled first, then its ``trainable`` flag is set
    to False before the stacked model is compiled.

    Args:
        generator: Keras model exposing ``inputs`` (noise and state tensors).
        discriminator: Keras model that scores the generator's output.

    Returns:
        The compiled stacked model taking the same inputs as ``generator``
        and returning the discriminator's output.
    """
    discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    discriminator.trainable = False
    # Mirror the generator's own input shapes (minus the batch axis) instead
    # of relying on a notebook-global latent_dim and a hard-coded state width.
    gan_inputs = [layers.Input(shape=tuple(tensor.shape[1:])) for tensor in generator.inputs]
    gan_output = discriminator(generator(gan_inputs))
    gan = tf.keras.Model(gan_inputs, gan_output)
    gan.compile(loss='binary_crossentropy', optimizer='adam')
    return gan

In [76]:
def train_gan(generator, discriminator, gan, seq_train, state_train, epochs, batch_size, latent_dim):
    """Alternate discriminator and generator updates over mini-batches.

    For every batch the generator produces fake sequences conditioned on the
    batch's state vectors; the discriminator is trained on the real batch
    (label 1) and the fake batch (label 0); then the stacked ``gan`` model is
    trained on fresh noise with label 1.

    Args:
        generator: Model with ``predict([noise, state], verbose=...)``.
        discriminator: Model with ``train_on_batch(x, y)`` and a ``trainable``
            attribute.
        gan: Stacked model with ``train_on_batch([noise, state], y)``.
        seq_train: Real sequences, sliceable along axis 0 and exposing ``shape``.
        state_train: Condition vectors, sliced with the same indices as
            ``seq_train``.
        epochs: Number of passes over ``seq_train``.
        batch_size: Maximum number of samples per batch; the final batch of an
            epoch may be smaller.
        latent_dim: Width of the noise vectors.

    Returns:
        None. Progress is printed once per batch.
    """
    num_samples = seq_train.shape[0]
    for epoch in range(epochs):
        for i in range(0, num_samples, batch_size):
            real_sequences = seq_train[i:i + batch_size]
            state_train_batch = state_train[i:i + batch_size]
            # The last batch may be short, so size everything from the slice.
            current_batch_size = real_sequences.shape[0]

            # Generate Fake sequence
            noise = np.random.normal(0, 1, (current_batch_size, latent_dim))
            # verbose=0: predict() otherwise prints a progress bar on every
            # batch, which floods the notebook output alongside the line below.
            generated_sequences = generator.predict([noise, state_train_batch], verbose=0)

            # Labels for real and fake data
            real_labels = np.ones((current_batch_size, 1))
            fake_labels = np.zeros((current_batch_size, 1))

            # Train discriminator
            discriminator.trainable = True
            d_loss_real = discriminator.train_on_batch(real_sequences, real_labels)
            d_loss_fake = discriminator.train_on_batch(generated_sequences, fake_labels)
            # Element-wise mean of whatever train_on_batch returned for the
            # real and fake halves (a scalar or a list of loss/metric values).
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            discriminator.trainable = False

            # Train generator
            noise = np.random.normal(0, 1, (current_batch_size, latent_dim))
            g_loss = gan.train_on_batch([noise, state_train_batch], np.ones((current_batch_size, 1)))

            # Print the progress
            print(f"Epoch {epoch+1}/{epochs}, Batch {i//batch_size+1}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")

In [77]:
# Model dimensions used by the builders below.
latent_dim = 10  # Example latent dimension
seq_length = seq_train.shape[1]  # size of axis 1 of the one-hot encoded training tensor
depth = 43  # same value as the depth given to tf.one_hot when seq_train was encoded

# Build the two networks and the stacked model used to train the generator.
generator = build_generator(seq_length, depth, latent_dim)
discriminator = build_discriminator(seq_length,depth)
gan = compile_gan(generator, discriminator)

# Train GAN
train_gan(generator, discriminator, gan, seq_train, state_train, epochs=5, batch_size=32, latent_dim=latent_dim)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 2/5, Batch 573, Discriminator Loss: [0.36538354 0.8125    ], Generator Loss: 1.7672841548919678
Epoch 2/5, Batch 574, Discriminator Loss: [0.24205296 0.890625  ], Generator Loss: 1.9122953414916992
Epoch 2/5, Batch 575, Discriminator Loss: [0.36531254 0.84375   ], Generator Loss: 2.35707950592041
Epoch 2/5, Batch 576, Discriminator Loss: [0.14863306 0.96875   ], Generator Loss: 2.6924960613250732
Epoch 2/5, Batch 577, Discriminator Loss: [0.42398981 0.84375   ], Generator Loss: 2.5050158500671387
Epoch 2/5, Batch 578, Discriminator Loss: [0.60645197 0.78125   ], Generator Loss: 2.4036872386932373
Epoch 2/5, Batch 579, Discriminator Loss: [0.45938087 0.765625  ], Generator Loss: 1.9512091875076294
Epoch 2/5, Batch 580, Discriminator Loss: [0.27226776 0.90625   ], Generator Loss: 1.6184378862380981
Epoch 2/5, Batch 581, Discriminator Loss: [0.16689982 1.        ], Generator Loss: 1.2878413200378418
Epoch 2/5, Batch 58

In [48]:
# After GAN is trained

def generate_sequences(generator, latent_dim, num_sequences, state_dim=326):
    """Sample sequences from the generator using random one-hot conditions.

    Args:
        generator: Model with ``predict([noise, state])``.
        latent_dim: Width of the noise vectors.
        num_sequences: Number of sequences to generate; 0 yields empty inputs
            instead of raising.
        state_dim: Width of the one-hot condition vector. Defaults to 326, the
            value that was previously hard-coded.

    Returns:
        Whatever ``generator.predict`` returns for the sampled inputs.
    """
    noise = np.random.normal(0, 1, (num_sequences, latent_dim))
    # One random "hot" index per sequence, written into a zero matrix in a
    # single vectorised assignment.
    hot_index = np.random.randint(0, state_dim, size=num_sequences)
    bacteria = np.zeros((num_sequences, state_dim))
    bacteria[np.arange(num_sequences), hot_index] = 1
    generated_sequences = generator.predict([noise, bacteria])
    return generated_sequences


In [78]:
# Sample 10 sequences from the generator, each with a random one-hot condition.
generated_seqs = generate_sequences(generator, latent_dim, num_sequences=10)



In [80]:
# Collapse the last axis (axis 2) to the index of the largest value at each position.
generated_seqs_argmax = tf.math.argmax(generated_seqs, axis=2)

In [83]:
# Inspect the last of the 10 generated sequences as class indices.
generated_seqs_argmax[9]

<tf.Tensor: shape=(190,), dtype=int64, numpy=
array([ 5, 10,  2, 25,  8,  3,  4,  4,  2,  2,  3,  3,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0])>

In [84]:
# Persist the generator into the cloned repo's model_weights directory.
generator.save("AMPLify-Feedback/model_weights/PeptideGenerator.keras")

In [None]:
# np.save("/content/AMPLify-Feedback/processed_data/GAN_seq/generated_seqs_10",generated_seqs)

In [None]:
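# Convert to one-hot.
# NOTE: tf.one_hot expects integer class indices, so it should be applied to the
# argmax of generated_seqs rather than to the generator's softmax output directly.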
# generated_seqs_one_hot = tf.one_hot(generated_seqs.squeeze(), depth=43)
# np.save("/content/AMPLify-Feedback/processed_data/GAN_seq/generated_seqs_one_hot_10",generated_seqs_one_hot)