In [1]:
#Simulated gene expression data classification using Transformers
# An example conceived entirely by ChatGPT


import numpy as np

# Generate synthetic gene expression data
def generate_gene_expression_data(num_samples, num_genes, seed=None):
    """Create a random stand-in for a gene expression dataset.

    Args:
        num_samples: number of rows (samples) to generate.
        num_genes: number of columns (genes) per sample.
        seed: optional integer seed. When given, a private
            ``np.random.RandomState(seed)`` is used so the same seed always
            yields the same dataset. When ``None`` the global ``np.random``
            stream is used, so a prior ``np.random.seed(...)`` call is honoured.

    Returns:
        ``(data, labels)`` where ``data`` has shape ``(num_samples, num_genes)``
        with uniform values in ``[0, 1)`` and ``labels`` has shape
        ``(num_samples,)`` with values in ``{0, 1}``.

    Note:
        The labels are drawn independently of ``data``, so no classifier can
        be expected to beat chance on this dataset.
    """
    # Both the module-level API and RandomState expose `rand` / `randint`,
    # so the two sources are interchangeable here.
    rng = np.random if seed is None else np.random.RandomState(seed)
    data = rng.rand(num_samples, num_genes)  # You can replace this with real gene expression data
    labels = rng.randint(0, 2, size=num_samples)  # Random binary labels for demonstration
    return data, labels

# Create the synthetic gene expression dataset
SEED = 42  # fixed so that "Restart Kernel -> Run All" regenerates the same dataset
num_samples = 1000
num_genes = 50
num_classes = 2

# Seeds NumPy's global generator only; TensorFlow's weight initialisation and
# shuffling are not affected by this call.
np.random.seed(SEED)
data, labels = generate_gene_expression_data(num_samples, num_genes)

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

def transformer_model(input_shape, num_classes, d_model=128, num_heads=4, ff_dim=128, dropout=0.1, num_layers=None):
    """Build an uncompiled Transformer-encoder classifier for fixed-length sequences.

    Args:
        input_shape: ``(sequence_length, feature_dim)`` of a single sample,
            e.g. ``(num_genes, 1)``. ``sequence_length`` must be a concrete
            integer because the positional table is built up front.
        num_classes: number of units in the softmax output layer.
        d_model: width of each token representation. Must be even: half of the
            positional channels are cosines and the other half sines.
        num_heads: number of attention heads in every encoder block.
        ff_dim: hidden width of the position-wise feed-forward network.
        dropout: dropout rate used inside attention and before the classifier.
        num_layers: number of stacked encoder blocks. ``None`` falls back to
            ``num_heads``, which is how depth was chosen before this parameter
            existed.

    Returns:
        A ``tf.keras`` ``Model`` mapping ``(batch, sequence_length, feature_dim)``
        inputs to ``(batch, num_classes)`` class probabilities.

    Raises:
        ValueError: if the sequence length is unknown or ``d_model`` is odd.
    """
    seq_len = input_shape[0]
    if seq_len is None:
        raise ValueError("input_shape[0] (sequence length) must be a concrete integer")
    if d_model % 2 != 0:
        raise ValueError("d_model must be even to split into cosine and sine halves")
    if num_layers is None:
        num_layers = num_heads

    inputs = Input(shape=input_shape)

    # Project every token to d_model explicitly instead of relying on a size-1
    # feature axis being broadcast against the positional table.
    x = Dense(d_model)(inputs)

    # Positional encoding: one row per *sequence position* (axis 0 of
    # input_shape). Indexing the feature axis here would give a single
    # position 0 and therefore the same encoding for every token.
    positions = tf.cast(tf.range(start=0, limit=seq_len, delta=1), tf.float32)
    inverse_freq = 1.0 / tf.pow(10000.0, 2.0 * tf.cast(tf.range(d_model // 2), tf.float32) / d_model)
    angles = tf.expand_dims(positions, 1) * tf.expand_dims(inverse_freq, 0)  # (seq_len, d_model // 2)
    positional_encoding = tf.concat([tf.cos(angles), tf.sin(angles)], axis=-1)  # (seq_len, d_model)
    positional_encoding = tf.expand_dims(positional_encoding, 0)  # (1, seq_len, d_model), broadcasts over batch
    x = x + positional_encoding

    # Transformer Encoder
    for _ in range(num_layers):
        # Multi-head self-attention mechanism
        attention_out = tf.keras.layers.MultiHeadAttention(
            key_dim=d_model // num_heads, num_heads=num_heads, dropout=dropout
        )(x, x)
        # Add and normalize
        x = tf.keras.layers.Add()([attention_out, x])
        x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)

        # Feed Forward Part
        ffn = tf.keras.Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(d_model),
        ])
        ffn_out = ffn(x)
        # Add and normalize
        x = tf.keras.layers.Add()([ffn_out, x])
        x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)

    # Global average pooling over the token axis. The tensor is
    # (batch, steps, features), i.e. the default 'channels_last' layout;
    # 'channels_first' would average the embedding axis away instead.
    x = tf.keras.layers.GlobalAveragePooling1D()(x)
    x = Dropout(dropout)(x)

    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

    return model

# Each gene is one token carrying a single scalar expression value.
model = transformer_model(
    input_shape=(num_genes, 1),
    num_classes=num_classes,
)

In [4]:
# Split the data into training and testing sets
split_ratio = 0.8
split_idx = int(num_samples * split_ratio)

# NOTE(review): `data` is (num_samples, num_genes) while the model was built
# for (num_genes, 1) samples; this appears to rely on Keras adding the
# trailing axis itself - confirm, or pass data[..., np.newaxis] explicitly.
x_train, y_train = data[:split_idx], labels[:split_idx]
x_test, y_test = data[split_idx:], labels[split_idx:]

# Compile the model
# `learning_rate` is the documented keyword; the old `lr` alias is deprecated
# and is not accepted by newer Keras optimizers.
model.compile(optimizer=Adam(learning_rate=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
batch_size = 32
epochs = 10

# validation_split holds out the last 20% of the training rows for validation.
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)

# Evaluate the model on the test set
# The labels are random and independent of the features, so accuracy close to
# 0.5 is the expected outcome here, not a sign of a training problem.
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.6973, Test Accuracy: 0.4600
