<a href="https://colab.research.google.com/github/efitzgerald763/snRNAseq_ssGSEA_DE/blob/main/GAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np
import pandas as pd
from google.colab import drive

# Mount Google Drive so the CSV files below are reachable from the runtime.
drive.mount('/content/drive')

# Drive locations of the counts matrix and of its per-sample metadata.
counts_path = '/content/drive/My Drive/Colab Notebooks/Pseudobulked_MDD/filtered_counts_matrix.csv'
meta_path = '/content/drive/My Drive/Colab Notebooks/Pseudobulked_MDD/filtered_counts_matrix_METADATA.csv'

# Read each CSV and promote its first column to the row index in one chain,
# so every frame is produced by a single expression rather than mutated in place.
data = pd.read_csv(counts_path).pipe(lambda df: df.set_index(df.columns[0]))
metadata = pd.read_csv(meta_path).pipe(lambda df: df.set_index(df.columns[0]))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
# Preview the first rows of the loaded counts table.
data.head()

Unnamed: 0_level_0,F1,F10,F11,F12,F13,F14,F15,F16,F17,F2,...,F34,F35,F36,F37,F38,F4,F5,F6,F7,F8
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Microglia_AL627309.1,0.015464,0.030303,0.0,0.0,0.0,0.011494,0.0,0.0,0.010152,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.004444,0.0,0.0,0.022727
Microglia_AL627309.5,0.005155,0.030303,0.018182,0.113636,0.045455,0.0,0.0,0.0,0.020305,0.015385,...,0.018018,0.0,0.021583,0.0,0.028571,0.005464,0.022222,0.0,0.0,0.068182
Microglia_LINC01409,0.010309,0.090909,0.0,0.090909,0.0,0.045977,0.018182,0.0,0.020305,0.0,...,0.036036,0.058824,0.035971,0.0,0.114286,0.04918,0.088889,0.0,0.0,0.022727
Microglia_FAM87B,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.015385,...,0.0,0.0,0.0,0.0,0.0,0.0,0.004444,0.0,0.0,0.022727
Microglia_LINC01128,0.041237,0.151515,0.072727,0.022727,0.272727,0.022989,0.072727,0.0,0.025381,0.030769,...,0.099099,0.0,0.021583,0.0,0.0,0.032787,0.053333,0.058824,0.0,0.204545


In [13]:
# Preview the first rows of the sample metadata table.
metadata.head()

Unnamed: 0_level_0,Condition,Batch,Chemistry,Sequencing,Age,Race,Sex
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F1,Case,8F,v3,Nova,25,Caucasian,Female
F11,Case,8F,v3,Nova,41,Caucasian,Female
F34,Control,8F,v3,Nova,20,AA,Female
F35,Control,8F,v3,Nova,28,W,Female
F36,Control,8F,v3,Nova,44,W/H,Female


In [14]:
# NOTE: this cell reassigns `data` (transpose + reorder + rescale), so it is
# not idempotent. Re-run the loading cell before running it a second time.

# Transpose the data DataFrame so that sample names are in the row index
data = data.transpose()

# Align data and metadata based on sample IDs: rows are reordered to follow
# `metadata.index` (a sample missing from `data` raises KeyError).
data = data.loc[metadata.index]

# Min-max scale every column to [0, 1].
# A column that is constant across samples has max == min; dividing by that
# zero range would turn the whole column into NaN (0 / 0) and propagate NaN
# into X_train and the training losses. Substituting a range of 1 keeps such
# columns at 0 instead.
col_min = data.min()
col_range = data.max() - col_min
data = (data - col_min) / col_range.replace(0, 1)

# Encode the 'Condition' column in the metadata as integer class codes.
# LabelEncoder numbers the classes in sorted order (e.g. 'Case' -> 0,
# 'Control' -> 1 when those are the only two values present).
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
metadata['Condition'] = label_encoder.fit_transform(metadata['Condition'])

# Extract the relevant arrays for training
X_train = data.values  # Scaled expression matrix, one row per sample
y_train = metadata['Condition'].values  # Integer condition label per sample

In [15]:
import tensorflow as tf
from tensorflow.keras import layers

def build_generator(input_dim, condition_dim, output_dim):
    """Assemble the conditional generator network.

    Args:
        input_dim: Length of the noise vector.
        condition_dim: Length of the condition vector that is concatenated
            to the noise.
        output_dim: Number of units in the output layer.

    Returns:
        A `tf.keras.Model` that maps `[noise, condition]` to `output_dim`
        sigmoid activations (values between 0 and 1).
    """
    noise_in = layers.Input(shape=(input_dim,))
    condition_in = layers.Input(shape=(condition_dim,))

    # Widening stack of ReLU layers on top of the joined inputs.
    hidden = layers.Concatenate()([noise_in, condition_in])
    for units in (128, 256):
        hidden = layers.Dense(units, activation='relu')(hidden)
    generated = layers.Dense(output_dim, activation='sigmoid')(hidden)

    return tf.keras.Model(inputs=[noise_in, condition_in], outputs=generated)

def build_discriminator(input_dim, condition_dim):
    """Assemble the conditional discriminator network.

    Args:
        input_dim: Length of the sample vector to be judged.
        condition_dim: Length of the condition vector that is concatenated
            to the sample.

    Returns:
        A `tf.keras.Model` that maps `[sample, condition]` to a single
        sigmoid activation.
    """
    sample_in = layers.Input(shape=(input_dim,))
    condition_in = layers.Input(shape=(condition_dim,))

    # Narrowing stack of ReLU layers on top of the joined inputs.
    hidden = layers.Concatenate()([sample_in, condition_in])
    for units in (256, 128):
        hidden = layers.Dense(units, activation='relu')(hidden)
    validity = layers.Dense(1, activation='sigmoid')(hidden)

    return tf.keras.Model(inputs=[sample_in, condition_in], outputs=validity)

# Hyper-parameters shared by the model builders and the training loop
input_dim = 100  # length of the generator's noise vector
condition_dim = 1  # one 0/1 condition value per sample
output_dim = X_train.shape[1]  # one generated value per column of X_train

# Instantiate both networks; the discriminator's sample input has the same
# width as the generator's output.
generator = build_generator(
    input_dim=input_dim,
    condition_dim=condition_dim,
    output_dim=output_dim,
)
discriminator = build_discriminator(
    input_dim=output_dim,
    condition_dim=condition_dim,
)

# The discriminator is trained on its own, so it gets its own loss/optimizer
discriminator.compile(optimizer='adam', loss='binary_crossentropy')

# Build the GAN model
def build_gan(generator, discriminator):
    """Chain the generator into a frozen discriminator.

    Args:
        generator: Keras model with exactly two inputs, `[noise, condition]`.
        discriminator: Keras model taking `[sample, condition]`.

    Returns:
        A `tf.keras.Model` mapping `[noise, condition]` to the discriminator's
        output for the sample the generator produces.

    Side effects:
        Sets `discriminator.trainable = False` on the object passed in, so
        that training the returned model is meant to update only the
        generator.
        NOTE(review): whether an already-compiled discriminator keeps
        updating through its own `train_on_batch` afterwards depends on when
        the installed Keras version reads the `trainable` flag; confirm.
    """
    discriminator.trainable = False

    # Take the input widths from the generator itself rather than from the
    # module-level `input_dim` / `condition_dim`, so the wrapper always
    # matches whatever generator it is handed.
    noise_shape, condition_shape = (
        tuple(tensor.shape[1:]) for tensor in generator.inputs
    )
    noise = layers.Input(shape=noise_shape)
    condition = layers.Input(shape=condition_shape)

    generated_sample = generator([noise, condition])
    validity = discriminator([generated_sample, condition])

    return tf.keras.Model([noise, condition], validity)

# Stacked generator + discriminator model, compiled with the same
# loss/optimizer settings as the stand-alone discriminator.
gan = build_gan(generator=generator, discriminator=discriminator)
gan.compile(optimizer='adam', loss='binary_crossentropy')


In [1]:
# This cell depends on names created by the earlier cells (`np`, `X_train`,
# `y_train`, `input_dim`, `generator`, `discriminator`, `gan`). Run the
# notebook top to bottom; on a fresh kernel it stops with a NameError.
epochs = 100  # number of training steps; each step uses ONE random batch
batch_size = 8
log_every = 10  # print the losses every `log_every` steps

# Targets for the discriminator: 1 = real sample, 0 = generated sample
real = np.ones((batch_size, 1))
fake = np.zeros((batch_size, 1))

for epoch in range(epochs):
    # Select a random batch of real samples (indices drawn with replacement)
    # together with their condition labels as a (batch_size, 1) column.
    idx = np.random.randint(0, X_train.shape[0], batch_size)
    real_samples = X_train[idx]
    labels = y_train[idx].reshape(-1, 1)

    # Generate fake samples for the same conditions.
    # verbose=0 keeps predict() from printing a progress bar on every step.
    noise = np.random.normal(0, 1, (batch_size, input_dim))
    gen_samples = generator.predict([noise, labels], verbose=0)

    # Train the discriminator on one real and one generated batch.
    # NOTE(review): build_gan() sets discriminator.trainable = False after the
    # discriminator was compiled; confirm that the installed Keras version
    # still updates the discriminator weights here.
    d_loss_real = discriminator.train_on_batch([real_samples, labels], real)
    d_loss_fake = discriminator.train_on_batch([gen_samples, labels], fake)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator: fresh noise, with "real" as the target so the
    # generator is rewarded when the discriminator is fooled.
    noise = np.random.normal(0, 1, (batch_size, input_dim))
    g_loss = gan.train_on_batch([noise, labels], real)

    # Print progress at a fixed interval and on the final step. The previous
    # `epoch % 100` check only ever fired at step 0 when epochs == 100.
    if epoch % log_every == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch} / {epochs} | D Loss: {d_loss} | G Loss: {g_loss}")

print("Training completed.")


NameError: name 'np' is not defined