<a href="https://colab.research.google.com/github/jayantsharma12/AI-Generated-Synthetic-Data-Generation-and-Detection/blob/main/GAN_alzi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow



In [None]:
import os
import zipfile
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, Reshape, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
# Unpack a zip archive into a target directory
def extract_zip(zip_path, extract_to):
    """Extract every member of the archive at ``zip_path`` into ``extract_to``.

    Args:
        zip_path: Path of the zip file to read.
        extract_to: Directory passed to ``ZipFile.extractall`` as the
            extraction target.
    """
    archive = zipfile.ZipFile(zip_path, mode='r')
    try:
        archive.extractall(path=extract_to)
    finally:
        # Always release the file handle, even if extraction fails.
        archive.close()

In [None]:
# Summarise one audio file as a fixed-length MFCC vector
def extract_features(audio_path, n_mfcc=13):
    """Return the time-averaged MFCCs of the audio file at ``audio_path``.

    Args:
        audio_path: Path handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        The mean over axis 0 of the transposed MFCC matrix (one value per
        coefficient, assuming librosa's ``(n_mfcc, n_frames)`` layout).
    """
    # sr=None is forwarded to librosa so no explicit target rate is imposed here.
    signal, sample_rate = librosa.load(audio_path, sr=None)
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    frames_first = mfcc_matrix.T
    return frames_first.mean(axis=0)

# Function to process audio files and extract features and labels
def process_audio_files(zip_path, extract_to, label):
    """Unzip an archive and build a feature matrix for the WAV files inside it.

    Args:
        zip_path: Path of the zip archive holding the recordings.
        extract_to: Directory the archive is extracted into; it is then
            scanned recursively.
        label: Class label attached to every recording that is found.

    Returns:
        ``(features, labels)`` as NumPy arrays with one entry per WAV file.
        Both arrays are empty when no WAV file is present.
    """
    extract_zip(zip_path, extract_to)
    features, labels = [], []
    for root, dirs, files in os.walk(extract_to):
        # os.walk yields entries in arbitrary order; sorting (dirs in place so
        # the traversal itself is ordered) makes the row order reproducible.
        dirs.sort()
        for filename in sorted(files):
            # Case-insensitive match so ".WAV" / ".Wav" files are not skipped.
            if filename.lower().endswith(".wav"):
                audio_path = os.path.join(root, filename)
                features.append(extract_features(audio_path))
                labels.append(label)
    return np.array(features), np.array(labels)

In [None]:
# Input paths (prompted interactively).
# Surrounding whitespace and quote characters are stripped so that a path
# pasted as ' "/content/data.zip" ' still resolves to the real file.
alzheimer_zip_path = input("Enter the path for Alzheimer zipped file: ").strip().strip("'\"")
other_zip_path = input("Enter the path for other disease zipped file: ").strip().strip("'\"")

# Directories for extracted files
alzheimer_dir = "extracted_alzheimer_files"
other_dir = "extracted_other_files"

Enter the path for Alzheimer zipped file: /content/cc-20241018T045139Z-001.zip
Enter the path for other disease zipped file: /content/cd-20241018T045139Z-001.zip


In [None]:
# Build one feature matrix per class: label 1 for the Alzheimer archive,
# label 0 for the other-disease archive.
alzheimer_features, alzheimer_labels = process_audio_files(
    zip_path=alzheimer_zip_path,
    extract_to=alzheimer_dir,
    label=1,
)
other_features, other_labels = process_audio_files(
    zip_path=other_zip_path,
    extract_to=other_dir,
    label=0,
)

In [None]:
# Combine data
# An archive without any .wav file yields a 1-D empty array. np.vstack would
# then fail with an opaque shape error (or, if both classes are empty, silently
# build a (2, 0) matrix), so check the inputs explicitly first.
assert alzheimer_features.ndim == 2, "No .wav features were extracted from the Alzheimer archive"
assert other_features.ndim == 2, "No .wav features were extracted from the other-disease archive"

X_real = np.vstack((alzheimer_features, other_features))
y_real = np.hstack((alzheimer_labels, other_labels))

In [None]:
# Normalize the feature data
# Each feature column is min-max scaled independently to [-1, 1]:
#   * the generator ends in tanh, so its outputs lie in [-1, 1]; real samples
#     must span the same range or the discriminator can separate them trivially;
#   * a single global min/max lets the largest-magnitude coefficient squash
#     all the others into a narrow band.
feature_min = X_real.min(axis=0)
feature_range = X_real.max(axis=0) - feature_min
feature_range[feature_range == 0] = 1.0  # constant column: avoid division by zero
X_real = 2.0 * (X_real - feature_min) / feature_range - 1.0

In [None]:
# GAN parameters
SEED = 42
# Seed the Python, NumPy and TensorFlow random generators in one call so that
# weight initialisation and the noise drawn with np.random are repeatable
# between runs.
tf.keras.utils.set_random_seed(SEED)

latent_dim = 100  # Size of the noise vector for the generator
n_features = X_real.shape[1]  # Width of one real feature row

In [None]:
# Generator Model
def build_generator(latent_dim, n_features):
    """Build the generator: a small MLP mapping latent noise to a feature vector.

    Args:
        latent_dim: Length of the input noise vector.
        n_features: Length of the generated feature vector.

    Returns:
        An uncompiled ``Sequential`` model whose last layer uses ``tanh``, so
        every output value lies in [-1, 1]. Real samples shown to the
        discriminator should be scaled to that same range.
    """
    model = Sequential()
    # Declare the input with an explicit Input object: passing ``input_dim``
    # to the first Dense layer makes recent Keras versions emit a
    # "Do not pass an input_shape/input_dim argument to a layer" warning.
    model.add(tf.keras.Input(shape=(latent_dim,)))
    model.add(Dense(128))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(n_features, activation='tanh'))  # Generate feature vector
    return model


In [None]:
# Discriminator Model
def build_discriminator(n_features):
    """Build the discriminator: an MLP scoring a feature vector as real or fake.

    Args:
        n_features: Length of the feature vectors to classify.

    Returns:
        An uncompiled ``Sequential`` model ending in a single sigmoid unit.
    """
    model = Sequential()
    # Declare the input with an explicit Input object: passing ``input_dim``
    # to the first Dense layer makes recent Keras versions emit a
    # "Do not pass an input_shape/input_dim argument to a layer" warning.
    model.add(tf.keras.Input(shape=(n_features,)))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(128))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))  # Output probability (real/fake)
    return model

In [None]:
# Combined model: generator feeding the discriminator
def build_gan(generator, discriminator):
    """Chain ``generator`` and ``discriminator`` into one ``Sequential`` model.

    Args:
        generator: Model placed first in the stack.
        discriminator: Model placed second; it receives the generator output.

    Returns:
        The uncompiled stacked ``Sequential`` model.
    """
    stacked_layers = [generator, discriminator]
    return Sequential(stacked_layers)

# Instantiate the two networks
generator = build_generator(latent_dim, n_features)
discriminator = build_discriminator(n_features)

# The discriminator is compiled on its own so it can be trained directly
# with train_on_batch; accuracy is tracked alongside the loss.
discriminator.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
    metrics=['accuracy'],
)

# Mark the discriminator as non-trainable before embedding it in the
# combined model, so generator updates are meant to leave it unchanged.
discriminator.trainable = False

# Stack generator -> discriminator and compile the combined model
gan = build_gan(generator, discriminator)
gan.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Define the training loop
def train_gan(generator, discriminator, gan, X_real, latent_dim, epochs=1000, batch_size=32, log_every=100):
    """Alternate discriminator and generator updates for ``epochs`` iterations.

    Each iteration trains the discriminator on half a batch of real rows and
    half a batch of generated rows, then trains the generator through the
    combined ``gan`` model on a full batch of noise labelled as real.

    Args:
        generator: Model exposing ``predict(noise, verbose=0)``.
        discriminator: Compiled model whose ``train_on_batch`` result is
            indexed as ``[loss, accuracy]``.
        gan: Compiled combined model; its ``train_on_batch`` may return a
            scalar loss or a sequence whose first item is the loss.
        X_real: 2-D array of real feature rows.
        latent_dim: Length of each noise vector.
        epochs: Number of training iterations (one batch each).
        batch_size: Batch size for the generator step; the discriminator sees
            half of it for each of its real/fake updates.
        log_every: Print progress every this many iterations; ``0`` or
            ``None`` disables printing.

    Raises:
        ValueError: If ``batch_size`` is below 2 or ``X_real`` has no rows.
    """
    half_batch = int(batch_size) // 2
    if half_batch < 1:
        raise ValueError("batch_size must be at least 2")
    n_real = X_real.shape[0]
    if n_real == 0:
        raise ValueError("X_real must contain at least one sample")

    # The target labels never change, so build them once outside the loop.
    real_labels = np.ones((half_batch, 1))   # Label for real data is 1
    fake_labels = np.zeros((half_batch, 1))  # Label for fake data is 0
    gan_labels = np.ones((batch_size, 1))    # Fakes labelled as real for the generator step

    for epoch in range(epochs):
        # --- Train Discriminator ---
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        # verbose=0 stops predict() from printing a progress bar every iteration.
        X_fake = generator.predict(noise, verbose=0)

        # Select a random half batch of real data (sampled with replacement)
        idx = np.random.randint(0, n_real, half_batch)
        X_real_batch = X_real[idx]

        # Train the discriminator on real and fake data separately
        d_loss_real = discriminator.train_on_batch(X_real_batch, real_labels)
        d_loss_fake = discriminator.train_on_batch(X_fake, fake_labels)

        # Average loss and accuracy over the real and fake updates
        d_loss = 0.5 * (d_loss_real[0] + d_loss_fake[0])
        d_acc = 0.5 * (d_loss_real[1] + d_loss_fake[1])

        # --- Train Generator (through the combined model) ---
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = gan.train_on_batch(noise, gan_labels)

        # train_on_batch may return a bare loss or a sequence starting with it.
        if isinstance(g_loss, (list, tuple, np.ndarray)):
            g_loss = np.ravel(g_loss)[0]

        # Print progress
        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch+1}/{epochs}, D Loss: {d_loss:.4f}, D Accuracy: {d_acc*100:.2f}%, G Loss: {g_loss:.4f}")

In [None]:
# Run the adversarial training loop: 100 iterations, 64-sample generator batches
train_gan(
    generator=generator,
    discriminator=discriminator,
    gan=gan,
    X_real=X_real,
    latent_dim=latent_dim,
    epochs=100,
    batch_size=64,
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 306ms/step




Epoch 1/100, D Loss: 0.7787, D Accuracy: 14.84%, G Loss: 0.7486
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19

In [None]:
### 3. **Generating New Samples**:

# Sample synthetic feature vectors from a generator
def generate_fake_samples(generator, latent_dim, n_samples):
    """Map ``n_samples`` standard-normal noise vectors through ``generator``.

    Args:
        generator: Object exposing ``predict(noise)``.
        latent_dim: Length of each noise vector.
        n_samples: Number of vectors to generate.

    Returns:
        Whatever ``generator.predict`` returns for the
        ``(n_samples, latent_dim)`` noise array.
    """
    latent_points = np.random.normal(loc=0, scale=1, size=(n_samples, latent_dim))
    return generator.predict(latent_points)


In [None]:
# Draw five synthetic feature vectors from the generator and show them
new_samples = generate_fake_samples(generator, latent_dim, n_samples=5)
print("Generated MFCC feature samples:", new_samples, sep="\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
Generated MFCC feature samples:
[[-0.9741972  -0.98081654  0.7426176   0.96614695 -0.9871568   0.9200992
  -0.9609494  -0.89043736 -0.9881929  -0.9753078  -0.96215665 -0.9759033
   0.9786223 ]
 [-0.991686   -0.9936444   0.6942657   0.98508745 -0.99630713  0.9554992
  -0.9969533  -0.9194284  -0.99558824 -0.9959385  -0.99322945 -0.988945
   0.9780799 ]
 [-0.9825051  -0.98205996  0.54171836  0.9900956  -0.98479474  0.94681215
  -0.9891576  -0.95227563 -0.9904936  -0.9963983  -0.98688877 -0.9829987
   0.9788618 ]
 [-0.9546721  -0.97713494  0.86864376  0.9655746  -0.9917264   0.9086136
  -0.9633202  -0.86070985 -0.9796195  -0.9776909  -0.96825755 -0.986827
   0.98681086]
 [-0.9321609  -0.9895056   0.79580027  0.9712639  -0.988333    0.9351603
  -0.8934144  -0.9399865  -0.9841299  -0.9710156  -0.99213815 -0.99100083
   0.9683604 ]]
