In [6]:
import numpy as np

In [7]:
#the activation functions
#used for models where we have to predict the probability as an output
#useful for binary classification tasks
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

In [8]:
def initialize_weights(input_dim, hidden_dim, output_dim):
    #initializing weights with small random numbers
    #initializing biases with zeros
    W1 = np.random.randn(input_dim, hidden_dim) * 0.01
    b1 = np.zeros((1, hidden_dim))
    W2 = np.random.randn(hidden_dim, output_dim) * 0.01
    b2 = np.zeros((1, output_dim))
    return W1, b1, W2, b2

Generator

In [9]:
#The generator generates fake data

def generator_forward(z, W1, b1, W2, b2):
    #the forward pass
    #z is a random noise vector
    #h are the hidden layer activations
    h = sigmoid(np.dot(z, W1) + b1)
    generated_data = sigmoid(np.dot(h, W2) + b2)
    return generated_data


def generator_backward(z, generated_data, d_loss, W1, b1, W2, b2):
    #the backward pass
    #computes the gradients for the generator's weights and biases using the loss gradient d_loss.
    h = sigmoid(np.dot(z, W1) + b1)
    d_output = d_loss * sigmoid_derivative(generated_data)

    #the gradients
    #a gradient measures the change in all weights with regard to the change in error or loss
    dW2 = np.dot(h.T, d_output)
    db2 = np.sum(d_output, axis=0, keepdims=True)
    dh = np.dot(d_output, W2.T) * sigmoid_derivative(h)

    dW1 = np.dot(z.T, dh)
    db1 = np.sum(dh, axis=0, keepdims=True)

    return dW1, db1, dW2, db2

Discriminator

In [10]:
#The discriminator classifies fake and real data
def discriminator_forward(x, W1, b1, W2, b2):
    #forward pass
    #x is real or generated data given as input
    #h are the hidden layer activations
    h = sigmoid(np.dot(x, W1) + b1)
    y = sigmoid(np.dot(h, W2) + b2)
    return y


def discriminator_backward(x, y_true, y_pred, W1, b1, W2, b2):
    #backward pass
    #calculates the gradients for the discriminator's weights and biases
    #uses the true labels y_true and predicted labels y_pred
    h = sigmoid(np.dot(x, W1) + b1)
    d_output = (y_pred - y_true) * sigmoid_derivative(y_pred)

    #the gradients
    #a gradient measures the change in all weights with regard to the change in error or loss
    dW2 = np.dot(h.T, d_output)
    db2 = np.sum(d_output, axis=0, keepdims=True)
    dh = np.dot(d_output, W2.T) * sigmoid_derivative(h)

    dW1 = np.dot(x.T, dh)
    db1 = np.sum(dh, axis=0, keepdims=True)

    return dW1, db1, dW2, db2

Training

In [11]:
def train_gan(real_data, input_dim, hidden_dim, output_dim, latent_dim, epochs=1000, learning_rate=0.01):
    #initializing random weights and biases for both the generator and discriminator
    g_W1, g_b1, g_W2, g_b2 = initialize_weights(latent_dim, hidden_dim, output_dim)
    d_W1, d_b1, d_W2, d_b2 = initialize_weights(output_dim, hidden_dim, 1)

    #the training loop
    #epoch is the number of times you give the data to the neursl network
    #in other words, an epoch means training the neural network with all the training data for one cycle.
    for epoch in range(epochs):
        for real_sample in real_data:
            #training the Discriminator
            z = np.random.randn(1, latent_dim)
            #generating fake data
            generated_data = generator_forward(z, g_W1, g_b1, g_W2, g_b2)

            real_sample = real_sample.reshape(1, -1)
            #calculating discriminator's output for both real and fake data
            d_real = discriminator_forward(real_sample, d_W1, d_b1, d_W2, d_b2)
            d_fake = discriminator_forward(generated_data, d_W1, d_b1, d_W2, d_b2)

            #calculating discriminator loss for both real and fake data
            d_loss_real = -np.log(d_real)
            d_loss_fake = -np.log(1 - d_fake)
            d_loss = d_loss_real + d_loss_fake

            d_loss_gradient_real = (d_real - 1) / (d_real * (1 - d_real))
            d_loss_gradient_fake = d_fake / (d_fake * (1 - d_fake))

            #calculating discriminator gradients for both real and fake data
            dW1_real, db1_real, dW2_real, db2_real = discriminator_backward(real_sample, np.ones((1, 1)), d_real, d_W1, d_b1, d_W2, d_b2)
            dW1_fake, db1_fake, dW2_fake, db2_fake = discriminator_backward(generated_data, np.zeros((1, 1)), d_fake, d_W1, d_b1, d_W2, d_b2)

            #updating discriminator's weights and biases using gradient descent
            #Gradient Descent is an algorithm that is used to optimize the cost function or the error of the model
            d_W1 -= learning_rate * (dW1_real + dW1_fake)
            d_b1 -= learning_rate * (db1_real + db1_fake)
            d_W2 -= learning_rate * (dW2_real + dW2_fake)
            d_b2 -= learning_rate * (db2_real + db2_fake)

            #training the Generator
            z = np.random.randn(1, latent_dim)
            #generating fake data using the generator
            generated_data = generator_forward(z, g_W1, g_b1, g_W2, g_b2)
            #calculating discriminator's output for the fake data
            d_fake = discriminator_forward(generated_data, d_W1, d_b1, d_W2, d_b2)

            #calculating generator loss or error
            g_loss = -np.log(d_fake)
            d_loss_gradient = (d_fake - 1) / (d_fake * (1 - d_fake))

            ##calculating generator gradients
            dW1_gen, db1_gen, dW2_gen, db2_gen = discriminator_backward(generated_data, np.ones((1, 1)), d_fake, d_W1, d_b1, d_W2, d_b2)
            dW1_gen, db1_gen, dW2_gen, db2_gen = generator_backward(z, generated_data, d_loss_gradient, g_W1, g_b1, g_W2, g_b2)

            #updating generator's weights and biases using gradient descent
            #Gradient Descent is an algorithm that is used to optimize the cost function or the error of the model
            g_W1 -= learning_rate * dW1_gen
            g_b1 -= learning_rate * db1_gen
            g_W2 -= learning_rate * dW2_gen
            g_b2 -= learning_rate * db2_gen

        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}')

    return g_W1, g_b1, g_W2, g_b2, d_W1, d_b1, d_W2, d_b2

Implementing

In [12]:
#Example data (randomly generated)
real_data = np.random.rand(100, 3)  # 00 samples of real data, each with 3 features

#Train the GAN
g_W1, g_b1, g_W2, g_b2, d_W1, d_b1, d_W2, d_b2 = train_gan(real_data, input_dim=3, hidden_dim=10, output_dim=3, latent_dim=5, epochs=1000, learning_rate=0.01)

Epoch 0, Discriminator Loss: [[1.38632109]], Generator Loss: [[0.69051012]]
Epoch 100, Discriminator Loss: [[1.38374686]], Generator Loss: [[0.70011644]]
Epoch 200, Discriminator Loss: [[1.32341788]], Generator Loss: [[0.99885128]]
Epoch 300, Discriminator Loss: [[1.33392437]], Generator Loss: [[1.5170313]]
Epoch 400, Discriminator Loss: [[1.33800336]], Generator Loss: [[1.81607187]]
Epoch 500, Discriminator Loss: [[1.31943984]], Generator Loss: [[1.99952668]]
Epoch 600, Discriminator Loss: [[1.28885323]], Generator Loss: [[2.12773211]]
Epoch 700, Discriminator Loss: [[1.25181764]], Generator Loss: [[2.22478225]]
Epoch 800, Discriminator Loss: [[1.21103261]], Generator Loss: [[2.30189018]]
Epoch 900, Discriminator Loss: [[1.16799473]], Generator Loss: [[2.36520584]]
