In [29]:
import torch
import torch.nn as nn
import torch.optim as optim

# Simple Discriminator (Critic) Model
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 1)
        )

    def forward(self, x):
        return self.model(x)

# Simple Generator Model
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(100, 512),
            nn.ReLU(),
            nn.Linear(512, 784),
            nn.Tanh()
        )

    def forward(self, z):
        return self.model(z)




def gradient_penalty(D, real, fake, device):
  alpha = torch.randn(real.size(0), 1).to(device)
  interpolated_samples = alpha * real + ((1-alpha) * fake)
  interpolated_samples = interpolated_samples.requires_grad_(True)

  d_interploated = D(interpolated_samples)

  gradients = torch.autograd.grad(
      outputs=d_interploated,
      inputs=interpolated_samples,
      grad_outputs=torch.ones_like(d_interploated),
      retain_graph=True,
      create_graph=True

  )[0] # As it return in tuple

  gradients = torch.norm(gradients, 2, dim = 1)
  penalty = (gradients - 1)**2

  return penalty

# # Gradient Penalty Function
# def compute_gradient_penalty(D, real_samples, fake_samples):
#     batch_size = real_samples.size(0)
#     # Random weight term for interpolation between real and fake samples
#     alpha = torch.rand(batch_size, 1, device=real_samples.device)
#     # Interpolate between real and fake samples
#     interpolates = alpha * real_samples + ((1 - alpha) * fake_samples)
#     interpolates.requires_grad_(True)

#     d_interpolates = D(interpolates)

#     gradients = torch.autograd.grad(
#         outputs=d_interpolates,
#         inputs=interpolates,
#         grad_outputs=torch.ones_like(d_interpolates),
#         create_graph=True,
#         retain_graph=True
#     )[0]

#     gradients = gradients.view(batch_size, -1)
#     gradient_norm = torch.sqrt(torch.sum(gradients ** 2, dim=1))
#     gradient_penalty = (gradient_norm - 1) ** 2
#     return gradient_penalty



device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
D = Discriminator().to(device)
G = Generator().to(device)
gradient_penalty(D, torch.randn(1, 784), torch.randn(1, 784), device)

tensor([0.5486], grad_fn=<PowBackward0>)

In [19]:
import torch

# Create a 2D tensor (matrix)
tensor = torch.tensor([[1.0, 2.0, 3.0],
                       [4.0, 5.0, 6.0]])

# Compute L2 norm along dimension 0
norm_dim0 = torch.norm(tensor, p=2, dim=0)
print("L2 Norm along dimension 0:", norm_dim0)

# Compute L2 norm along dimension 1
norm_dim1 = torch.norm(tensor, p=2, dim=1)
print("L2 Norm along dimension 1:", norm_dim1)

L2 Norm along dimension 0: tensor([4.1231, 5.3852, 6.7082])
L2 Norm along dimension 1: tensor([3.7417, 8.7750])


In [39]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the Conv2D-based Discriminator (Critic) Model for RGB images
class ConvDiscriminator(nn.Module):
    def __init__(self):
        super(ConvDiscriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(256, 1, kernel_size=4, stride=1, padding=0)
        )

    def forward(self, x):
        return self.model(x)

# Gradient Penalty Function
def compute_gradient_penalty(D, real_samples, fake_samples):
    batch_size, c, h, w = real_samples.shape

    # Random weight term for interpolation between real and fake samples
    alpha = torch.rand(batch_size, 1, 1, 1, device=real_samples.device)
    # Interpolate between real and fake samples
    interpolates = alpha * real_samples + (1 - alpha) * fake_samples
    interpolates.requires_grad_(True)

    # Calculate the discriminator's output on the interpolated samples
    d_interpolates = D(interpolates)

    # Compute gradients of the discriminator's output with respect to the interpolated samples
    gradients = torch.autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(d_interpolates),
        create_graph=True,
        retain_graph=True
    )[0]

    # Reshape the gradients to (batch_size, num_elements)
    gradients = gradients.view(batch_size, -1)
    # Compute the L2 norm of the gradients for each sample in the batch
    gradient_norm = gradients.norm(2, dim=1)
    # Compute the gradient penalty
    gradient_penalty = ((gradient_norm - 1) ** 2).mean()
    return gradient_penalty

# Sample usage
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
D = ConvDiscriminator().to(device)

real_samples = torch.randn(1, 3, 64, 64, device=device)  # Example real samples (batch_size=5, 3 channels, 64x64 image)
fake_samples = torch.randn(1, 3, 64, 64, device=device)  # Example fake samples

gradient_penalty = compute_gradient_penalty(D, real_samples, fake_samples)
print("Gradient Penalty:", gradient_penalty)

Gradient Penalty: tensor(3.4620, grad_fn=<MeanBackward0>)


In [70]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the Conv2D-based Discriminator (Critic) Model for RGB images
class ConvDiscriminator(nn.Module):
    def __init__(self):
        super(ConvDiscriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(256, 1, kernel_size=4, stride=1, padding=0)
        )

    def forward(self, x):
        return self.model(x)

# Gradient Penalty Function
def compute_gradient_penalty(D, real_samples, fake_samples):
    batch_size, c, h, w = real_samples.shape

    # Random weight term for interpolation between real and fake samples
    alpha = torch.rand(batch_size, 3, 64, 64, device=real_samples.device)
    # Interpolate between real and fake samples
    interpolates = alpha * real_samples + (1 - alpha) * fake_samples
    interpolates.requires_grad_(True)

    # Calculate the discriminator's output on the interpolated samples
    d_interpolates = D(interpolates)

    # Compute gradients of the discriminator's output with respect to the interpolated samples
    gradients = torch.autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(d_interpolates),
        create_graph=True,
        retain_graph=True
    )[0]

    # Reshape the gradients to (batch_size, num_elements)
    print(gradients.size())
    # gradients = gradients.view(batch_size, -1)
    # Compute the L2 norm of the gradients for each sample in the batch
    gradient_norm = gradients.norm(2, dim=1)
    # Compute the gradient penalty
    gradient_penalty = ((gradient_norm - 1) ** 2).mean()
    return gradients

# Sample usage
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
D = ConvDiscriminator().to(device)

real_samples = torch.randn(1, 3, 64, 64, device=device)  # Example real samples (batch_size=5, 3 channels, 64x64 image)
fake_samples = torch.randn(1, 3, 64, 64, device=device)  # Example fake samples

gradient_penalty = compute_gradient_penalty(D, real_samples, fake_samples)
print("Gradient Penalty:", gradient_penalty)

torch.Size([1, 3, 64, 64])
Gradient Penalty: tensor([[[[ 4.4321e-03,  4.1723e-03,  1.7816e-03,  ...,  5.4069e-03,
           -3.8142e-03, -8.7445e-04],
          [-4.3623e-05, -7.5737e-03,  1.4225e-03,  ..., -1.8151e-03,
           -7.5461e-04, -6.7127e-03],
          [ 2.1676e-03, -3.4768e-03, -6.8543e-03,  ..., -1.1667e-02,
           -5.7196e-03,  3.6098e-04],
          ...,
          [-4.7825e-03,  4.3432e-04,  1.9166e-02,  ..., -7.1255e-03,
            9.5771e-03,  6.7436e-03],
          [ 1.3610e-03, -1.8163e-03, -2.8733e-03,  ..., -1.1494e-02,
            1.8334e-03, -2.4156e-03],
          [ 1.2208e-03, -1.2457e-03, -1.4444e-03,  ..., -3.5302e-04,
           -1.2836e-03,  1.9308e-03]],

         [[-3.0312e-03,  1.3824e-03, -1.5471e-03,  ..., -1.2368e-03,
           -3.0324e-03, -1.2677e-03],
          [-6.0652e-03, -1.4605e-03, -1.1349e-02,  ...,  6.0605e-03,
            1.4260e-02,  3.4764e-03],
          [ 1.9638e-03, -8.1987e-03,  3.1535e-04,  ...,  1.5996e-02,
            6

In [82]:
import torch

X = torch.tensor([5.0, 2.0], requires_grad=True)
y = torch.tensor([2.0, 5.0], requires_grad=True)

# Define the equation
equation = 3.0 * X ** 2.0 + 2.0 * y * 2.0

# Compute gradients
gradients = torch.autograd.grad(
    outputs=equation,
    inputs=[X, y],
    grad_outputs=torch.ones_like(equation),  # Ensure grad_outputs matches the shape of `equation`
    create_graph=True,
    retain_graph=True
)

print(gradients)

print("Gradients with respect to X:", gradients[0])
print("Gradients with respect to y:", gradients[1])

(tensor([30., 12.], grad_fn=<MulBackward0>), tensor([4., 4.]))
Gradients with respect to X: tensor([30., 12.], grad_fn=<MulBackward0>)
Gradients with respect to y: tensor([4., 4.])
