In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
import torch.nn as nn

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# Build the path with os.path.join instead of a hard-coded backslash: the
# original literal 'dataset\Pulsar_cleaned.csv' contained the invalid escape
# sequence '\P' (Python warns about it) and only resolved on Windows.
csv_path = os.path.join('dataset', 'Pulsar_cleaned.csv')
column_to_exclude = 'Class'
# Load the CSV (first column is the index) and keep only the first 1000 rows.
dataRegular = pd.read_csv(csv_path, index_col=[0]).head(1000)
# Extract list of columns
data_cols = list(dataRegular.columns)
print('Dataset columns: {}'.format(data_cols))

Dataset columns: ['EK', 'Skewness', 'Mean_DMSNR_Curve', 'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve', 'Class']


  dataRegular = pd.read_csv('dataset\Pulsar_cleaned.csv', index_col=[0])


In [4]:
# Sanity check: count the entries of one column that are strictly below zero.
column_name = 'EK'
is_negative = dataRegular[column_name] < 0
num_negative_values = is_negative.sum()

print(f"The column '{column_name}' has {num_negative_values} negative values.")

The column 'EK' has 0 negative values.


In [5]:
# Number of noise rows fed to the trained generator, i.e. how many synthetic
# samples are produced after training.
n = 14_987

In [6]:
# Materialise every DataFrame column as a single float32 tensor on `device`.
data = torch.tensor(
    dataRegular.values,
    dtype=torch.float32,
    device=device,
)

In [7]:
class Generator(nn.Module):
    """Small fully connected generator mapping noise vectors to synthetic rows.

    Architecture: Linear(noise_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, out_dim).
    The output layer has no activation, so generated values are unbounded.

    Args:
        noise_dim: Width of the input noise vector (default 7).
        hidden_dim: Width of the hidden layer (default 50).
        out_dim: Number of features in each generated row (default 7).
    """

    def __init__(self, noise_dim: int = 7, hidden_dim: int = 50, out_dim: int = 7):
        super().__init__()
        # Kept as a single nn.Sequential named `model` so parameter names
        # (model.0.*, model.2.*) stay the same as before.
        self.model = nn.Sequential(
            nn.Linear(noise_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim),
        )

    def forward(self, x):
        """Return generated samples for a batch of noise `x` of shape (batch, noise_dim)."""
        return self.model(x)

In [8]:
class Discriminator(nn.Module):
    """Small fully connected discriminator scoring rows as real (1) or fake (0).

    Architecture: Linear(in_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, 1) -> Sigmoid.
    The sigmoid output lies in [0, 1], which is what nn.BCELoss expects.

    Args:
        in_dim: Number of features in each input row (default 7).
        hidden_dim: Width of the hidden layer (default 50).
    """

    def __init__(self, in_dim: int = 7, hidden_dim: int = 50):
        super().__init__()
        # Kept as a single nn.Sequential named `model` so parameter names
        # (model.0.*, model.2.*) stay the same as before.
        self.model = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return a (batch, 1) tensor of probabilities for input `x` of shape (batch, in_dim)."""
        return self.model(x)

In [9]:
# Instantiate both networks (generator first, then discriminator) on `device`.
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Binary cross-entropy on the discriminator's sigmoid output, plus one Adam
# optimizer per network, both using the same learning rate.
criterion = nn.BCELoss()
optimizer_g = torch.optim.Adam(params=generator.parameters(), lr=1e-3)
optimizer_d = torch.optim.Adam(params=discriminator.parameters(), lr=1e-3)

In [10]:
# Adversarial training loop.
# NOTE: each "epoch" below is a single update step on one random batch, not a
# full pass over the dataset.
num_epochs = 59948
batch_size = 1000  # Define your batch size
noise_dim = 7  # Must match the generator's input width

# randperm(...)[:batch_size] silently yields fewer rows when the dataset is
# smaller than batch_size, so the real labels are sized from the number of rows
# that will actually be sampled; otherwise BCELoss fails on a shape mismatch.
real_batch_size = min(batch_size, data.size(0))

# Label tensors never change between iterations: build them once, on the device.
real_labels = torch.ones(real_batch_size, 1, device=device)
fake_labels = torch.zeros(batch_size, 1, device=device)
# Labels for the generator (all ones, as we want to fool the discriminator)
gen_labels = torch.ones(batch_size, 1, device=device)

for epoch in range(num_epochs):
    # ---- Train discriminator ----
    optimizer_d.zero_grad()

    # Sample a random batch of real rows (without replacement)
    indices = torch.randperm(data.size(0), device=data.device)[:batch_size]
    real_data = data[indices]

    # Forward pass through discriminator for real data
    outputs_real = discriminator(real_data)
    d_loss_real = criterion(outputs_real, real_labels)

    # Sample a batch of noise and turn it into fake data
    noise = torch.randn(batch_size, noise_dim, device=device)
    fake_data = generator(noise)

    # Forward pass through discriminator for fake data
    outputs_fake = discriminator(fake_data.detach())  # Detach to avoid backprop through generator
    d_loss_fake = criterion(outputs_fake, fake_labels)

    # Total discriminator loss, backward pass and optimizer step
    d_loss = d_loss_real + d_loss_fake
    d_loss.backward()
    optimizer_d.step()

    # ---- Train generator ----
    optimizer_g.zero_grad()

    # Generate a fresh batch of fake data
    noise = torch.randn(batch_size, noise_dim, device=device)
    fake_data = generator(noise)

    # Forward pass through discriminator for fake data (no detach: gradients
    # must flow back into the generator)
    outputs = discriminator(fake_data)

    # Generator loss: how far the discriminator is from calling the fakes real
    g_loss = criterion(outputs, gen_labels)

    # Backward pass and optimization for generator
    g_loss.backward()
    optimizer_g.step()

    # Print losses every 1000 steps
    if (epoch+1) % 1000 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}")

Epoch [1000/59948], d_loss: 1.2584, g_loss: 1.5656
Epoch [2000/59948], d_loss: 1.1527, g_loss: 1.0173
Epoch [3000/59948], d_loss: 1.0353, g_loss: 0.9949
Epoch [4000/59948], d_loss: 1.3093, g_loss: 0.7799
Epoch [5000/59948], d_loss: 1.8310, g_loss: 0.5772
Epoch [6000/59948], d_loss: 0.8478, g_loss: 1.1891
Epoch [7000/59948], d_loss: 1.1410, g_loss: 0.8888
Epoch [8000/59948], d_loss: 1.6024, g_loss: 0.6784
Epoch [9000/59948], d_loss: 1.5593, g_loss: 0.7323
Epoch [10000/59948], d_loss: 1.0621, g_loss: 1.1329
Epoch [11000/59948], d_loss: 1.7415, g_loss: 0.6605
Epoch [12000/59948], d_loss: 1.7091, g_loss: 0.6404
Epoch [13000/59948], d_loss: 1.2182, g_loss: 0.8946
Epoch [14000/59948], d_loss: 1.1212, g_loss: 0.8952
Epoch [15000/59948], d_loss: 0.8474, g_loss: 1.2709
Epoch [16000/59948], d_loss: 1.3282, g_loss: 0.9083
Epoch [17000/59948], d_loss: 0.8393, g_loss: 1.1902
Epoch [18000/59948], d_loss: 1.5424, g_loss: 0.6742
Epoch [19000/59948], d_loss: 1.1585, g_loss: 0.8428
Epoch [20000/59948], 

In [11]:
# After training, generate some synthetic data.
# Gradients are not needed for sampling, so run under no_grad.
with torch.no_grad():
    test_noise = torch.randn(n, 7, device=device)
    generated_data = generator(test_noise).cpu().numpy()

# Print the first 10 rows of generated data.
# Slicing (rather than indexing 0..9) avoids an IndexError when fewer than
# 10 rows were generated.
print("Generated Data (First 10 rows):")
for row in generated_data[:10]:
    print(row)


Generated Data (First 10 rows):
[ 3.1860466e+00  2.0194799e-01 -6.0499555e-01  1.4202560e+01
  8.4486151e+00  1.4434401e+02  4.4897716e-02]
[ 2.3206987e+00 -3.8898483e-01  2.7384022e-01  1.2613256e+01
  5.9297152e+00  7.1087677e+01 -9.9303387e-03]
[ 1.9506946  -0.05230838  0.8383049  12.04418     4.96422    41.169857
  0.04972282]
[ 2.7013154e+00 -3.6071756e-01  1.9155035e+00  1.8516155e+01
  6.0105362e+00  3.4019936e+01  2.4519835e-02]
[ 2.5313444   0.33156818  0.272075   13.762238    6.838144   88.61497
  0.12366268]
[ 2.1856077e+00 -4.8100397e-01  1.2678405e+00  1.6425940e+01
  5.6398525e+00  4.3576805e+01  3.5908755e-02]
[ 2.4573321   0.33891845  0.51872206 14.407463    5.944672   88.61234
  0.09219527]
[ 3.4737475e+00  1.8174601e-01 -1.0145031e+00  1.3026339e+01
  7.3285823e+00  1.3328793e+02  1.1952549e-01]
[ 2.4753847e+00 -4.1604498e-01 -1.8301348e-01  1.1918360e+01
  5.2419033e+00  8.6711426e+01 -2.5017913e-02]
[2.4993372e+00 1.1398357e-01 1.6060369e-01 1.0495024e+01 6.1622310e