In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
import torch.nn as nn

In [2]:
# Seed PyTorch's RNG so weight initialisation, batch sampling and noise draws
# are repeatable across Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load the cleaned pulsar table; the first CSV column is used as the row index.
# Forward slashes keep the path portable and avoid the invalid "\P" escape
# sequence that the previous literal contained (it triggered a SyntaxWarning
# and only resolved on Windows).
dataRegular = pd.read_csv('../dataset/Pulsar_cleaned.csv', index_col=[0])
column_to_exclude = 'Class'  # not referenced in the visible cells; kept for any later use
# Work with the first 1000 rows only.
dataRegular = dataRegular.head(1000)
# Extract list of columns
data_cols = list(dataRegular.columns)
print('Dataset columns: {}'.format(data_cols))

Dataset columns: ['EK', 'Skewness', 'Mean_DMSNR_Curve', 'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve', 'Class']


  dataRegular = pd.read_csv('dataset\Pulsar_cleaned.csv', index_col=[0])


In [4]:
# Sanity check: count how many entries of one column are below zero.
column_name = 'EK'
num_negative_values = dataRegular[column_name].lt(0).sum()

print(f"The column '{column_name}' has {num_negative_values} negative values.")

The column 'EK' has 0 negative values.


In [5]:
# Number of synthetic rows drawn from the generator after training.
n = 14_987

In [6]:
# Turn the whole DataFrame (every column) into one float32 tensor that lives
# on the training device.
data = torch.tensor(dataRegular.to_numpy(), dtype=torch.float32, device=device)

In [7]:
class Generator(nn.Module):
    """Two-layer MLP that maps a noise vector to one synthetic data row.

    Args:
        noise_dim: Width of the input noise vector (default 7).
        hidden_dim: Width of the single hidden layer (default 50).
        output_dim: Number of values produced per row (default 7).

    The defaults reproduce the original fixed 7 -> 50 -> 7 architecture, so
    ``Generator()`` behaves as before. The output layer has no activation,
    so generated values are unbounded.
    """

    def __init__(self, noise_dim=7, hidden_dim=50, output_dim=7):
        super().__init__()
        # Attribute name and layer order are kept so state_dict keys are unchanged.
        self.model = nn.Sequential(
            nn.Linear(noise_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        """Return generated rows for a batch of noise ``x`` (last dim = noise_dim)."""
        return self.model(x)

In [8]:
class Discriminator(nn.Module):
    """Two-layer MLP that scores a data row with a value in (0, 1).

    Args:
        input_dim: Number of values per input row (default 7).
        hidden_dim: Width of the single hidden layer (default 50).

    The defaults reproduce the original fixed 7 -> 50 -> 1 architecture, so
    ``Discriminator()`` behaves as before. The final sigmoid makes the output
    directly usable with ``nn.BCELoss``.
    """

    def __init__(self, input_dim=7, hidden_dim=50):
        super().__init__()
        # Attribute name and layer order are kept so state_dict keys are unchanged.
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return one sigmoid score per row of ``x`` (shape: batch x 1)."""
        return self.model(x)

In [9]:
# Instantiate both networks (generator first, then discriminator) and move
# them onto the selected device.
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Binary cross-entropy on the discriminator's sigmoid output, plus one Adam
# optimizer per network, both with the same step size.
criterion = nn.BCELoss()
optimizer_g = torch.optim.Adam(params=generator.parameters(), lr=1e-3)
optimizer_d = torch.optim.Adam(params=discriminator.parameters(), lr=1e-3)

In [10]:
num_epochs = 59948
batch_size = 1000  # Define your batch size

if data.size(0) == 0:
    raise ValueError("Cannot train on an empty dataset.")

# Never request more rows than the dataset holds. Previously the labels were
# always sized by `batch_size` while the real batch was capped by the data
# length, so BCELoss failed on a shape mismatch whenever data had fewer rows.
effective_batch = min(batch_size, data.size(0))
noise_dim = 7  # width of the generator's input layer

# The targets never change between iterations, so build them once and
# directly on the training device instead of CPU-then-copy every step.
real_labels = torch.ones(effective_batch, 1, device=device)
fake_labels = torch.zeros(effective_batch, 1, device=device)
# Labels for the generator (all ones, as we want to fool the discriminator)
gen_labels = real_labels

# Note: each "epoch" here is a single optimisation step on one random batch.
for epoch in range(num_epochs):
    # ---- Train discriminator ----
    optimizer_d.zero_grad()

    # Sample a random batch of real rows (without replacement).
    indices = torch.randperm(data.size(0), device=data.device)[:effective_batch]
    real_data = data[indices]

    # Discriminator loss on real data (target = 1).
    outputs_real = discriminator(real_data)
    d_loss_real = criterion(outputs_real, real_labels)

    # Discriminator loss on generated data (target = 0).
    noise = torch.randn(effective_batch, noise_dim, device=device)
    fake_data = generator(noise)
    # Detach so this backward pass does not reach the generator's weights.
    outputs_fake = discriminator(fake_data.detach())
    d_loss_fake = criterion(outputs_fake, fake_labels)

    # Total discriminator loss, backward pass and update.
    d_loss = d_loss_real + d_loss_fake
    d_loss.backward()
    optimizer_d.step()

    # ---- Train generator ----
    optimizer_g.zero_grad()

    # Fresh noise; no detach here because gradients must flow back through
    # the discriminator into the generator.
    noise = torch.randn(effective_batch, noise_dim, device=device)
    fake_data = generator(noise)
    outputs = discriminator(fake_data)

    # Generator loss: how far the discriminator is from calling fakes "real".
    g_loss = criterion(outputs, gen_labels)

    # Backward pass and update. Gradients that land on the discriminator here
    # are discarded by optimizer_d.zero_grad() at the top of the next step.
    g_loss.backward()
    optimizer_g.step()

    # Print losses
    if (epoch+1) % 1000 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}")

Epoch [1000/59948], d_loss: 0.2223, g_loss: 3.2321
Epoch [2000/59948], d_loss: 1.4956, g_loss: 1.1060
Epoch [3000/59948], d_loss: 1.8648, g_loss: 0.9505
Epoch [4000/59948], d_loss: 1.6399, g_loss: 0.8150
Epoch [5000/59948], d_loss: 1.0693, g_loss: 1.0088
Epoch [6000/59948], d_loss: 1.2574, g_loss: 0.9345
Epoch [7000/59948], d_loss: 1.1872, g_loss: 1.0096
Epoch [8000/59948], d_loss: 1.3271, g_loss: 0.8179
Epoch [9000/59948], d_loss: 1.7823, g_loss: 0.7250
Epoch [10000/59948], d_loss: 1.4776, g_loss: 0.7416
Epoch [11000/59948], d_loss: 1.3487, g_loss: 0.7980
Epoch [12000/59948], d_loss: 1.5666, g_loss: 0.8336
Epoch [13000/59948], d_loss: 1.0000, g_loss: 1.1074
Epoch [14000/59948], d_loss: 1.5576, g_loss: 0.7347
Epoch [15000/59948], d_loss: 1.0541, g_loss: 0.9600
Epoch [16000/59948], d_loss: 1.7133, g_loss: 0.6171
Epoch [17000/59948], d_loss: 1.6886, g_loss: 0.5987
Epoch [18000/59948], d_loss: 2.0304, g_loss: 0.6552
Epoch [19000/59948], d_loss: 0.9853, g_loss: 1.0579
Epoch [20000/59948], 

In [11]:
# After training, generate some synthetic data
# After training, draw `n` synthetic rows from the generator.
# no_grad: this is inference only, so autograd bookkeeping is skipped.
with torch.no_grad():
    # Create the noise directly on the target device (no CPU-then-copy).
    test_noise = torch.randn(n, 7, device=device)
    generated_data = generator(test_noise).cpu().numpy()

# Print the first 10 rows of generated data. Slicing instead of indexing
# 0..9 keeps this safe when fewer than 10 rows were generated.
print("Generated Data (First 10 rows):")
for row in generated_data[:10]:
    print(row)


Generated Data (First 10 rows):
[-0.45784327 -0.9742362   8.832596   29.035347    7.8551416  44.7109
 -0.23122078]
[ -0.4067379  -1.7956743   3.7219641  22.44391    21.179878  220.56863
  -0.3612128]
[ -0.62063813  -1.3959024    2.553611    12.408081    13.852491
 154.8984      -0.24041934]
[ -1.0087315   -4.5938616    5.2131004   17.84923     31.53973
 419.19092     -0.52172714]
[ -0.5710578   -2.3050046    3.6486945   11.625623    19.268476
 244.4461      -0.35948157]
[-0.2686791  -1.1203265  11.39431    42.067734   11.127461   55.7636
 -0.26746377]
[-0.25465968 -0.73292273  6.357391   28.610252    9.15034    45.957027
 -0.18199109]
[-0.13360399 -0.5461153   3.5570166  18.27758     6.126438   33.153446
 -0.10827979]
[-0.20981285 -0.8620524   2.0033736   7.440011    6.7533207  72.701775
 -0.08543736]
[ 0.5665717  -0.24404709 14.042959   43.12979     6.9799104  17.764317
 -0.24006793]


In [12]:
# pandas is already imported in the notebook's first cell, so it is not
# re-imported here.
# Wrap the generated array in a DataFrame. No column names are supplied, so
# the CSV header is pandas' default integer labels.
# NOTE(review): consider passing columns=data_cols so the file carries the
# original feature names; confirm what downstream readers expect first.
generated_df = pd.DataFrame(generated_data)

# Persist the synthetic rows without the row index.
generated_df.to_csv('../dataset/generated_data_method2.csv', index=False)
