In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
import torch.nn as nn

In [2]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Load the cleaned pulsar dataset (first CSV column is the index) and keep only
# the first 1000 rows.
# The path uses a forward slash: in a normal string literal '\P' is an invalid
# escape sequence (Python emits a warning for it), and a backslash separator only
# resolves on Windows. A forward slash works on every platform and matches the
# path style used when the generated data is written back to 'dataset/'.
dataRegular = pd.read_csv('dataset/Pulsar_cleaned.csv', index_col=[0]).head(1000)

# NOTE(review): not referenced by any cell visible here -- the 'Class' column is
# still part of the training tensor. Confirm whether it should be dropped.
column_to_exclude = 'Class'

# Extract list of columns
data_cols = list(dataRegular.columns)
print('Dataset columns: {}'.format(data_cols))

Dataset columns: ['EK', 'Skewness', 'Mean_DMSNR_Curve', 'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve', 'Class']


  dataRegular = pd.read_csv('dataset\Pulsar_cleaned.csv', index_col=[0])


In [4]:
# Sanity check: count the entries of one column that are strictly below zero.
column_name = 'EK'
negative_mask = dataRegular[column_name] < 0
num_negative_values = negative_mask.sum()

print(f"The column '{column_name}' has {num_negative_values} negative values.")

The column 'EK' has 0 negative values.


In [5]:
# Number of synthetic rows to generate once training has finished.
n = 14_987

In [6]:
# Copy every DataFrame column into one float32 tensor that lives on `device`.
data = torch.tensor(dataRegular.values, dtype=torch.float32, device=device)

In [7]:
class Generator(nn.Module):
    """Fully connected generator that maps a noise vector to a synthetic row.

    Architecture: Linear(noise_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, output_dim).
    The last layer has no activation, so generated values are unbounded.

    Args:
        noise_dim: Width of the input noise vector. Defaults to 7.
        hidden_dim: Number of units in the hidden layer. Defaults to 50.
        output_dim: Number of values produced per generated row. Defaults to 7.
    """

    def __init__(self, noise_dim: int = 7, hidden_dim: int = 50, output_dim: int = 7):
        super().__init__()
        # Kept under the attribute name `model` so state_dict keys stay
        # 'model.0.*' / 'model.2.*' and previously saved weights still load.
        self.model = nn.Sequential(
            nn.Linear(noise_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )

    def forward(self, x):
        """Return generated rows for a batch of noise whose last dim is `noise_dim`."""
        return self.model(x)

In [8]:
class Discriminator(nn.Module):
    """Fully connected discriminator that scores rows as real (towards 1) or fake (towards 0).

    Architecture: Linear(input_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, 1) -> Sigmoid.
    The sigmoid keeps every output in [0, 1], which is the input range that
    nn.BCELoss expects.

    Args:
        input_dim: Number of values per input row. Defaults to 7.
        hidden_dim: Number of units in the hidden layer. Defaults to 50.
    """

    def __init__(self, input_dim: int = 7, hidden_dim: int = 50):
        super().__init__()
        # Kept under the attribute name `model` so state_dict keys stay
        # 'model.0.*' / 'model.2.*' and previously saved weights still load.
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return one probability-like score per row; output shape is (batch, 1)."""
        return self.model(x)

In [9]:
# Instantiate both networks (generator first, then discriminator) on `device`.
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Binary cross-entropy is applied to the discriminator's sigmoid output.
criterion = nn.BCELoss()

# Each network gets its own Adam optimizer with the same learning rate.
optimizer_d = torch.optim.Adam(discriminator.parameters(), lr=1e-3)
optimizer_g = torch.optim.Adam(generator.parameters(), lr=1e-3)

In [10]:
num_epochs = 59948
batch_size = 1000  # Define your batch size
noise_dim = 7  # Width of the noise vector fed to the generator

# torch.randperm(N)[:batch_size] yields at most N indices, so a real batch holds
# min(batch_size, N) rows. The real labels are sized from that count; sizing them
# from batch_size alone makes BCELoss fail on a shape mismatch whenever the
# dataset has fewer rows than batch_size.
real_batch_size = min(batch_size, data.size(0))

# The label tensors never change between iterations, so they are built once,
# directly on the training device (no per-step allocation + host-to-device copy).
real_labels = torch.ones(real_batch_size, 1, device=device)
fake_labels = torch.zeros(batch_size, 1, device=device)
# Labels for the generator (all ones, as we want to fool the discriminator)
gen_labels = torch.ones(batch_size, 1, device=device)

# Note: each "epoch" here is one optimisation step on a single random batch,
# not a full pass over the dataset.
for epoch in range(num_epochs):
    # ---- Train discriminator ----
    optimizer_d.zero_grad()

    # Sample a random batch of real rows (without replacement)
    indices = torch.randperm(data.size(0), device=data.device)[:batch_size]
    real_data = data[indices]

    # Forward pass through discriminator for real data
    outputs_real = discriminator(real_data)
    d_loss_real = criterion(outputs_real, real_labels)

    # Sample a batch of noise and turn it into fake rows
    noise = torch.randn(batch_size, noise_dim, device=device)
    fake_data = generator(noise)

    # Detach so the discriminator update does not backpropagate into the generator
    outputs_fake = discriminator(fake_data.detach())
    d_loss_fake = criterion(outputs_fake, fake_labels)

    # Calculate total discriminator loss
    d_loss = d_loss_real + d_loss_fake

    # Backward pass and optimization for discriminator
    d_loss.backward()
    optimizer_d.step()

    # ---- Train generator ----
    optimizer_g.zero_grad()

    # Generate a fresh batch of fake data
    noise = torch.randn(batch_size, noise_dim, device=device)
    fake_data = generator(noise)

    # No detach here: the generator loss must flow back through the discriminator.
    # Gradients this leaves on the discriminator are cleared by
    # optimizer_d.zero_grad() at the start of the next iteration.
    outputs = discriminator(fake_data)

    # Generator loss: how far the discriminator is from calling the fakes real
    g_loss = criterion(outputs, gen_labels)

    # Backward pass and optimization for generator
    g_loss.backward()
    optimizer_g.step()

    # Print losses
    if (epoch+1) % 1000 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}")

Epoch [1000/59948], d_loss: 0.5583, g_loss: 3.2420
Epoch [2000/59948], d_loss: 0.7579, g_loss: 1.7161
Epoch [3000/59948], d_loss: 0.9976, g_loss: 1.2501
Epoch [4000/59948], d_loss: 1.0687, g_loss: 1.5908
Epoch [5000/59948], d_loss: 1.4468, g_loss: 0.9551
Epoch [6000/59948], d_loss: 0.9640, g_loss: 1.1803
Epoch [7000/59948], d_loss: 1.1043, g_loss: 1.0775
Epoch [8000/59948], d_loss: 1.3082, g_loss: 0.8236
Epoch [9000/59948], d_loss: 2.1338, g_loss: 0.5633
Epoch [10000/59948], d_loss: 1.6990, g_loss: 0.6637
Epoch [11000/59948], d_loss: 1.4820, g_loss: 0.8061
Epoch [12000/59948], d_loss: 1.6437, g_loss: 0.6684
Epoch [13000/59948], d_loss: 1.5518, g_loss: 0.6669
Epoch [14000/59948], d_loss: 1.0916, g_loss: 0.8552
Epoch [15000/59948], d_loss: 1.7814, g_loss: 0.6425
Epoch [16000/59948], d_loss: 1.2449, g_loss: 0.7493
Epoch [17000/59948], d_loss: 1.3824, g_loss: 0.7053
Epoch [18000/59948], d_loss: 1.4768, g_loss: 0.7695
Epoch [19000/59948], d_loss: 2.0637, g_loss: 0.5623
Epoch [20000/59948], 

In [11]:
# After training, generate `n` synthetic rows. no_grad() skips autograd
# bookkeeping, which is not needed for inference.
with torch.no_grad():
    # Noise is created directly on the target device instead of on the CPU
    # followed by a copy.
    test_noise = torch.randn(n, 7, device=device)
    generated_data = generator(test_noise).cpu().numpy()

# Print the first 10 rows of generated data.
# Slicing (rather than indexing 0..9) avoids an IndexError when fewer than
# 10 rows were generated.
print("Generated Data (First 10 rows):")
for row in generated_data[:10]:
    print(row)


Generated Data (First 10 rows):
[ 0.84695315  0.7125934   3.9386263  14.330328    6.523588   55.231236
 -0.08626029]
[ 9.2678571e-01  6.2243819e-01  1.3659359e+00  1.2207733e+01
  1.5691371e+01  1.9491223e+02 -3.7275977e-02]
[6.5976679e-01 1.0331978e-01 2.2273533e+00 1.0714377e+01 8.3928757e+00
 8.6621582e+01 1.8402504e-02]
[ 1.3759143e+00  3.7417129e-01  1.4391681e+00  1.2155476e+01
  1.4413596e+01  1.7653134e+02 -9.1516986e-02]
[ 0.6681709  -0.21823665  3.5894299  12.58607     7.573654   58.962498
  0.09178419]
[ 1.5383736  -0.68684435  4.981564   14.720266    6.3482876  29.211422
  0.09068404]
[5.5004072e-01 7.3943496e-01 1.5178814e+00 1.0083310e+01 7.5817356e+00
 6.7732361e+01 1.4401743e-02]
[ 3.2633808e-01  1.8684515e+00  5.1117120e+00  2.5428261e+01
  1.3146632e+01  1.1116849e+02 -7.7357572e-03]
[ 8.6241663e-01  4.3514350e-01  1.9082512e+00  1.3617674e+01
  1.3227101e+01  1.5454631e+02 -4.0718421e-02]
[ 1.9267232   0.15154855  7.0509768  25.770985    6.974245   20.151089
  0.1652

In [12]:
# pandas is already imported (as `pd`) in the notebook's first cell, so it is not
# re-imported here.
# Label the synthetic columns with the source dataset's column names (`data_cols`,
# taken from dataRegular.columns, the same frame the training tensor was built
# from). Without `columns=` the CSV header is the integer positions 0..6.
generated_df = pd.DataFrame(generated_data, columns=data_cols)

# Write the synthetic rows without the DataFrame index.
generated_df.to_csv('dataset/generated_data_method2.csv', index=False)


Generisani podaci su uspešno sačuvani u CSV fajl.
