In [22]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import torch
import torch.nn as nn

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [23]:
# Run on the GPU when CUDA reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [24]:
# Load the cleaned pulsar table (first CSV column becomes the index) and keep
# only its first 1000 rows.
dataRegular = pd.read_csv(
    '/kaggle/input/pulsar-classification-for-class-prediction-cleaned/Pulsar_cleaned.csv',
    index_col=[0],
).iloc[:1000]

# NOTE(review): this name is not referenced by any of the visible cells, so the
# 'Class' column stays in the frame -- confirm whether it was meant to be dropped.
column_to_exclude = 'Class'

# Column names in frame order, echoed as a quick sanity check.
data_cols = dataRegular.columns.tolist()
print(f'Dataset columns: {data_cols}')

Dataset columns: ['EK', 'Skewness', 'Mean_DMSNR_Curve', 'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve', 'Class']


In [25]:
# Sanity check: count the entries of a single column that are strictly below zero.
column_name = 'EK'
num_negative_values = dataRegular[column_name].lt(0).sum()

print(f"The column '{column_name}' has {num_negative_values} negative values.")

The column 'EK' has 0 negative values.


In [26]:
# Number of noise vectors (and therefore synthetic rows) drawn from the
# generator in the sampling cell after training.
n = 14_987

In [27]:
# Copy every column of the frame into one float32 tensor that lives on the
# selected device; rows of this tensor are the "real" samples used in training.
data = torch.tensor(dataRegular.to_numpy(), dtype=torch.float32, device=device)

In [28]:
class Generator(nn.Module):
    """Two-layer MLP that maps a noise vector to one synthetic table row.

    Architecture: ``Linear(noise_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, out_dim)``.
    The last layer has no activation, so outputs are unbounded real values.

    Args:
        noise_dim: Width of the input noise vector. Defaults to 7.
        hidden_dim: Width of the hidden layer. Defaults to 50.
        out_dim: Number of values produced per generated row. Defaults to 7.

    The defaults reproduce the previously hard-coded 7 -> 50 -> 7 network, so
    ``Generator()`` builds the same model as before. The submodule name
    ``model`` and the layer order are kept, which keeps state-dict keys stable.
    """

    def __init__(self, noise_dim: int = 7, hidden_dim: int = 50, out_dim: int = 7):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map noise ``x`` (last dimension ``noise_dim``) to rows with last dimension ``out_dim``."""
        return self.model(x)

In [29]:
class Discriminator(nn.Module):
    """Two-layer MLP that scores a table row with a value in [0, 1].

    Architecture: ``Linear(in_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, 1) -> Sigmoid``.
    Because the output already passed through a sigmoid, it is meant to be
    paired with a probability-based loss such as ``nn.BCELoss``.

    Args:
        in_dim: Number of values per input row. Defaults to 7.
        hidden_dim: Width of the hidden layer. Defaults to 50.

    The defaults reproduce the previously hard-coded 7 -> 50 -> 1 network, so
    ``Discriminator()`` builds the same model as before. The submodule name
    ``model`` and the layer order are kept, which keeps state-dict keys stable.
    """

    def __init__(self, in_dim: int = 7, hidden_dim: int = 50):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one sigmoid score per row of ``x`` (output's last dimension is 1)."""
        return self.model(x)

In [30]:
# Build the adversarial pair (generator first, then discriminator) and move
# each network onto the selected device.
generator = Generator()
generator = generator.to(device)
discriminator = Discriminator()
discriminator = discriminator.to(device)

# Binary cross-entropy is the training objective for both networks.
criterion = nn.BCELoss()

# One Adam optimizer per network, each with a step size of 1e-3.
optimizer_d = torch.optim.Adam(discriminator.parameters(), lr=1e-3)
optimizer_g = torch.optim.Adam(generator.parameters(), lr=1e-3)

In [31]:
num_epochs = 59948  # iterations; each one is a single discriminator update followed by a single generator update
batch_size = 1000  # Define your batch size

# The targets for generated samples never change, so build them once, directly
# on the training device, instead of allocating on the CPU and copying each iteration.
fake_labels = torch.zeros(batch_size, 1, device=device)  # discriminator target for generated rows
gen_labels = torch.ones(batch_size, 1, device=device)    # generator target: make the discriminator answer "real"

for epoch in range(num_epochs):
    # ---- Train discriminator ----
    optimizer_d.zero_grad()

    # Sample a random batch of real rows without replacement. The slice holds
    # fewer than batch_size rows when the dataset itself is smaller than batch_size.
    indices = torch.randperm(data.size(0), device=data.device)[:batch_size]
    real_data = data[indices]
    # Size the labels from the rows actually drawn: BCELoss needs input and
    # target shapes to agree, and a fixed batch_size would break on small datasets.
    real_labels = torch.ones(real_data.size(0), 1, device=device)

    # Discriminator loss on real rows (target 1)
    outputs_real = discriminator(real_data)
    d_loss_real = criterion(outputs_real, real_labels)

    # Discriminator loss on generated rows (target 0)
    noise = torch.randn(batch_size, 7, device=device)
    fake_data = generator(noise)
    outputs_fake = discriminator(fake_data.detach())  # Detach to avoid backprop through generator
    d_loss_fake = criterion(outputs_fake, fake_labels)

    # Total discriminator loss, then one optimizer step
    d_loss = d_loss_real + d_loss_fake
    d_loss.backward()
    optimizer_d.step()

    # ---- Train generator ----
    optimizer_g.zero_grad()

    # Fresh noise for the generator update
    noise = torch.randn(batch_size, 7, device=device)
    fake_data = generator(noise)

    # No detach here: the gradient has to flow through the discriminator back
    # into the generator's parameters.
    outputs = discriminator(fake_data)
    g_loss = criterion(outputs, gen_labels)

    # This backward pass also leaves gradients on the discriminator's parameters;
    # they are discarded by optimizer_d.zero_grad() at the top of the next iteration.
    g_loss.backward()
    optimizer_g.step()

    # Print losses
    if (epoch+1) % 1000 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}")

Epoch [1000/59948], d_loss: 0.8528, g_loss: 3.1275
Epoch [2000/59948], d_loss: 1.1607, g_loss: 1.3230
Epoch [3000/59948], d_loss: 1.4415, g_loss: 0.8289
Epoch [4000/59948], d_loss: 0.9960, g_loss: 1.3960
Epoch [5000/59948], d_loss: 1.0652, g_loss: 1.0916
Epoch [6000/59948], d_loss: 1.5607, g_loss: 0.7492
Epoch [7000/59948], d_loss: 0.9989, g_loss: 1.1843
Epoch [8000/59948], d_loss: 1.9098, g_loss: 0.6517
Epoch [9000/59948], d_loss: 1.2129, g_loss: 1.0738
Epoch [10000/59948], d_loss: 0.8543, g_loss: 1.2454
Epoch [11000/59948], d_loss: 1.4961, g_loss: 0.7028
Epoch [12000/59948], d_loss: 1.6187, g_loss: 0.7356
Epoch [13000/59948], d_loss: 1.7662, g_loss: 0.6144
Epoch [14000/59948], d_loss: 1.2414, g_loss: 0.7974
Epoch [15000/59948], d_loss: 1.5565, g_loss: 0.6122
Epoch [16000/59948], d_loss: 1.4428, g_loss: 0.7425
Epoch [17000/59948], d_loss: 0.9989, g_loss: 1.0014
Epoch [18000/59948], d_loss: 1.1646, g_loss: 0.8796
Epoch [19000/59948], d_loss: 1.0751, g_loss: 0.9464
Epoch [20000/59948], 

In [33]:
# After training, generate some synthetic data.
# no_grad: this is pure sampling, so autograd tracking is unnecessary.
with torch.no_grad():
    # Draw the noise directly on the model's device rather than on the CPU and copying.
    test_noise = torch.randn(n, 7, device=device)
    generated_data = generator(test_noise).cpu().numpy()

# Print the first 10 rows of generated data. Slicing (rather than indexing
# 0..9) prints fewer rows instead of raising IndexError when n < 10.
print("Generated Data (First 10 rows):")
for row in generated_data[:10]:
    print(row)


Generated Data (First 10 rows):
[-2.9681918e-01  1.0031540e+00  2.0251722e+00  3.5118515e+01
  2.0136461e+01  3.2089490e+02  4.3688032e-01]
[ 0.04682364  0.2830529   0.8552967  12.878707    3.832282   35.880733
  0.10641435]
[-0.25088254  0.26982954  0.6077123  14.402217    7.01315    98.40495
  0.15741426]
[ 0.1802651   0.5250217   1.2741059  18.310177    7.4375772  97.65476
  0.18637349]
[-4.0521946e-02  2.0876279e-01  6.7777133e-01  1.3576564e+01
  5.8237782e+00  7.4783218e+01  1.3858923e-01]
[ -0.4313601    0.4780166   -0.20185645  13.825994     8.136069
 120.84089      0.17103374]
[  0.42920446   0.86656094   2.3039644   30.004452    12.939145
 189.39601      0.36777666]
[ 0.13230881  0.50894594  1.1787046  15.245796    3.8341026  35.7682
  0.12431557]
[ 0.11960083  0.21583596  0.64479274 12.566814    5.1564317  61.053493
  0.12727572]
[  0.18275192   0.3647568    1.1969388   17.197601     7.5651674
 103.07168      0.1819869 ]
