In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from numericGAN import Generator, Discriminator
import pandas as pd

In [2]:

# Seed PyTorch's global RNG so that torch-based randomness executed after this
# cell (DataLoader shuffling, latent-noise sampling) is repeatable when the
# notebook is run top to bottom on a fresh kernel.
seed = 42
torch.manual_seed(seed)

# Load the Iris dataset using sklearn; only the features are used (shape: (150, 4))
iris = load_iris()
features_raw = iris.data

# Normalize the features with MinMaxScaler. The fitted `scaler` is kept so that
# generated samples can later be mapped back with `inverse_transform`.
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features_raw)
data = torch.tensor(features_scaled, dtype=torch.float32)

# GAN training parameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
latent_dim = 10  # Latent space dimensionality
data_dim = data.shape[1]  # Number of features
batch_size = 32
epochs = 200
lr = 0.0002

# Create DataLoader (reshuffled every epoch; yields 1-tuples containing a batch of rows)
data_loader = DataLoader(TensorDataset(data), batch_size=batch_size, shuffle=True)

In [3]:
# Instantiate the two networks, then move each one onto the selected device
generator = Generator(input_dim=latent_dim, output_dim=data_dim)
generator = generator.to(device)
discriminator = Discriminator(input_dim=data_dim)
discriminator = discriminator.to(device)

# Binary cross-entropy criterion shared by both training steps, plus one Adam
# optimizer per network, both driven by the same learning rate
loss_fn = nn.BCELoss()
optimizer_d = optim.Adam(discriminator.parameters(), lr=lr)
optimizer_g = optim.Adam(generator.parameters(), lr=lr)


In [5]:


# Training loop: for every mini-batch, take one discriminator step followed by
# one generator step.
for epoch in range(epochs):
    for (real_batch,) in data_loader:
        real_batch = real_batch.to(device)
        # Number of rows in *this* batch. It is stored under its own name so the
        # notebook-level `batch_size` setting (used to build the DataLoader) is
        # not overwritten as a side effect of training.
        n_real = real_batch.size(0)

        # Targets: 1 for real rows, 0 for generated rows; created directly on
        # the target device to avoid an extra host-to-device copy.
        real_labels = torch.ones(n_real, 1, device=device)
        fake_labels = torch.zeros(n_real, 1, device=device)

        # Train discriminator
        z = torch.randn(n_real, latent_dim, device=device)
        fake_data = generator(z)

        d_real = discriminator(real_batch)
        # detach() keeps the discriminator's backward pass out of the generator's graph
        d_fake = discriminator(fake_data.detach())

        d_loss_real = loss_fn(d_real, real_labels)
        d_loss_fake = loss_fn(d_fake, fake_labels)
        d_loss = d_loss_real + d_loss_fake

        optimizer_d.zero_grad()
        d_loss.backward()
        optimizer_d.step()

        # Train generator: draw fresh noise and score the fakes against the
        # "real" labels, so the loss is low when the discriminator is fooled.
        z = torch.randn(n_real, latent_dim, device=device)
        fake_data = generator(z)
        g_loss = loss_fn(discriminator(fake_data), real_labels)

        optimizer_g.zero_grad()
        g_loss.backward()
        optimizer_g.step()

    # Print loss every 100 epochs (values are those of the epoch's last mini-batch)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")



Epoch 0, D Loss: 1.4312, G Loss: 0.7871
Epoch 100, D Loss: 1.3525, G Loss: 0.7272


In [9]:
# Generate the same number of samples as the real dataset, derived from `data`
# instead of a hard-coded row count.
n_samples = data.shape[0]

# Inference only: no_grad() skips building the autograd graph, so the result
# can be converted to NumPy without an explicit detach().
with torch.no_grad():
    z = torch.randn(n_samples, latent_dim, device=device)
    synthetic_data = generator(z).cpu().numpy()

synthetic_data = scaler.inverse_transform(synthetic_data)  # Rescale back to the original range

# Persist the samples; columns keep pandas' default integer labels
synthetic_pd = pd.DataFrame(synthetic_data)
synthetic_pd.to_csv("synthetic_iris.csv", index=False)


In [10]:
# Display the synthetic samples as this cell's output
synthetic_pd

Unnamed: 0,0,1,2,3
0,5.893703,2.877456,5.363830,1.632013
1,5.488304,2.606469,4.206050,1.247747
2,6.078844,2.821317,5.679056,1.616346
3,5.547008,2.622879,4.397995,1.274534
4,5.511443,2.669280,4.176014,1.244989
...,...,...,...,...
145,5.844644,2.702477,4.847985,1.495224
146,5.599219,2.663075,4.415401,1.178625
147,5.307764,2.549189,3.974720,1.164996
148,5.287980,2.597840,3.942684,1.118822
