In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import seaborn as sns

# Load Data
# Read the raw dataset from 'data.csv' into a DataFrame; later cells read the
# 'NO2' column from this frame.
df = pd.read_csv('data.csv')
# Print the (rows, columns) shape as a quick sanity check on the load.
print("Dataset size:", df.shape)
# Preview the first rows of the frame.
df.head()

In [None]:

# Extract the NO2 column as an (N, 1) column vector, dropping missing values.
x = df['NO2'].dropna().values.reshape(-1, 1)
if x.size == 0:
    # mean()/std() of an empty array would silently produce NaN downstream.
    raise ValueError("Column 'NO2' has no non-missing values.")

# Normalization is crucial for GAN stability
x_mean, x_std = x.mean(), x.std()
if not np.isfinite(x_std) or x_std == 0:
    # A constant (or non-finite) column would turn the division below into NaN/inf.
    raise ValueError("Column 'NO2' has zero or non-finite standard deviation; cannot standardize.")
x_scaled = (x - x_mean) / x_std

# Transformation x -> z
# z = x_scaled + ar * sin(br * x_scaled), with ar and br derived from the constant r.
r = 102483084
ar = 0.5 * (r % 7)        # r % 7 == 4  ->  ar == 2.0
br = 0.3 * ((r % 5) + 1)  # r % 5 == 4  ->  br == 1.5

z = x_scaled + ar * np.sin(br * x_scaled)
# Copy into a float32 tensor (the same dtype the legacy torch.FloatTensor
# constructor produced) using the current torch.tensor factory.
z_tensor = torch.tensor(z, dtype=torch.float32)

In [None]:
class Generator(nn.Module):
    """Fully connected generator network.

    Stacks two 64-unit hidden layers with ReLU activations and a final linear
    layer with no output activation.

    Args:
        input_dim: Number of features in each input vector.
        output_dim: Number of features in each produced sample.
    """

    def __init__(self, input_dim=1, output_dim=1):
        super().__init__()
        hidden = 64
        layers = [
            nn.Linear(input_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, output_dim),
        ]
        # The attribute stays named `main` so state_dict keys remain `main.<idx>.*`.
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        generated = self.main(x)
        return generated

class Discriminator(nn.Module):
    """Fully connected binary classifier that scores samples in (0, 1).

    Layer widths are input_dim -> 64 -> 32 -> 1, with LeakyReLU(0.2) between
    the linear layers and a Sigmoid on the output.

    Args:
        input_dim: Number of features in each input sample.
    """

    def __init__(self, input_dim=1):
        super().__init__()
        negative_slope = 0.2
        layers = [
            nn.Linear(input_dim, 64),
            nn.LeakyReLU(negative_slope),
            nn.Linear(64, 32),
            nn.LeakyReLU(negative_slope),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        # The attribute stays named `main` so state_dict keys remain `main.<idx>.*`.
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid score for each row of ``x``."""
        score = self.main(x)
        return score

In [None]:
def train_gan(z_tensor, epochs=10000, batch_size=128, lr=0.0002, log_every=1000, seed=None):
    """Train a Generator/Discriminator pair on the samples in ``z_tensor``.

    Each "epoch" is a single update step: one discriminator update on a random
    mini-batch of real samples plus a batch of generated samples, followed by
    one generator update.

    Args:
        z_tensor: Tensor of real samples, indexed along its first dimension.
            Rows are fed to a default ``Discriminator()`` (``input_dim=1``),
            so the expected shape is (N, 1).
        epochs: Number of update steps to run.
        batch_size: Number of real and of fake samples per step. Real samples
            are drawn with replacement.
        lr: Learning rate used for both Adam optimizers.
        log_every: Print the losses every ``log_every`` steps; ``0`` or
            ``None`` disables the periodic printout.
        seed: If given, passed to ``torch.manual_seed`` before the networks
            are created so that a run can be reproduced. ``None`` leaves the
            global RNG state untouched.

    Returns:
        The trained ``Generator`` instance.

    Raises:
        ValueError: If ``z_tensor`` is empty or ``batch_size`` is not positive.
    """
    if len(z_tensor) == 0:
        raise ValueError("z_tensor must contain at least one sample.")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")
    if seed is not None:
        # Seed before model construction so weight init is reproducible too.
        torch.manual_seed(seed)

    gen = Generator()
    disc = Discriminator()

    g_optimizer = optim.Adam(gen.parameters(), lr=lr)
    d_optimizer = optim.Adam(disc.parameters(), lr=lr)
    criterion = nn.BCELoss()

    # Targets never change between steps, so build them once.
    real_labels = torch.ones(batch_size, 1)
    fake_labels = torch.zeros(batch_size, 1)

    print("Starting training...")
    for epoch in range(epochs):
        # 1. Train Discriminator
        disc.zero_grad()

        # Real samples
        idx = torch.randint(0, len(z_tensor), (batch_size,))
        real_data = z_tensor[idx]

        output_real = disc(real_data)
        loss_real = criterion(output_real, real_labels)

        # Fake samples
        noise = torch.randn(batch_size, 1)
        fake_data = gen(noise)

        # detach() keeps the discriminator loss from back-propagating into the generator.
        output_fake = disc(fake_data.detach())
        loss_fake = criterion(output_fake, fake_labels)

        d_loss = loss_real + loss_fake
        d_loss.backward()
        d_optimizer.step()

        # 2. Train Generator
        gen.zero_grad()
        # Re-score the same fake batch with the just-updated discriminator.
        output_gen = disc(fake_data)
        # We want the discriminator to label these as '1' (real)
        g_loss = criterion(output_gen, real_labels)

        g_loss.backward()
        g_optimizer.step()

        if log_every and epoch % log_every == 0:
            print(f"Epoch [{epoch}/{epochs}] | D Loss: {d_loss.item():.4f} | G Loss: {g_loss.item():.4f}")

    return gen

In [None]:
def plot_results(z_real, gen_model, n_samples=10000):
    """Plot KDE estimates of the real samples and of samples drawn from the generator.

    Args:
        z_real: Array of real samples; it is flattened before plotting.
        gen_model: Generator module. It is switched to eval mode and called on
            an ``(n_samples, 1)`` batch of standard-normal noise.
        n_samples: Number of generated samples used for the learned-density curve.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    gen_model.eval()
    with torch.no_grad():
        noise = torch.randn(n_samples, 1)
        z_fake = gen_model(noise).numpy()

    fig, ax = plt.subplots(figsize=(10, 6))

    # Kernel Density Estimation (KDE) for PDF comparison
    # `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
    sns.kdeplot(z_real.flatten(), label='True Distribution (z)', color='blue', fill=True, ax=ax)
    sns.kdeplot(z_fake.flatten(), label='GAN Learned PDF (z_f)', color='red', linestyle='--', ax=ax)

    ax.set_title("PDF Approximation: GAN vs Real Data")
    ax.set_xlabel("Value of z")
    ax.set_ylabel("Density")
    ax.legend()
    ax.grid(alpha=0.3)
    plt.show()

In [None]:
# Driver: train the GAN on the transformed samples, then compare densities.
if __name__ == "__main__":
    # Train with train_gan's default epochs and batch_size; keep the returned generator.
    trained_gen = train_gan(z_tensor)
    # Convert the real samples to a NumPy array and plot them against generated ones.
    plot_results(z_tensor.numpy(), trained_gen)