In [45]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn

# Load the new dataset, using the first CSV column as the row index,
# and replace missing (NaN) values with 0 instead of dropping them.
dataRegular = pd.read_csv(
    '../../datasets/tabular/original/alzheimers_disease_data.csv',
    index_col=[0],
).fillna(0)

# Identify the non-numeric columns, drop them, and cast what remains to float32.
non_numeric_columns = dataRegular.select_dtypes(exclude=[np.number]).columns
dataRegular = dataRegular.drop(columns=non_numeric_columns).astype(np.float32)

# Build the PyTorch tensor directly on the target device (GPU when available).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = torch.tensor(dataRegular.to_numpy(), dtype=torch.float32, device=device)

# Generator definition
class Generator(nn.Module):
    """Small fully connected network that turns noise vectors into rows.

    Architecture: Linear(input_size, 50) -> ReLU -> Linear(50, output_size).
    The final layer has no activation, so outputs are unbounded.

    Args:
        input_size: Width of each input (noise) vector.
        output_size: Width of each produced row.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        hidden_units = 50
        layers = [
            nn.Linear(input_size, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_size),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run the batch ``x`` through the network and return the result."""
        generated = self.model(x)
        return generated

# Discriminator definition
class Discriminator(nn.Module):
    """Small fully connected binary classifier over data rows.

    Architecture: Linear(input_size, 50) -> ReLU -> Linear(50, 1) -> Sigmoid,
    so each input row yields a single value in (0, 1).

    Args:
        input_size: Width of each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_units = 50
        layers = [
            nn.Linear(input_size, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the per-row score for the batch ``x``."""
        score = self.model(x)
        return score

# Instantiate the generator and the discriminator.
# The noise fed to the generator has as many entries as the dataset has
# features, and each generated row has as many entries as the frame has columns.
input_size = data.size(1)
output_size = dataRegular.shape[1]
generator = Generator(input_size=input_size, output_size=output_size).to(device)
discriminator = Discriminator(input_size=output_size).to(device)

# Loss function and one Adam optimizer per network (same learning rate).
criterion = nn.BCELoss()
optimizer_g = torch.optim.Adam(generator.parameters(), lr=0.001)
optimizer_d = torch.optim.Adam(discriminator.parameters(), lr=0.001)

# Training parameters
num_epochs = 20000
batch_size = 100  # rows per batch

# Train the GAN
# Each iteration performs one discriminator update followed by one generator
# update on a freshly sampled batch ("epoch" here means one such iteration).

# Never ask for more real rows than the dataset holds: a short real batch paired
# with `batch_size` labels would make BCELoss fail on mismatched shapes.
n_rows = data.size(0)
if n_rows == 0:
    raise ValueError("Cannot train the GAN on an empty dataset.")
step_batch_size = min(batch_size, n_rows)

# The targets are constant, so create them once, directly on the device.
real_labels = torch.ones(step_batch_size, 1, device=device)
fake_labels = torch.zeros(step_batch_size, 1, device=device)
gen_labels = real_labels  # the generator wants its samples classified as real

for epoch in range(num_epochs):
    # ---- Discriminator step ----
    optimizer_d.zero_grad()

    # Real rows: a random batch drawn without replacement.
    indices = torch.randperm(n_rows, device=device)[:step_batch_size]
    real_data = data[indices]
    outputs_real = discriminator(real_data)
    d_loss_real = criterion(outputs_real, real_labels)

    # Generated rows: no generator gradients are needed for this step,
    # so skip building the autograd graph altogether.
    noise = torch.randn(step_batch_size, input_size, device=device)
    with torch.no_grad():
        fake_data = generator(noise)
    outputs_fake = discriminator(fake_data)
    d_loss_fake = criterion(outputs_fake, fake_labels)

    # Total discriminator loss
    d_loss = d_loss_real + d_loss_fake
    d_loss.backward()
    optimizer_d.step()

    # ---- Generator step ----
    optimizer_g.zero_grad()
    noise = torch.randn(step_batch_size, input_size, device=device)
    gen_data = generator(noise)
    outputs = discriminator(gen_data)
    g_loss = criterion(outputs, gen_labels)
    g_loss.backward()
    optimizer_g.step()

    # Report both losses every 1000 iterations.
    if (epoch+1) % 1000 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}")

# Generate synthetic data
n_samples = 1000  # number of synthetic rows to generate
with torch.no_grad():
    test_noise = torch.randn(n_samples, input_size).to(device)
    generated_data = generator(test_noise).cpu().numpy()

# Binarize the last column (treated as the diagnosis label): values above the
# threshold become 1.0, everything else 0.0.
diagnosis_threshold = 0.5  # diagnosis threshold

generated_data[:, -1] = (generated_data[:, -1] > diagnosis_threshold).astype(np.float32)

# Save the generated data to a CSV file
generated_df = pd.DataFrame(generated_data, columns=dataRegular.columns)
generated_df.insert(0, 'PatientID', range(1, 1 + len(generated_df)))  # add a sequential PatientID column

# Create the output directory first: to_csv fails when it is missing, which
# would throw away the result of the whole training run.
output_path = Path('../../datasets/tabular/generated/alzheimers_disease_generated_method2.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
generated_df.to_csv(output_path, index=False)

print("Generisanje i čuvanje sintetičkih podataka završeno.")


Epoch [1000/20000], d_loss: 0.0132, g_loss: 5.2842
Epoch [2000/20000], d_loss: 0.0121, g_loss: 6.8541
Epoch [3000/20000], d_loss: 0.0233, g_loss: 6.5465
Epoch [4000/20000], d_loss: 0.2638, g_loss: 10.2298
Epoch [5000/20000], d_loss: 1.5692, g_loss: 4.7049
Epoch [6000/20000], d_loss: 0.5324, g_loss: 5.9239
Epoch [7000/20000], d_loss: 0.6435, g_loss: 5.2534
Epoch [8000/20000], d_loss: 0.9501, g_loss: 4.8253
Epoch [9000/20000], d_loss: 1.0291, g_loss: 5.4933
Epoch [10000/20000], d_loss: 0.7740, g_loss: 3.6468
Epoch [11000/20000], d_loss: 3.1487, g_loss: 1.0879
Epoch [12000/20000], d_loss: 2.7970, g_loss: 0.9575
Epoch [13000/20000], d_loss: 1.7670, g_loss: 1.8818
Epoch [14000/20000], d_loss: 0.4979, g_loss: 3.9030
Epoch [15000/20000], d_loss: 3.6804, g_loss: 2.0398
Epoch [16000/20000], d_loss: 1.5856, g_loss: 2.2586
Epoch [17000/20000], d_loss: 0.8730, g_loss: 2.0635
Epoch [18000/20000], d_loss: 0.9460, g_loss: 1.9373
Epoch [19000/20000], d_loss: 0.9050, g_loss: 3.5184
Epoch [20000/20000],