In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import grad
from sklearn.preprocessing import MinMaxScaler

In [None]:
# === Load the raw dataset from a comma-separated .CSV file ===
# NOTE(review): 'NAME.csv' looks like a placeholder -- point it at the real file.
dados = pd.read_csv(filepath_or_buffer='NAME.csv', sep=',')
print(dados.head(n=5))  # preview of the first five rows

In [None]:
# ===== 1. Load the raw dataset from an .XLSX workbook =====
# This rebinds `dados`, so it replaces whatever the CSV cell loaded; run only
# the loader cell that matches the format of your file.
# NOTE(review): "NAME.xlsx" looks like a placeholder -- point it at the real file.
dados = pd.read_excel(io="NAME.xlsx")
print(dados.head(n=5))  # preview of the first five rows

In [None]:
# ===== Drop incomplete rows, then split labels / features =====
# Everything below is sliced from `dados_cleaned` (not the raw `dados`) so that
# rows containing NaN never reach the scaler or the WGAN training data.
dados_cleaned = dados.dropna()

# pandas already consumes the header line of the file as the column names, so
# row 0 of the frame is the first sample and must not be skipped.
classes = dados_cleaned.iloc[:, 0].values    # first column: class label of each sample
features = dados_cleaned.columns[1:]         # names of the remaining (feature) columns
X = dados_cleaned.iloc[:, 1:].astype(float).values

# Normalize every feature to [0, 1]; the generator ends in a Sigmoid, so it
# produces values in that same range.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# ========= 2. Definir modelos WGAN / Define WGAN models =========
class Generator(nn.Module):
    """Fully connected generator: noise vector -> sample with values in [0, 1].

    The network is Linear(noise_dim, 128) -> ReLU -> Linear(128, 256) -> ReLU
    -> Linear(256, output_dim) -> Sigmoid, stored as ``self.model``.
    """

    def __init__(self, noise_dim, output_dim):
        """Build the layer stack.

        Args:
            noise_dim: size of the input noise vector.
            output_dim: number of values produced per generated sample.
        """
        super().__init__()
        hidden_widths = (128, 256)
        layers = []
        fan_in = noise_dim
        for width in hidden_widths:
            layers.append(nn.Linear(fan_in, width))
            layers.append(nn.ReLU())
            fan_in = width
        layers.append(nn.Linear(fan_in, output_dim))
        # Sigmoid keeps every output inside [0, 1], matching the normalized data.
        layers.append(nn.Sigmoid())
        self.model = nn.Sequential(*layers)

    def forward(self, z):
        """Map a batch of noise vectors ``z`` to generated samples."""
        generated = self.model(z)
        return generated

class Critic(nn.Module):
    """Fully connected critic: sample -> one unbounded score.

    The network is Linear(input_dim, 256) -> LeakyReLU(0.2) -> Linear(256, 128)
    -> LeakyReLU(0.2) -> Linear(128, 1), stored as ``self.model``. There is no
    final activation, so the score is an unconstrained real number.
    """

    def __init__(self, input_dim):
        """Build the layer stack for samples with ``input_dim`` values each."""
        super().__init__()
        widths = (input_dim, 256, 128)
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.LeakyReLU(0.2))
        stack.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the critic score for every sample in the batch ``x``."""
        score = self.model(x)
        return score

# ========= 3. Função Gradient Penalty / Gradient Penalty Function =========
def gradient_penalty(critic, real, fake, device="cpu"):
    """Compute the WGAN-GP gradient penalty for one batch.

    Each real sample is mixed with the matching fake sample using its own
    random coefficient in [0, 1); the critic is evaluated on the mixtures and
    the penalty is the batch mean of ``(||d critic / d mixture||_2 - 1) ** 2``.

    Args:
        critic: callable mapping a batch of samples to a batch of scores.
        real: batch of real samples; the first dimension is the batch.
        fake: batch of generated samples with the same shape as ``real``.
        device: kept only for backward compatibility. The mixing coefficients
            are created on ``real.device`` so they always match the inputs.

    Returns:
        A scalar tensor that stays attached to the autograd graph
        (``create_graph=True``), so it can be part of the critic loss.
    """
    n_samples = real.shape[0]
    # One coefficient per sample, broadcast over every non-batch dimension, so
    # inputs with more than two dimensions work as well as flat vectors.
    eps_shape = (n_samples,) + (1,) * (real.dim() - 1)
    epsilon = torch.rand(eps_shape, device=real.device, dtype=real.dtype)
    interpolated = real * epsilon + fake * (1 - epsilon)
    interpolated.requires_grad_(True)
    mixed_scores = critic(interpolated)
    gradient = grad(
        outputs=mixed_scores,
        inputs=interpolated,
        grad_outputs=torch.ones_like(mixed_scores),
        create_graph=True,   # the penalty itself must be differentiable
        retain_graph=True
    )[0]
    # Flatten per sample so the norm is taken over all features of a sample.
    gradient = gradient.view(gradient.shape[0], -1)
    gp = ((gradient.norm(2, dim=1) - 1) ** 2).mean()
    return gp

# ========= 4. Função de treino por classe / Training function by class =========
def treinar_wgan(X_class, n_epochs=500, batch_size=32, noise_dim=50, n_fake=50,
                 device="cpu", n_critic=5, lambda_gp=10):
    """Train a WGAN-GP on the samples of one class and return synthetic samples.

    A fresh ``Generator`` and ``Critic`` are created on every call. Batches are
    taken in order (no shuffling); for each batch the critic is updated
    ``n_critic`` times and the generator once. Both use Adam with ``lr=1e-4``
    and ``betas=(0.0, 0.9)``.

    Args:
        X_class: 2-D array-like of shape (n_samples, n_features). Values are
            expected in [0, 1] because the generator ends in a Sigmoid.
        n_epochs: number of passes over ``X_class``.
        batch_size: maximum number of real samples per batch.
        noise_dim: size of the noise vector fed to the generator.
        n_fake: number of synthetic samples to return.
        device: torch device used for the models and the data.
        n_critic: critic updates per generator update.
        lambda_gp: weight of the gradient penalty in the critic loss.

    Returns:
        ``numpy.ndarray`` of shape (n_fake, n_features) with generated samples.

    Raises:
        ValueError: if ``X_class`` is not 2-D or contains no samples.
    """
    X_tensor = torch.tensor(X_class, dtype=torch.float32).to(device)
    if X_tensor.dim() != 2 or X_tensor.shape[0] == 0:
        # Without samples the loops below never run and the function would
        # silently return the output of an untrained generator.
        raise ValueError(
            "X_class must be a non-empty 2-D array of shape (n_samples, n_features)"
        )
    n_features = X_tensor.shape[1]

    gen = Generator(noise_dim, n_features).to(device)
    critic = Critic(n_features).to(device)

    opt_gen = optim.Adam(gen.parameters(), lr=1e-4, betas=(0.0, 0.9))
    opt_critic = optim.Adam(critic.parameters(), lr=1e-4, betas=(0.0, 0.9))

    for epoch in range(n_epochs):
        for i in range(0, len(X_tensor), batch_size):
            real = X_tensor[i:i+batch_size]
            cur_batch_size = real.shape[0]

            # Train the critic
            for _ in range(n_critic):
                z = torch.randn(cur_batch_size, noise_dim).to(device)
                # Detach: the critic update does not need generator gradients,
                # so there is no reason to backpropagate through the generator
                # or to keep its graph alive.
                fake = gen(z).detach()

                critic_real = critic(real).mean()
                critic_fake = critic(fake).mean()
                gp = gradient_penalty(critic, real, fake, device=device)
                loss_critic = -(critic_real - critic_fake) + lambda_gp * gp

                opt_critic.zero_grad()
                # A new graph is built on every iteration, so nothing has to be
                # retained after this backward pass.
                loss_critic.backward()
                opt_critic.step()

            # Train the generator: raise the critic score of generated samples
            z = torch.randn(cur_batch_size, noise_dim).to(device)
            fake = gen(z)
            gen_loss = -critic(fake).mean()

            opt_gen.zero_grad()
            gen_loss.backward()
            opt_gen.step()

    # ===== Generate new samples with the trained generator =====
    with torch.no_grad():
        z = torch.randn(n_fake, noise_dim).to(device)
        fake = gen(z).cpu().numpy()
    return fake

In [None]:
# ========= 5. Data augmentation for every class =========
# Seed torch (weight initialization and noise sampling) so that a fresh
# "Restart & Run All" regenerates the same synthetic spectra.
SEED = 42
torch.manual_seed(SEED)

classes_unicas = np.unique(classes)
dados_sinteticos = []

for c in classes_unicas:
    # Train one WGAN per class, using only the (normalized) samples of that class
    X_c = X_scaled[classes == c]
    espectros_fake = treinar_wgan(X_c, n_epochs=500, n_fake=500)
    # Map the generated values from [0, 1] back to the original feature scale
    espectros_fake = scaler.inverse_transform(espectros_fake)
    # Prefix every synthetic spectrum with its class label
    dados_sinteticos.extend([c] + ef.tolist() for ef in espectros_fake)

# Concatenate originals + synthetics. The original rows are taken straight
# from `X`; scaling them and inverting the scaling again would only add
# floating-point round-off.
dados_originais = [[c] + x.tolist() for c, x in zip(classes, X)]
dados_finais = pd.DataFrame(
    dados_originais + dados_sinteticos,
    columns=["Classe"] + list(features)
)

In [None]:
# ========= 6. Write the augmented dataset to disk =========
# index=False keeps the DataFrame's row index out of the CSV file.
dados_finais.to_csv(path_or_buf="dataset_augmented.csv", index=False)
print("Arquivo salvo em / File saved in dataset_augmented.csv")


Arquivo salvo em dataset_augmented.csv
