In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from torch.nn.utils import spectral_norm
from torch.optim.lr_scheduler import ReduceLROnPlateau  


# ---- Settings ----

# Prefer the GPU when one is available; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Where the input arrays are read from and every artefact is written to.
output_dir = "F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\"

# Model dimensions.
z_dim = 100       # length of the latent noise vector
cond_dim = 5      # number of classes
bert_dim = 768    # width of the feature vectors the GAN models
hidden_dim = 256  # width of the hidden layers

# Optimisation schedule.
batch_size = 128
num_epochs_gan = 1000  # raised to 1000, combined with early stopping
lr_g = 1e-4            # generator learning rate
lr_d = 1e-5            # discriminator learning rate
n_critic = 3           # discriminator updates per generator update

# Loss weights.
lambda_gp = 10     # gradient-penalty coefficient
sim_weight = 0.01  # cosine-similarity term weight (lowered to 0.01)

# Synthetic samples drawn per class (set to 500 for the t-SNE plots).
num_samples_per_class = 500

# Section 1) Data preparation
# Load the precomputed feature matrix and the label column of the dataset CSV.
X_bert = np.load(f"{output_dir}X_bert.npy")
df_rnalocate = pd.read_csv(f"{output_dir}rnalocate_dataset.csv")
labels_initial = df_rnalocate['label'].values

# Standardise every feature column, then wrap features and labels as tensors.
scaler = StandardScaler()
X_bert = scaler.fit_transform(X_bert)
X_bert_tensor = torch.tensor(X_bert, dtype=torch.float32)
labels_tensor = torch.LongTensor(labels_initial)
dataset = TensorDataset(X_bert_tensor, labels_tensor)

# 80 / 20 random split into training and test subsets.
n_total = len(dataset)
train_size = int(0.8 * n_total)
test_size = n_total - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# A class is "rare" when it has fewer than 30% of the samples of the largest class.
class_counts = np.bincount(labels_initial, minlength=cond_dim)
majority_class_size = class_counts.max()
threshold = 0.3 * majority_class_size
rare_classes = np.flatnonzero(class_counts < threshold).tolist()
print(f"Rare classes identified: {rare_classes} with counts: {class_counts[rare_classes]}")

# Section 2) Design and training of the conditional GAN (WGAN-GP)
class AttentionBlock(nn.Module):
    """Multi-head self-attention followed by a residual add and LayerNorm.

    Args:
        embed_dim: Size of the last dimension of the input sequence.
        num_heads: Number of attention heads.
    """

    def __init__(self, embed_dim, num_heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embed_dim=embed_dim, num_heads=num_heads, batch_first=True
        )
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Attend ``x`` to itself and return ``LayerNorm(x + attention(x))``."""
        # Query, key and value are all the input sequence; the attention
        # weights (second return value) are not needed.
        context = self.attn(x, x, x)[0]
        residual = x + context
        return self.norm(residual)

class CrossAttentionBlock(nn.Module):
    """Cross-attention from a token sequence to an embedded class label.

    The integer label is one-hot encoded, projected to ``embed_dim`` and used
    as a single key/value token; the result is added to the input and
    layer-normalised.

    Args:
        embed_dim: Size of the last dimension of the input sequence.
        cond_dim: Number of classes (width of the one-hot label vector).
        num_heads: Number of attention heads.
    """

    def __init__(self, embed_dim, cond_dim, num_heads=4):
        super().__init__()
        # Keep the class count on the module so forward() does not depend on
        # a module-level global that may differ from the constructor argument.
        self.cond_dim = cond_dim
        self.label_embed = nn.Linear(cond_dim, embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x, labels):
        """Return ``LayerNorm(x + attention(x -> label token))``.

        Args:
            x: Tensor whose last dimension is ``embed_dim`` (batch first).
            labels: Integer (int64) class indices, one per batch element.
        """
        # Build the one-hot condition with the input's dtype and device
        # instead of relying on a global ``device``.
        one_hot = F.one_hot(labels, num_classes=self.cond_dim).to(
            dtype=x.dtype, device=x.device
        )
        # (batch, embed_dim) -> (batch, 1, embed_dim): one key/value token.
        cond_embed = self.label_embed(one_hot).unsqueeze(1)
        attn_output, _ = self.attn(x, cond_embed, cond_embed)
        return self.norm(x + attn_output)

class Generator(nn.Module):
    """Conditional generator mapping (noise, class label) to a feature vector.

    Pipeline: dense layer -> residual block -> reshape the hidden vector into
    tokens -> self-attention, label cross-attention, self-attention ->
    flatten -> dense layer -> residual block -> spectral-normalised output
    layer with tanh.

    Sizes come from the module-level settings ``z_dim``, ``cond_dim``,
    ``hidden_dim`` and ``bert_dim``.

    NOTE(review): tanh bounds every output to [-1, 1], while the real
    features in this file are standardised with StandardScaler and are not
    bounded to that range - confirm this mismatch is intended.

    Raises:
        ValueError: If ``hidden_dim`` is not a multiple of the attention
            token width.
    """

    # Width of one attention token (the attention blocks use embed_dim=32).
    ATTN_EMBED_DIM = 32

    def __init__(self):
        super().__init__()
        if hidden_dim % self.ATTN_EMBED_DIM != 0:
            raise ValueError(
                f"hidden_dim ({hidden_dim}) must be a multiple of "
                f"{self.ATTN_EMBED_DIM} so it can be split into attention tokens"
            )
        # For hidden_dim = 256 this gives 8 tokens, i.e. the (8, 32) layout
        # that used to be hard-coded in forward().
        self.num_tokens = hidden_dim // self.ATTN_EMBED_DIM

        self.fc1 = nn.Sequential(
            nn.Linear(z_dim + cond_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU()
        )
        self.res1 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU(),
            nn.LayerNorm(hidden_dim),
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.attn1 = AttentionBlock(embed_dim=self.ATTN_EMBED_DIM, num_heads=4)
        self.cross_attn = CrossAttentionBlock(
            embed_dim=self.ATTN_EMBED_DIM, cond_dim=cond_dim, num_heads=4
        )
        self.attn2 = AttentionBlock(embed_dim=self.ATTN_EMBED_DIM, num_heads=4)
        self.fc_post_attn = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU()
        )
        self.res2 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU(),
            nn.LayerNorm(hidden_dim),
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.out = spectral_norm(nn.Linear(hidden_dim, bert_dim))

    def forward(self, z, labels):
        """Generate one sample per (noise row, label) pair.

        Args:
            z: Noise tensor with ``z_dim`` columns.
            labels: Integer (int64) class indices, one per row of ``z``.

        Returns:
            Tensor with ``bert_dim`` columns and values in [-1, 1].
        """
        # Follow the noise tensor's dtype/device rather than a global device.
        class_onehot = F.one_hot(labels, num_classes=cond_dim).to(
            dtype=z.dtype, device=z.device
        )
        gen_input = torch.cat((z, class_onehot), dim=1)
        x = self.fc1(gen_input)
        x = x + self.res1(x)
        B = x.size(0)
        # Split the hidden vector into tokens for the attention blocks.
        x = x.view(B, self.num_tokens, self.ATTN_EMBED_DIM)
        x = self.attn1(x)
        x = self.cross_attn(x, labels)
        x = self.attn2(x)
        # Flatten the tokens back into one hidden vector per sample.
        x = x.view(B, -1)
        x = self.fc_post_attn(x)
        x = x + self.res2(x)
        return torch.tanh(self.out(x))

class Discriminator(nn.Module):
    """Conditional critic: scores a feature vector concatenated with its one-hot label.

    A three-layer MLP whose linear layers are all spectral-normalised; the
    last layer emits one unbounded score per sample (no sigmoid).
    Layer sizes come from the module-level ``bert_dim``, ``cond_dim`` and
    ``hidden_dim``.
    """

    def __init__(self):
        super().__init__()
        in_features = bert_dim + cond_dim
        half_hidden = hidden_dim // 2
        layers = [
            spectral_norm(nn.Linear(in_features, hidden_dim)),
            nn.LeakyReLU(0.2),
            spectral_norm(nn.Linear(hidden_dim, half_hidden)),
            nn.LeakyReLU(0.2),
            spectral_norm(nn.Linear(half_hidden, 1)),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return one critic score per row of ``x``."""
        score = self.model(x)
        return score

def compute_gradient_penalty(D, real_samples, fake_samples, labels, num_classes=None):
    """Compute the WGAN-GP gradient penalty for a conditional critic.

    Random points on the straight lines between real and fake samples are
    scored by ``D`` (together with the one-hot labels); the penalty is the
    mean squared deviation of the per-sample gradient L2 norm from 1.

    Args:
        D: Callable taking ``cat(samples, one_hot_labels)`` and returning one
            score per row.
        real_samples: 2-D batch of real feature vectors.
        fake_samples: Batch of generated feature vectors, same shape.
        labels: Integer (int64) class indices, one per row.
        num_classes: Width of the one-hot label vector. Defaults to the
            module-level ``cond_dim``.

    Returns:
        Scalar tensor, differentiable with respect to the parameters of ``D``.
    """
    if num_classes is None:
        num_classes = cond_dim

    # The penalty only regularises the critic. Detaching here guarantees that
    # no gradient flows back into whatever produced the samples, even when
    # the caller passes a non-detached generator output.
    real_samples = real_samples.detach()
    fake_samples = fake_samples.detach()

    n = real_samples.size(0)
    # One mixing coefficient per sample, created directly on the data's device.
    alpha = torch.rand(n, 1, device=real_samples.device, dtype=real_samples.dtype)
    interpolates = (alpha * real_samples + (1 - alpha) * fake_samples).requires_grad_(True)

    class_onehot = F.one_hot(labels, num_classes=num_classes).to(
        dtype=real_samples.dtype, device=real_samples.device
    )
    d_interpolates = D(torch.cat((interpolates, class_onehot), dim=1))

    # create_graph=True keeps the gradient itself differentiable so the
    # penalty can be back-propagated into the critic's parameters.
    gradients = torch.autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(d_interpolates),
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]
    gradients = gradients.reshape(n, -1)
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean()

# d) Training loop
# WGAN-GP training: n_critic discriminator updates per generator update, with
# LR scheduling, checkpointing and early stopping driven by the mean generator
# loss of each epoch.
generator = Generator().to(device)
discriminator = Discriminator().to(device)
g_optimizer = optim.AdamW(generator.parameters(), lr=lr_g, betas=(0.5, 0.9))
d_optimizer = optim.Adam(discriminator.parameters(), lr=lr_d, betas=(0.5, 0.9))
scheduler_g = ReduceLROnPlateau(g_optimizer, 'min', patience=100, factor=0.5)

d_losses, g_losses = [], []
best_g_loss = float('inf')
patience, early_stop_counter = 100, 0

# Stateless module: build it once instead of once per step.
cos_sim = nn.CosineSimilarity(dim=1)

for epoch in range(num_epochs_gan):
    epoch_g_losses = []
    for i, (real_bert, labels) in enumerate(train_loader):
        real_bert, labels = real_bert.to(device), labels.to(device)
        # Use a local name: the configured `batch_size` must not be overwritten.
        n_batch = real_bert.size(0)
        if n_batch < 2:
            # BatchNorm1d in the generator cannot run in training mode on a
            # single sample (possible for the last, incomplete batch).
            continue

        # The real-side discriminator input is the same for every critic step.
        class_onehot = F.one_hot(labels, num_classes=cond_dim).float()
        disc_input_real = torch.cat((real_bert, class_onehot), dim=1)

        # Train the discriminator
        for _ in range(n_critic):
            d_optimizer.zero_grad()
            real_validity = discriminator(disc_input_real)
            z = torch.randn(n_batch, z_dim).to(device)
            # The generator is not updated here, so skip building its graph.
            with torch.no_grad():
                fake_bert = generator(z, labels)
            disc_input_fake = torch.cat((fake_bert, class_onehot), dim=1)
            fake_validity = discriminator(disc_input_fake)
            gradient_penalty = compute_gradient_penalty(discriminator, real_bert, fake_bert, labels)
            d_loss = -torch.mean(real_validity) + torch.mean(fake_validity) + lambda_gp * gradient_penalty
            d_loss.backward()
            d_optimizer.step()

        # Train the generator
        g_optimizer.zero_grad()
        z = torch.randn(n_batch, z_dim).to(device)
        fake_bert = generator(z, labels)
        disc_input_fake = torch.cat((fake_bert, class_onehot), dim=1)
        fake_validity = discriminator(disc_input_fake)
        # Auxiliary term: pull each fake sample towards the real sample that
        # shares its position (and label) in the batch.
        sim_loss = 1 - cos_sim(fake_bert, real_bert).mean()
        g_loss = -torch.mean(fake_validity) + sim_weight * sim_loss
        g_loss.backward()
        g_optimizer.step()

        d_losses.append(d_loss.item())
        g_losses.append(g_loss.item())
        epoch_g_losses.append(g_loss.item())

        if (i + 1) % 5 == 0 or i == 0:
            print(f"Epoch [{epoch+1}/{num_epochs_gan}], Step [{i+1}], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")

    # Compute the epoch mean once; it drives both the scheduler and early stopping.
    mean_g_loss = float(np.mean(epoch_g_losses))
    scheduler_g.step(mean_g_loss)

    if mean_g_loss < best_g_loss:
        best_g_loss = mean_g_loss
        early_stop_counter = 0
        torch.save(generator.state_dict(), f"{output_dir}gan_generator_attention.pt")
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

# Load the best checkpoint written by the training loop.
# map_location makes the load work even when the checkpoint was written on a
# different device (e.g. saved on GPU, reloaded on a CPU-only machine).
generator.load_state_dict(
    torch.load(f"{output_dir}gan_generator_attention.pt", map_location=device)
)
generator.eval()

# Generate synthetic samples for every rare class and save them per class.
generated_data = {}
for cls in rare_classes:
    print(f"\nGenerating synthetic samples for Class {cls}...")
    z = torch.randn(num_samples_per_class, z_dim).to(device)
    labels_c = torch.full((num_samples_per_class,), cls, dtype=torch.long).to(device)
    with torch.no_grad():
        fake_bert = generator(z, labels_c)
    # Convert to NumPy once and reuse it for the dict and the file.
    fake_np = fake_bert.cpu().numpy()
    generated_data[cls] = fake_np
    np.save(f"{output_dir}generated_class_{cls}.npy", fake_np)

# t-SNE and silhouette evaluation
# For each rare class, compare up to 500 real samples with the same number of
# generated ones: save a 2-D t-SNE scatter plot and print the silhouette score
# of the real-vs-generated labelling.
for cls in rare_classes:
    print(f"\nEvaluating synthetic data for Class {cls}...")
    real_class_idx = np.flatnonzero(labels_initial == cls)[:500]
    real_samples = X_bert[real_class_idx]
    if cls in generated_data:
        fake_samples = generated_data[cls][:500]
    else:
        # No generated data for this class: fall back to an all-zero block.
        fake_samples = np.zeros((500, bert_dim))[:500]

    # Use the same number of real and generated samples.
    min_len = min(len(real_samples), len(fake_samples))
    real_samples, fake_samples = real_samples[:min_len], fake_samples[:min_len]
    X_vis = np.concatenate([real_samples, fake_samples])
    # 0.0 marks real rows, 1.0 marks generated rows.
    labels_vis = np.repeat([0.0, 1.0], min_len)

    tsne = TSNE(n_components=2, random_state=42, n_jobs=-1)
    X_tsne = tsne.fit_transform(X_vis)
    plt.figure()
    sns.scatterplot(
        x=X_tsne[:, 0],
        y=X_tsne[:, 1],
        hue=labels_vis,
        palette=["blue", "red"],
        alpha=0.5,
    )
    plt.title(f"t-SNE: Real vs Generated Data for Class {cls}")
    plt.savefig(f"{output_dir}t-SNE_class_{cls}.png")
    plt.close()

    silhouette_avg = silhouette_score(X_vis, labels_vis)
    print(f"Silhouette Score for Class {cls}: {silhouette_avg:.4f}")

Rare classes identified: [2, 3, 4] with counts: [ 350 1185  710]
Epoch [1/1000], Step [1], D Loss: 6.2057, G Loss: -0.0050
Epoch [1/1000], Step [5], D Loss: 6.2863, G Loss: -0.0680
Epoch [1/1000], Step [10], D Loss: 5.9976, G Loss: -0.1722
Epoch [1/1000], Step [15], D Loss: 5.9032, G Loss: -0.2949
Epoch [1/1000], Step [20], D Loss: 5.8524, G Loss: -0.4462
Epoch [1/1000], Step [25], D Loss: 5.5493, G Loss: -0.6197
Epoch [1/1000], Step [30], D Loss: 5.5517, G Loss: -0.7580
Epoch [1/1000], Step [35], D Loss: 5.5283, G Loss: -0.9212
Epoch [1/1000], Step [40], D Loss: 5.4786, G Loss: -1.0826
Epoch [1/1000], Step [45], D Loss: 5.4394, G Loss: -1.2287
Epoch [1/1000], Step [50], D Loss: 5.3117, G Loss: -1.4375
Epoch [1/1000], Step [55], D Loss: 5.4057, G Loss: -1.6406
Epoch [1/1000], Step [60], D Loss: 5.5722, G Loss: -1.8511
Epoch [1/1000], Step [65], D Loss: 5.7676, G Loss: -2.1520
Epoch [1/1000], Step [70], D Loss: 6.1150, G Loss: -2.4834
Epoch [1/1000], Step [75], D Loss: 6.4384, G Loss: -

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from torch.nn.utils import spectral_norm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from joblib import dump

# ---- Settings ----

# Prefer the GPU when one is available; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Where the input arrays are read from and every artefact is written to.
output_dir = "F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\"

# Model dimensions.
z_dim = 128        # latent noise size (increased to 128)
cond_dim = 5       # number of classes
output_dim = 1258  # generator output width (changed to 1258)

# Optimisation schedule.
batch_size = 64    # or 128
num_epochs_gan = 30
lr_g = 1e-4        # generator learning rate
lr_d = 4e-4        # discriminator learning rate
n_critic = 1       # discriminator updates per generator update (or 2)
max_norm = 5       # gradient-clipping threshold
patience = 20      # used for both the LR scheduler and early stopping

# Loss weights.
lambda_gp = 10     # gradient-penalty coefficient
sim_weight = 0.0   # similarity term removed in this first version

# Section 1) Data preparation with separate scalers and PCA
# Each feature family gets its own StandardScaler so both can be re-applied
# independently later (they are persisted at the end of this section).
scaler_bert = StandardScaler()
X_bert_train = scaler_bert.fit_transform(np.load(f"{output_dir}X_train_bert.npy"))

scaler_handcrafted = StandardScaler()
X_handcrafted_train = scaler_handcrafted.fit_transform(np.load(f"{output_dir}X_train_handcrafted.npy"))

y_train = np.load(f"{output_dir}y_train.npy")

print(f"Shape of X_bert_train: {X_bert_train.shape}")
print(f"Shape of X_handcrafted_train: {X_handcrafted_train.shape}")
if X_bert_train.shape[0] != X_handcrafted_train.shape[0]:
    raise ValueError(f"تعداد نمونه‌ها در X_bert_train ({X_bert_train.shape[0]}) و X_handcrafted_train ({X_handcrafted_train.shape[0]}) متفاوت است!")
X_train_dual = np.concatenate([X_bert_train, X_handcrafted_train], axis=1)
dual_dim = X_train_dual.shape[1]
print(f"Shape of X_train_dual: {X_train_dual.shape}, dual_dim: {dual_dim}")

# The GAN is sized from the hard-coded `output_dim`; fail early with a clear
# message if it does not match the real feature width, instead of failing
# later with a shape error inside the discriminator.
if dual_dim != output_dim:
    raise ValueError(
        f"output_dim ({output_dim}) does not match the width of the "
        f"concatenated training features ({dual_dim})"
    )

# Apply PCA (512 components) to the concatenated features and save the result.
pca = PCA(n_components=512, random_state=42)
X_train_pca = pca.fit_transform(X_train_dual)
np.save(f"{output_dir}X_train_pca.npy", X_train_pca)
print(f"Shape of X_train_pca: {X_train_pca.shape}")

# The train_loader uses X_train_dual (full width) rather than X_train_pca.
X_train_tensor = torch.FloatTensor(X_train_dual)
y_train_tensor = torch.LongTensor(y_train)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
# drop_last=True keeps every batch at exactly batch_size samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# A class is "rare" when it has fewer than 30% of the samples of the largest class.
class_counts = np.bincount(y_train, minlength=cond_dim)
majority_class_size = np.max(class_counts)
threshold = 0.3 * majority_class_size
rare_classes = np.where(class_counts < threshold)[0].tolist()
print(f"Rare classes identified: {rare_classes} with counts: {class_counts[rare_classes]}")

# Persist the fitted preprocessing objects.
dump({'scaler_bert': scaler_bert, 'scaler_handcrafted': scaler_handcrafted, 'pca': pca}, f"{output_dir}preprocessing.pkl")

# Section 2) Design and training of the conditional GAN (WGAN-GP)
class Generator(nn.Module):
    """Conditional MLP generator: (noise, class label) -> feature vector.

    The label is mapped through a learned embedding of width ``class_dim``,
    concatenated with the noise and passed through two hidden layers
    (512 and 1024 units, BatchNorm + LeakyReLU) and a Tanh output layer.

    NOTE(review): Tanh bounds every output to [-1, 1], while the training
    features in this file are standardised with StandardScaler and are not
    bounded to that range - confirm this mismatch is intended.

    Args:
        z_dim: Length of the noise vector.
        class_dim: Number of classes (also the label-embedding width).
        output_dim: Length of the generated feature vector.
    """

    def __init__(self, z_dim=128, class_dim=5, output_dim=1258):
        super().__init__()
        # Simple learned embedding used as the conditioning vector.
        self.label_emb = nn.Embedding(class_dim, class_dim)

        layers = []
        in_features = z_dim + class_dim
        for width in (512, 1024):
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.BatchNorm1d(width))
            layers.append(nn.LeakyReLU(0.2))
            in_features = width
        layers.append(nn.Linear(in_features, output_dim))
        layers.append(nn.Tanh())  # squashes the output to [-1, 1]
        self.net = nn.Sequential(*layers)

    def forward(self, z, labels):
        """Return one generated vector per (noise row, label) pair."""
        label_vec = self.label_emb(labels)
        # Noise and condition side by side form the network input.
        net_input = torch.cat([z, label_vec], dim=1)
        return self.net(net_input)

class Discriminator(nn.Module):
    """Conditional critic: scores a feature vector concatenated with its one-hot label.

    A three-layer MLP (256 -> 128 -> 1) whose linear layers are all
    spectral-normalised; the output is one unbounded score per sample.

    Args:
        input_dim: Width of the concatenated input. The default is computed
            once, at class-definition time, from the module-level
            ``output_dim`` and ``cond_dim``.
    """

    def __init__(self, input_dim=output_dim + cond_dim):
        super().__init__()
        layers = [
            spectral_norm(nn.Linear(input_dim, 256)),
            nn.LeakyReLU(0.2),
            spectral_norm(nn.Linear(256, 128)),
            nn.LeakyReLU(0.2),
            spectral_norm(nn.Linear(128, 1)),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return one critic score per row of ``x``."""
        score = self.model(x)
        return score

def compute_gradient_penalty(D, real_samples, fake_samples, labels, num_classes=None):
    """Compute the WGAN-GP gradient penalty for a conditional critic.

    Random points on the straight lines between real and fake samples are
    scored by ``D`` (together with the one-hot labels); the penalty is the
    mean squared deviation of the per-sample gradient L2 norm from 1.

    Args:
        D: Callable taking ``cat(samples, one_hot_labels)`` and returning one
            score per row.
        real_samples: 2-D batch of real feature vectors.
        fake_samples: Batch of generated feature vectors, same shape.
        labels: Integer (int64) class indices, one per row.
        num_classes: Width of the one-hot label vector. Defaults to the
            module-level ``cond_dim``.

    Returns:
        Scalar tensor, differentiable with respect to the parameters of ``D``.
    """
    if num_classes is None:
        num_classes = cond_dim

    # The penalty only regularises the critic. Detaching here guarantees that
    # no gradient flows back into the generator, even when the caller passes
    # its output without detaching it.
    real_samples = real_samples.detach()
    fake_samples = fake_samples.detach()

    n = real_samples.size(0)
    # One mixing coefficient per sample, created directly on the data's device.
    alpha = torch.rand(n, 1, device=real_samples.device, dtype=real_samples.dtype)
    interpolates = (alpha * real_samples + (1 - alpha) * fake_samples).requires_grad_(True)

    class_onehot = F.one_hot(labels, num_classes=num_classes).to(
        dtype=real_samples.dtype, device=real_samples.device
    )
    d_interpolates = D(torch.cat((interpolates, class_onehot), dim=1))

    # create_graph=True keeps the gradient itself differentiable so the
    # penalty can be back-propagated into the critic's parameters.
    gradients = torch.autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(d_interpolates),
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]
    gradients = gradients.reshape(n, -1)
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean()

# d) Training loop
# WGAN-GP training with gradient-norm clipping on both networks. The mean
# generator loss of each epoch drives the LR scheduler, checkpointing and
# early stopping.
generator = Generator(z_dim=z_dim, class_dim=cond_dim, output_dim=output_dim).to(device)
discriminator = Discriminator().to(device)
g_optimizer = optim.AdamW(generator.parameters(), lr=lr_g, betas=(0.5, 0.9))
d_optimizer = optim.Adam(discriminator.parameters(), lr=lr_d, betas=(0.5, 0.9))
scheduler_g = ReduceLROnPlateau(g_optimizer, 'min', patience=patience, factor=0.5)

d_losses, g_losses = [], []
best_combined_loss = float('inf')
early_stop_counter = 0

for epoch in range(num_epochs_gan):
    epoch_g_losses = []
    for i, (real_dual, labels) in enumerate(train_loader):
        real_dual, labels = real_dual.to(device), labels.to(device)
        # Use a local name: the configured `batch_size` must not be overwritten.
        n_batch = real_dual.size(0)

        # The real-side discriminator input is the same for every critic step.
        class_onehot = F.one_hot(labels, num_classes=cond_dim).float()
        disc_input_real = torch.cat((real_dual, class_onehot), dim=1)

        # Train the discriminator
        for _ in range(n_critic):
            d_optimizer.zero_grad()
            real_validity = discriminator(disc_input_real)
            z = torch.randn(n_batch, z_dim).to(device)
            # The generator is not updated in this step, so do not build its
            # graph. Previously the non-detached fake batch reached the
            # gradient penalty and d_loss.backward() also back-propagated
            # through the whole generator for nothing.
            with torch.no_grad():
                fake_dual = generator(z, labels)
            disc_input_fake = torch.cat((fake_dual, class_onehot), dim=1)
            fake_validity = discriminator(disc_input_fake)
            gradient_penalty = compute_gradient_penalty(discriminator, real_dual, fake_dual, labels)
            d_loss = -torch.mean(real_validity) + torch.mean(fake_validity) + lambda_gp * gradient_penalty
            d_loss.backward()
            # clip_grad_norm_ returns the total gradient norm measured before
            # clipping, so no separate manual computation is needed.
            total_norm_d = float(
                torch.nn.utils.clip_grad_norm_(discriminator.parameters(), max_norm=max_norm)
            )
            d_optimizer.step()

        # Train the generator
        g_optimizer.zero_grad()
        z = torch.randn(n_batch, z_dim).to(device)
        fake_dual = generator(z, labels)
        disc_input_fake = torch.cat((fake_dual, class_onehot), dim=1)
        fake_validity = discriminator(disc_input_fake)
        g_loss = -torch.mean(fake_validity)
        g_loss.backward()
        total_norm_g = float(
            torch.nn.utils.clip_grad_norm_(generator.parameters(), max_norm=max_norm)
        )
        g_optimizer.step()

        d_losses.append(d_loss.item())
        g_losses.append(g_loss.item())
        epoch_g_losses.append(g_loss.item())

        if (i + 1) % 5 == 0 or i == 0:
            print(f"Epoch [{epoch+1}/{num_epochs_gan}], Step [{i+1}], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}, "
                  f"Grad norm G: {total_norm_g:.4f}, Grad norm D: {total_norm_d:.4f}")

    combined_loss = float(np.mean(epoch_g_losses))
    scheduler_g.step(combined_loss)

    # Only an improvement larger than 1e-3 counts as progress.
    if best_combined_loss - combined_loss > 1e-3:
        best_combined_loss = combined_loss
        early_stop_counter = 0
        torch.save(generator.state_dict(), f"{output_dir}gan_generator_dual.pt")
        torch.save(generator, f"{output_dir}gan_generator_dual_full.pt")
        torch.save(discriminator.state_dict(), f"{output_dir}gan_discriminator_dual.pt")
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            torch.save(generator.state_dict(), f"{output_dir}gan_generator_dual_last.pt")
            break

# Load the best checkpoint written by the training loop.
# map_location makes the load work even when the checkpoint was written on a
# different device (e.g. saved on GPU, reloaded on a CPU-only machine).
generator.load_state_dict(
    torch.load(f"{output_dir}gan_generator_dual.pt", map_location=device)
)
generator.eval()

# Generate synthetic data for every rare class.
generated_data = []
generated_labels = []
num_samples_per_class = 1000
for cls in rare_classes:
    print(f"\nGenerating synthetic samples for Class {cls}...")
    z = torch.randn(num_samples_per_class, z_dim).to(device)
    labels_c = torch.full((num_samples_per_class,), cls, dtype=torch.long).to(device)
    with torch.no_grad():
        fake_dual = generator(z, labels_c)  # output_dim columns per sample
    generated_data.append(fake_dual.cpu().numpy())
    generated_labels.append(np.full(num_samples_per_class, cls))

X_synthetic = np.vstack(generated_data)  # one row per generated sample
y_synthetic = np.concatenate(generated_labels)

# Dimension check: the generated width must equal the width PCA was fitted on.
print("Generated shape:", X_synthetic.shape)
print("PCA expected input:", pca.n_features_in_)

# Project the synthetic samples with the PCA fitted on the training data.
X_synthetic_pca = pca.transform(X_synthetic)

np.save(f"{output_dir}X_synthetic_pca.npy", X_synthetic_pca)
np.save(f"{output_dir}y_synthetic.npy", y_synthetic)

# Merge the synthetic samples with the original training data (PCA space).
X_train_final = np.vstack([X_train_pca, X_synthetic_pca])
y_train_final = np.concatenate([y_train, y_synthetic])

np.save(f"{output_dir}X_train_augmented.npy", X_train_final)
np.save(f"{output_dir}y_train_augmented.npy", y_train_final)

# t-SNE and silhouette evaluation
# For each rare class, compare up to 500 real samples with the same number of
# synthetic ones in PCA space: save a t-SNE scatter plot and record the
# silhouette score of the real-vs-synthetic labelling.
sil_scores = []
for cls in rare_classes:
    print(f"\nEvaluating synthetic data for Class {cls}...")
    real_idx = np.flatnonzero(y_train == cls)[:500]
    real_samples = X_train_pca[real_idx]
    synthetic_samples = X_synthetic_pca[y_synthetic == cls][:500]

    # Use the same number of real and synthetic samples.
    min_len = min(len(real_samples), len(synthetic_samples))
    real_samples = real_samples[:min_len]
    synthetic_samples = synthetic_samples[:min_len]

    X_vis = np.concatenate([real_samples, synthetic_samples])
    # 0.0 marks real rows, 1.0 marks synthetic rows.
    labels_vis = np.repeat([0.0, 1.0], min_len)
    tsne = TSNE(n_components=2, random_state=42)
    X_tsne = tsne.fit_transform(X_vis)
    sil_score = silhouette_score(X_vis, labels_vis)
    sil_scores.append(sil_score)

    plt.figure(figsize=(6, 6))
    sns.scatterplot(
        x=X_tsne[:, 0],
        y=X_tsne[:, 1],
        hue=labels_vis,
        palette=["blue", "red"],
        alpha=0.5,
    )
    plt.title(f"t-SNE: Real vs Synthetic for Class {cls}")
    plt.savefig(f"{output_dir}tsne_class_{cls}.png", bbox_inches='tight')
    plt.close()

    # Mean over the most recent score only, i.e. this class's own score.
    mean_sil_score = np.mean(sil_scores[-1:])
    print(f"Mean Silhouette Score for Class {cls}: {mean_sil_score:.4f}")

# Classifier Accuracy
# Real-vs-synthetic test per rare class: train a small MLP to tell real
# samples (label 0) from synthetic ones (label 1) and report its accuracy on
# a held-out split. Scoring on the samples the classifier was fitted on (as
# before) inflates the accuracy and says little about how separable the two
# sets really are.
from sklearn.model_selection import train_test_split

for cls in rare_classes:
    real_idx = np.where(y_train == cls)[0][:500]
    real_samples = X_train_pca[real_idx]
    synthetic_samples = X_synthetic_pca[y_synthetic == cls][:500]
    min_len = min(len(real_samples), len(synthetic_samples))
    if min_len < 2:
        # A stratified split needs at least two samples of each kind.
        print(f"Skipping classifier evaluation for Class {cls}: only {min_len} sample(s) per group")
        continue
    real_samples, synthetic_samples = real_samples[:min_len], synthetic_samples[:min_len]
    X_eval = np.concatenate([real_samples, synthetic_samples])
    y_eval = np.concatenate([np.zeros(min_len), np.ones(min_len)])

    # Stratify so both splits keep the 50/50 real/synthetic balance.
    X_train_eval, X_test_eval, y_train_eval, y_test_eval = train_test_split(
        X_eval, y_eval, test_size=0.3, stratify=y_eval, random_state=42
    )
    clf = MLPClassifier(hidden_layer_sizes=(128,), max_iter=100, random_state=42)
    clf.fit(X_train_eval, y_train_eval)
    accuracy = clf.score(X_test_eval, y_test_eval)
    print(f"Classifier Accuracy for Class {cls}: {accuracy:.4f}")

# Overall mean silhouette score across all evaluated rare classes
overall_mean_sil = np.mean(sil_scores)
print(f"Overall Mean Silhouette Score: {overall_mean_sil:.4f}")

# t-SNE of all synthetic data, coloured by class label
tsne_all = TSNE(n_components=2, random_state=42)
X_synthetic_tsne = tsne_all.fit_transform(X_synthetic_pca)
plt.figure(figsize=(6, 6))
sns.scatterplot(
    x=X_synthetic_tsne[:, 0],
    y=X_synthetic_tsne[:, 1],
    hue=y_synthetic,
    palette="deep",
    alpha=0.5,
)
plt.title("t-SNE of All Synthetic Data")
plt.savefig(f"{output_dir}tsne_all_synthetic.png", bbox_inches='tight')
plt.close()

# Save the per-step loss histories for later analysis
d_loss_history = np.array(d_losses)
g_loss_history = np.array(g_losses)
np.save(f"{output_dir}d_losses.npy", d_loss_history)
np.save(f"{output_dir}g_losses.npy", g_loss_history)

Shape of X_bert_train: (8935, 768)
Shape of X_handcrafted_train: (8935, 490)
Shape of X_train_dual: (8935, 1258), dual_dim: 1258
Shape of X_train_pca: (8935, 512)
Rare classes identified: [2, 3, 4] with counts: [252 853 511]
Epoch [1/30], Step [1], D Loss: 5.7015, G Loss: 0.0961, Grad norm G: 2.4276, Grad norm D: 11.1357
Epoch [1/30], Step [5], D Loss: 5.3089, G Loss: -0.4708, Grad norm G: 2.8066, Grad norm D: 9.2777
Epoch [1/30], Step [10], D Loss: 4.7086, G Loss: -0.6403, Grad norm G: 3.2278, Grad norm D: 7.8916
Epoch [1/30], Step [15], D Loss: 3.4936, G Loss: 0.1192, Grad norm G: 2.5751, Grad norm D: 8.3166
Epoch [1/30], Step [20], D Loss: 3.4598, G Loss: 0.3925, Grad norm G: 2.5058, Grad norm D: 8.1565
Epoch [1/30], Step [25], D Loss: 2.2182, G Loss: 0.5235, Grad norm G: 3.2109, Grad norm D: 7.9312
Epoch [1/30], Step [30], D Loss: 1.4801, G Loss: 0.6953, Grad norm G: 3.4236, Grad norm D: 8.8393
Epoch [1/30], Step [35], D Loss: 2.0730, G Loss: 0.2519, Grad norm G: 3.7987, Grad norm 