In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
from torch.cuda.amp import GradScaler, autocast
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import precision_recall_fscore_support

# Report the PyTorch build and whether a CUDA device is visible to this kernel
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    # CPU fallback: training still runs, just without GPU acceleration
    print("No GPU available, using CPU. Consider installing PyTorch with CUDA support.")
else:
    # Device index 0 is the one reported
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Dataset definition
class RNADataset(Dataset):
    """In-memory dataset that pairs two feature views with one class label per sample.

    Args:
        X_handcrafted: array-like of hand-crafted features, one row per sample.
            Converted to a float32 tensor.
        X_bert: array-like holding the second feature view, one row per sample.
            Converted to a float32 tensor.
        labels: array-like of integer class ids, one per sample.
            Converted to an int64 tensor.

    Raises:
        ValueError: if the three inputs do not contain the same number of samples.
    """

    def __init__(self, X_handcrafted, X_bert, labels):
        # Explicit dtypes instead of the legacy typed constructors
        # (torch.FloatTensor / torch.LongTensor).
        self.X_handcrafted = torch.as_tensor(X_handcrafted, dtype=torch.float32)
        self.X_bert = torch.as_tensor(X_bert, dtype=torch.float32)
        self.labels = torch.as_tensor(labels, dtype=torch.long)

        # __len__ is driven by the labels only, so a mismatch would otherwise
        # show up late as an IndexError or as silently ignored feature rows.
        sizes = (len(self.X_handcrafted), len(self.X_bert), len(self.labels))
        if len(set(sizes)) != 1:
            raise ValueError(
                "X_handcrafted, X_bert and labels must have the same number of samples, "
                f"got {sizes[0]}, {sizes[1]} and {sizes[2]}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(handcrafted_features, bert_features, label)`` for index ``idx``."""
        return self.X_handcrafted[idx], self.X_bert[idx], self.labels[idx]

# Model architecture
class DualAttentionModel(nn.Module):
    """Two-branch classifier that fuses hand-crafted and BERT feature vectors.

    Each branch projects its input to ``d_model`` with a linear layer, wraps the
    result as a sequence of length 1 and passes it through multi-head
    self-attention. The two branch outputs are concatenated, mixed by a linear
    fusion layer and classified by a small MLP.

    NOTE: because each sample is presented to the attention layer as a single
    token, the softmax over keys is trivially 1, so the attention layer acts
    only through its value and output projections.

    Args:
        input_dim_hand: size of the hand-crafted feature vector.
        input_dim_bert: size of the BERT feature vector.
        d_model: width shared by both branches (must be divisible by ``num_heads``).
        num_classes: number of output logits.
        num_heads: attention heads per branch (previously fixed at 4).
        hidden_dim: width of the classifier's hidden layer (previously fixed at 256).
        dropout: dropout probability in the classifier (previously fixed at 0.5).
    """

    def __init__(self, input_dim_hand=490, input_dim_bert=768, d_model=256, num_classes=5,
                 num_heads=4, hidden_dim=256, dropout=0.5):
        super().__init__()

        # Layers are created in the same order as before so that, for a given
        # RNG seed, the default configuration initialises identically.
        self.hand_fc = nn.Linear(input_dim_hand, d_model)
        self.hand_attn = nn.MultiheadAttention(d_model, num_heads=num_heads, batch_first=True)
        # Not used by forward(); kept so the module exposes the same attributes as before.
        self.hand_pool = nn.AdaptiveAvgPool1d(1)

        self.bert_fc = nn.Linear(input_dim_bert, d_model)
        self.bert_attn = nn.MultiheadAttention(d_model, num_heads=num_heads, batch_first=True)
        # Not used by forward(); kept so the module exposes the same attributes as before.
        self.bert_pool = nn.AdaptiveAvgPool1d(1)

        self.fusion = nn.Linear(d_model * 2, d_model)
        self.classifier = nn.Sequential(
            nn.Linear(d_model, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_classes)
        )

    @staticmethod
    def _encode_branch(projection, attention, features):
        """Project ``features`` and run self-attention over them as a length-1 sequence."""
        tokens = projection(features).unsqueeze(1)      # (batch, 1, d_model)
        attended, _ = attention(tokens, tokens, tokens)
        return attended.squeeze(1)                      # (batch, d_model)

    def forward(self, x_hand, x_bert):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x_hand: hand-crafted features, expected shape ``(batch, input_dim_hand)``.
            x_bert: BERT features, expected shape ``(batch, input_dim_bert)``.
        """
        hand_repr = self._encode_branch(self.hand_fc, self.hand_attn, x_hand)
        bert_repr = self._encode_branch(self.bert_fc, self.bert_attn, x_bert)

        fused = self.fusion(torch.cat([hand_repr, bert_repr], dim=-1))
        return self.classifier(fused)

# Load the pre-computed feature matrices and the label table
X_handcrafted = np.load("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\X_handcrafted.npy")
X_bert = np.load("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\X_bert.npy")
df_rnalocate = pd.read_csv("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\rnalocate_dataset.csv")
labels = df_rnalocate['label'].values

# Fail early if the three sources are misaligned: everything downstream indexes
# the two feature matrices and the labels with the same sample index.
if not (len(X_handcrafted) == len(X_bert) == len(labels)):
    raise ValueError(
        "Feature/label row counts differ: "
        f"X_handcrafted={len(X_handcrafted)}, X_bert={len(X_bert)}, labels={len(labels)}"
    )

# Wrap the arrays in a Dataset
dataset = RNADataset(X_handcrafted, X_bert, labels)

# Class-balancing weights: every sample is weighted by the inverse of its
# class count, so each class carries the same total sampling weight.
num_samples = len(labels)
class_counts = np.bincount(labels)
weights = np.divide(1.0, class_counts[labels])

# Draw num_samples indices per epoch, with replacement, according to those weights
sampler = WeightedRandomSampler(weights=weights, num_samples=num_samples, replacement=True)

# Build the DataLoader (the sampler decides the order, so no shuffle flag is passed)
dataloader = DataLoader(dataset=dataset, sampler=sampler, batch_size=32)

# Model, loss, optimizer, LR scheduler and mixed-precision setup
SEED = 42
# Seed before the model is built so weight initialisation and the random draws
# made during training are repeatable from run to run.
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DualAttentionModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
# The scheduler's `verbose` flag is deprecated in recent PyTorch releases;
# learning-rate changes are logged explicitly at the end of each epoch instead.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)
# Mixed precision is only enabled on CUDA. Disabling it explicitly on CPU keeps
# the same code path without the "CUDA is not available" warnings.
use_amp = device.type == "cuda"
scaler = GradScaler(enabled=use_amp)

# Per-epoch metric history
epoch_losses = []
epoch_accs = []
epoch_precisions = []
epoch_recalls = []
epoch_f1s = []

# Training loop
# NOTE: every metric below is computed on the training batches themselves,
# while the model is in train() mode; they are not held-out validation scores.
num_epochs = 200
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []

    for x_hand, x_bert, y in dataloader:
        x_hand, x_bert, y = x_hand.to(device), x_bert.to(device), y.to(device)

        optimizer.zero_grad()
        with autocast(enabled=use_amp):
            outputs = model(x_hand, x_bert)
            loss = criterion(outputs, y)
        # With the scaler disabled these three calls reduce to a plain
        # backward() / optimizer.step().
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

        # Predictions are collected for bookkeeping only, so no graph is needed
        with torch.no_grad():
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    # Mean of the per-batch mean losses
    avg_loss = total_loss / len(dataloader)
    acc = accuracy_score(all_labels, all_preds)
    # zero_division=0 yields the same values as the default but without a
    # warning whenever a class is never predicted in an epoch.
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average=None, zero_division=0
    )
    # Unweighted mean over classes (macro average)
    macro_precision = precision.mean()
    macro_recall = recall.mean()
    macro_f1 = f1.mean()

    epoch_losses.append(avg_loss)
    epoch_accs.append(acc)
    epoch_precisions.append(macro_precision)
    epoch_recalls.append(macro_recall)
    epoch_f1s.append(macro_f1)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Acc: {acc:.4f}, Prec: {macro_precision:.4f}, Recall: {macro_recall:.4f}, F1: {macro_f1:.4f}")

    # Step the plateau scheduler on the training loss and report any LR change
    lr_before = optimizer.param_groups[0]["lr"]
    scheduler.step(avg_loss)
    lr_after = optimizer.param_groups[0]["lr"]
    if lr_after != lr_before:
        print(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

# Compute and print the final per-class precision / recall / F1 report.
# all_labels / all_preds hold whatever the most recent epoch collected.
# zero_division=0 keeps the reported values but avoids undefined-metric warnings.
final_report = classification_report(all_labels, all_preds, digits=4, zero_division=0)
print("\nFinal Classification Report:")
print(final_report)

# Compute and plot the confusion matrix.
# The axis labels are derived from the classes that actually occur instead of
# a hard-coded range(5), so ticks always line up with the matrix rows/columns.
class_ids = np.unique(np.concatenate([all_labels, all_preds]))
tick_labels = class_ids.tolist()
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=tick_labels, yticklabels=tick_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.savefig("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\confusion_matrix2.png")
plt.close()

# Plot precision, recall and F1 per epoch in three side-by-side panels and save the figure
epoch_axis = range(1, num_epochs + 1)
prf_panels = [
    # (series, legend / y-axis label, panel title, line colour)
    (epoch_precisions, 'Precision', 'Precision Over Epochs', 'blue'),
    (epoch_recalls, 'Recall', 'Recall Over Epochs', 'green'),
    (epoch_f1s, 'F1-Score', 'F1-Score Over Epochs', 'red'),
]

plt.figure(figsize=(12, 4))
for panel_no, (series, metric_name, panel_title, line_color) in enumerate(prf_panels, start=1):
    plt.subplot(1, 3, panel_no)
    plt.plot(epoch_axis, series, label=metric_name, color=line_color)
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.title(panel_title)
    plt.legend()

plt.tight_layout()
plt.savefig("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\prf_metrics2.png")
plt.close()

# Save the trained weights (state_dict only, not the full module object)
trained_weights = model.state_dict()
torch.save(
    trained_weights,
    "F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\dual_attention_model2.pth",
)
print("Model saved successfully!")

# Plot training loss and training accuracy per epoch in two panels and save the figure
epoch_axis = range(1, num_epochs + 1)
training_panels = [
    # (series, legend label, y-axis label, panel title, extra plot kwargs)
    (epoch_losses, 'Training Loss', 'Loss', 'Training Loss Over Epochs', {}),
    (epoch_accs, 'Training Accuracy', 'Accuracy', 'Training Accuracy Over Epochs', dict(color='orange')),
]

plt.figure(figsize=(10, 5))
for panel_no, (series, legend_label, y_label, panel_title, plot_kwargs) in enumerate(training_panels, start=1):
    plt.subplot(1, 2, panel_no)
    plt.plot(epoch_axis, series, label=legend_label, **plot_kwargs)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(panel_title)
    plt.legend()

plt.tight_layout()
plt.savefig("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\training_metrics2.png")
plt.close()

Epoch 184/200, Loss: 0.2766, Acc: 0.8915, Prec: 0.8893, Recall: 0.8919, F1: 0.8895
Epoch 185/200, Loss: 0.2841, Acc: 0.8896, Prec: 0.8859, Recall: 0.8889, F1: 0.8864
Epoch 186/200, Loss: 0.2718, Acc: 0.8947, Prec: 0.8910, Recall: 0.8930, F1: 0.8910
Epoch 187/200, Loss: 0.2766, Acc: 0.8926, Prec: 0.8890, Recall: 0.8917, F1: 0.8893
Epoch 188/200, Loss: 0.2885, Acc: 0.8899, Prec: 0.8881, Recall: 0.8911, F1: 0.8886
Epoch 189/200, Loss: 0.2833, Acc: 0.8890, Prec: 0.8857, Recall: 0.8888, F1: 0.8862
Epoch 190/200, Loss: 0.2694, Acc: 0.8932, Prec: 0.8909, Recall: 0.8930, F1: 0.8911
Epoch 191/200, Loss: 0.2772, Acc: 0.8901, Prec: 0.8872, Recall: 0.8906, F1: 0.8878
Epoch 192/200, Loss: 0.2860, Acc: 0.8867, Prec: 0.8834, Recall: 0.8868, F1: 0.8841
Epoch 193/200, Loss: 0.2658, Acc: 0.8917, Prec: 0.8893, Recall: 0.8923, F1: 0.8898
Epoch 194/200, Loss: 0.2791, Acc: 0.8907, Prec: 0.8880, Recall: 0.8910, F1: 0.8886
Epoch 195/200, Loss: 0.2766, Acc: 0.8908, Prec: 0.8890, Recall: 0.8924, F1: 0.8895
Epoc

In [1]:
import numpy as np
import pandas as pd

# Load the data
# NOTE(review): the two feature matrices are not used by the counting below;
# they are still loaded here in case later cells rely on these names.
X_handcrafted = np.load("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\X_handcrafted.npy")
X_bert = np.load("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\X_bert.npy")
df_rnalocate = pd.read_csv("F:\\payan-nameh\\faz2 . 1404.04.02\\Date\\RNALocate\\rnalocate_dataset.csv")
labels = df_rnalocate['label'].values

# Count the number of samples in each class
class_counts = np.bincount(labels)
num_classes = len(class_counts)

# Print the results.
# num_classes is len(class_counts), so every index is in range and the former
# "else 0" fallback could never trigger; iterate over the counts directly.
print("تعداد نمونه‌ها در هر کلاس:")
for i, count in enumerate(class_counts):
    print(f"کلاس {i}: {count} نمونه")

تعداد نمونه‌ها در هر کلاس:
کلاس 0: 5310 نمونه
کلاس 1: 4855 نمونه
کلاس 2: 350 نمونه
کلاس 3: 1185 نمونه
کلاس 4: 710 نمونه
