<a href="https://colab.research.google.com/github/emrealty/AlertProject/blob/main/6_MSE_Best_MODEL_%E2%80%94_FULL_SCRIPT_Konfig_RoPE_%2B_Post_LayerNorm_%2B_ReLU_FFN_%2B_Gated_Cross_Attn_(ablation_config_idx_%3D_66).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
###########################################
# MSE_Best MODEL — FULL SCRIPT
# Konfig: RoPE + Post-LayerNorm + ReLU FFN + Gated Cross-Attn
# (ablation: config_idx = 66)
###########################################

######################
# [BÖLÜM 1: Ortam Kurulumu, Drive, Paketler]
######################
print("===== 1) Ortam Kurulumu ve Drive Bağlantısı =====")

from google.colab import drive
drive.mount('/content/drive')

!pip install rdkit-pypi -q
!pip install torch -q
!pip install transformers -q
!pip install scikit-learn -q
!pip install pandas -q
!pip install numpy -q

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
import re
import time
import math
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, average_precision_score
from math import sqrt
from scipy.stats import pearsonr

def set_seed(seed=42):
    """Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices).

    Args:
        seed: Integer seed passed unchanged to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Seed all RNGs once before any data splitting or model construction.
set_seed(42)

# Use the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Kullanılan cihaz:", device)

######################
# [SECTION 2: Data loading and inspection]
######################
print("\n===== 2) Veri Okuma ve İnceleme =====")

# A Davis-like CSV is expected here. Update the path if the file lives elsewhere!
file_path = '/content/drive/MyDrive/TransformDTA/davis_cleaned.csv'

# Comma-separated file whose first row is the header.
df = pd.read_csv(file_path, sep=',', header=0)

# Rename the columns to (ID, Target, SMILES, Sequence, Label)
df.rename(columns={
    "Compound_ID": "ID",
    "Protein_ID": "Target",
    "SMILES": "SMILES",
    "Protein_Sequence": "Sequence",
    "Label": "Label"
}, inplace=True)

# Quick look at the data: first rows, overall shape and label distribution.
print("DataFrame ilk 5 satır:")
print(df.head())
print("\nVeri boyutu:", df.shape)
print("Label istatistikleri:")
print(df['Label'].describe())

######################
# [SECTION 3: Train / Val / Test split]
######################
print("\n===== 3) Train-Val-Test Split =====")

# 80% train, then the remaining 20% is halved into validation and test (10% each).
# The split is over rows, so the same compound or protein can appear in more than one split.
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

print("Train:", train_df.shape, "Val:", val_df.shape, "Test:", test_df.shape)

######################
# [SECTION 4: Tokenization and PyTorch Dataset/DataLoader]
######################
print("\n===== 4) Tokenizasyon, Dataset, DataLoader =====")


# Protein tokenization (character level)
def tokenize_protein(seq):
    """Return the characters of `seq` as a list, ignoring surrounding whitespace."""
    trimmed = seq.strip()
    return [*trimmed]

# SMILES tokenization. Alternatives are tried left to right, so multi-character
# tokens (bracket atoms, %NN ring closures, two-letter elements) must come
# before the single-character ones.
_SMILES_REGEX = re.compile(
    r"\[[^\]]+\]|"    # bracket atom kept whole: [NH3+], [O-], [C@@H]
    r"%\d{2}|"        # two-digit ring closure, e.g. %12
    r"Br|Cl|"         # two-letter elements
    r"Si|Se|Na|Li|Mg|Ca|Zn|Fe|Cu|Mn|Al|Ag|Sn|Hg|Pb|Bi|Ne|He|Ar|Kr|Xe|"
    r"@@?|"           # stereo markers
    r"=|#|"           # bonds
    r"\(|\)|"         # branch parentheses
    r"\.|"            # component separator
    r"\d|"            # single-digit ring closure
    r"[A-Za-z]|"      # one-letter element
    r"\+|-|\*|/|\\|"  # charge / wildcard / directional bond symbols
    r"\S"             # anything else (e.g. ':' or '~') becomes its own token
)


def tokenize_smiles(smi: str):
    """Split a SMILES string into a list of tokens.

    Bracket atoms such as ``[NH3+]`` or ``[C@@H]`` are returned as single
    tokens; the previous ``\\[\\w+\\]`` pattern could not match the charge and
    chirality characters inside brackets, so those atoms were split up and the
    brackets themselves were dropped. Any non-whitespace character without a
    dedicated rule is emitted as a one-character token instead of being
    silently discarded, so the tokens always concatenate back to the stripped
    input (for inputs without inner whitespace).

    Args:
        smi: SMILES string; leading and trailing whitespace is ignored.

    Returns:
        List of token strings in input order.
    """
    return _SMILES_REGEX.findall(smi.strip())

# Tokenizer sanity check on two SMILES with two-letter halogens
print(f"\nSMILES Tokenizer Testi:")
test_smiles = "CCCCl"
test_smiles2 = "c1ccccc1Br"
print(f"  Input: {test_smiles}")
print(f"  Tokens: {tokenize_smiles(test_smiles)}  (Beklenen: ['C','C','C','Cl'])")
print(f"  Input: {test_smiles2}")
print(f"  Tokens: {tokenize_smiles(test_smiles2)}")

# Vocabularies are built from the training split only; tokens seen only in
# validation/test data fall back to <unk> at encoding time.
all_prot_tokens = {
    tok for prot_seq in train_df['Sequence'] for tok in tokenize_protein(prot_seq)
}
all_smi_tokens = {
    tok for smiles_str in train_df['SMILES'] for tok in tokenize_smiles(smiles_str)
}

# Special tokens come first, so '<pad>' is index 0 and '<unk>' is index 1.
prot_special_tokens = ['<pad>', '<unk>', '<cls>', '<sep>']
smi_special_tokens  = list(prot_special_tokens)

prot_vocab_list = prot_special_tokens + sorted(all_prot_tokens)
smi_vocab_list  = smi_special_tokens + sorted(all_smi_tokens)

# token -> integer id, in list order
protein_vocab = dict(zip(prot_vocab_list, range(len(prot_vocab_list))))
smiles_vocab  = dict(zip(smi_vocab_list, range(len(smi_vocab_list))))

print(f"\n✅ Protein vocab size: {len(protein_vocab)}")
print(f"✅ SMILES vocab size:  {len(smiles_vocab)}")
print(f"   SMILES vocab'da 'Cl' var mı? {'Cl' in smiles_vocab}")
print(f"   SMILES vocab'da 'Br' var mı? {'Br' in smiles_vocab}")

class DTADataset(Dataset):
    """Dataset of (protein sequence, SMILES, label) rows encoded as fixed-length id tensors.

    Each item is a dict with:
        'protein_input': LongTensor of length `max_prot_len`
        'smiles_input':  LongTensor of length `max_smi_len`
        'label':         scalar FloatTensor

    Sequences longer than the maximum are truncated; shorter ones are
    right-padded with the vocabulary's '<pad>' id. Tokens missing from a
    vocabulary map to its '<unk>' id.
    """

    def __init__(self, df, protein_vocab, smiles_vocab,
                 max_prot_len=1000, max_smi_len=100):
        # Re-index so positional access in __getitem__ runs 0..len-1.
        self.df = df.reset_index(drop=True)
        self.protein_vocab = protein_vocab
        self.smiles_vocab  = smiles_vocab
        self.max_prot_len  = max_prot_len
        self.max_smi_len   = max_smi_len

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _encode(tokens, vocab, max_len):
        """Truncate `tokens` to `max_len`, map them to ids and right-pad with '<pad>'."""
        ids = [vocab.get(tok, vocab['<unk>']) for tok in tokens[:max_len]]
        ids.extend([vocab['<pad>']] * (max_len - len(ids)))
        return ids

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        target = float(record['Label'])

        protein_ids = self._encode(
            tokenize_protein(record['Sequence']), self.protein_vocab, self.max_prot_len
        )
        smiles_ids = self._encode(
            tokenize_smiles(record['SMILES']), self.smiles_vocab, self.max_smi_len
        )

        return {
            'protein_input': torch.tensor(protein_ids, dtype=torch.long),
            'smiles_input':  torch.tensor(smiles_ids, dtype=torch.long),
            'label':         torch.tensor(target, dtype=torch.float),
        }

# Wrap each split in a DTADataset (default max lengths: 1000 protein tokens, 100 SMILES tokens).
batch_size = 32
train_dataset = DTADataset(train_df, protein_vocab, smiles_vocab)
val_dataset   = DTADataset(val_df,   protein_vocab, smiles_vocab)
test_dataset  = DTADataset(test_df,  protein_vocab, smiles_vocab)

# Only the training loader shuffles; validation and test keep row order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False)

print(f"train_loader: {len(train_dataset)} örnek, val_loader: {len(val_dataset)}, test_loader: {len(test_dataset)}")

######################
# [SECTION 5: MODEL — RoPE + Post-LN + ReLU FFN + Gated Cross-Attn]
######################
print("\n===== 5) Model (RoPE + Post-LN + Gated Cross-Attn) =====")

class RotaryEmbedding(nn.Module):
    """
    Cos/sin tables for rotary position embeddings (RoPE).

    Holds one inverse frequency per pair of channels and, on each call,
    builds the angle table position * inv_freq for the requested length.
    """
    def __init__(self, dim, base=10000):
        super().__init__()
        self.dim = dim
        exponents = torch.arange(0, dim, 2).float() / dim
        # Registered as a buffer so it follows the module across devices
        # and is stored in the state dict under 'inv_freq'.
        self.register_buffer('inv_freq', 1.0 / (base ** exponents))

    def forward(self, seq_len, device=None):
        """Return (cos, sin), each shaped [1, seq_len, 1, dim]."""
        target = self.inv_freq.device if device is None else device
        positions = torch.arange(seq_len, device=target, dtype=torch.float)  # [L]
        angles = positions[:, None] * self.inv_freq[None, :]                 # [L, dim/2]
        table = torch.cat([angles, angles], dim=-1)                          # [L, dim]
        cos = table.cos().unsqueeze(0).unsqueeze(2)                          # [1,L,1,dim]
        sin = table.sin().unsqueeze(0).unsqueeze(2)
        return cos, sin

def apply_rotary_pos_emb(x, cos, sin):
    """
    Rotate channel pairs of `x` by the angles encoded in `cos` / `sin`.

    Channel i of the first half is paired with channel i of the second half;
    only the first half of the cos/sin tables is read.

    x:   [B, L, H, D]
    cos: [1, L, 1, D]
    sin: [1, L, 1, D]
    Returns a tensor with the same shape as x.
    """
    half = x.size(-1) // 2
    lower, upper = x[..., :half], x[..., half:]
    c, s = cos[..., :half], sin[..., :half]
    rotated = (lower * c - upper * s, lower * s + upper * c)
    return torch.cat(rotated, dim=-1)

class RoPESelfAttention(nn.Module):
    """
    Multi-head self-attention with rotary position embeddings (RoPE).

    Queries and keys are rotated with the same position table before the
    scaled dot product; values are left unrotated.

    Args:
        d_model: Model width; must be divisible by `nhead`.
        nhead:   Number of attention heads.
        dropout: Dropout probability applied to the attention weights.
    """
    def __init__(self, d_model, nhead, dropout=0.1):
        super().__init__()
        assert d_model % nhead == 0, "d_model must be divisible by nhead"
        self.d_model = d_model
        self.nhead = nhead
        self.head_dim = d_model // nhead

        self.q_proj = nn.Linear(d_model, d_model)
        self.k_proj = nn.Linear(d_model, d_model)
        self.v_proj = nn.Linear(d_model, d_model)
        self.o_proj = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        self.rotary = RotaryEmbedding(self.head_dim)

    def forward(self, x, key_padding_mask=None):
        """
        Args:
            x: [B, L, D] input sequence.
            key_padding_mask: optional [B, L] mask, True (non-zero) = padding
                position that must not be attended to.

        Returns:
            [B, L, D] attended sequence.
        """
        B, L, D = x.shape
        H = self.nhead

        q = self.q_proj(x).view(B, L, H, self.head_dim)
        k = self.k_proj(x).view(B, L, H, self.head_dim)
        v = self.v_proj(x).view(B, L, H, self.head_dim)

        cos, sin = self.rotary(L, x.device)
        q = apply_rotary_pos_emb(q, cos, sin)
        k = apply_rotary_pos_emb(k, cos, sin)

        q = q.permute(0, 2, 1, 3)  # [B,H,L,Dh]
        k = k.permute(0, 2, 1, 3)
        v = v.permute(0, 2, 1, 3)

        attn_scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)  # [B,H,L,L]
        if key_padding_mask is not None:
            mask = key_padding_mask[:, None, None, :].to(torch.bool)  # [B,1,1,L]
            # Use the most negative finite value rather than -inf: masked keys
            # still get exactly zero weight when at least one key is real, but
            # a sequence made only of padding yields finite (uniform) weights
            # instead of NaN, which would otherwise poison the whole batch loss.
            attn_scores = attn_scores.masked_fill(mask, torch.finfo(attn_scores.dtype).min)

        attn_probs = torch.softmax(attn_scores, dim=-1)
        attn_probs = self.dropout(attn_probs)

        attn_out = torch.matmul(attn_probs, v)  # [B,H,L,Dh]
        # Merge the heads back into the model dimension.
        attn_out = attn_out.permute(0, 2, 1, 3).contiguous().view(B, L, D)
        out = self.o_proj(attn_out)
        return out

class RoPECrossAttention(nn.Module):
    """
    Multi-head cross-attention with rotary position embeddings (RoPE).

    The query sequence and the key/value sequence each get their own rotary
    table, with positions counted from 0 independently in each sequence.

    Args:
        d_model: Model width; must be divisible by `nhead`.
        nhead:   Number of attention heads.
        dropout: Dropout probability applied to the attention weights.
    """
    def __init__(self, d_model, nhead, dropout=0.1):
        super().__init__()
        assert d_model % nhead == 0, "d_model must be divisible by nhead"
        self.d_model = d_model
        self.nhead = nhead
        self.head_dim = d_model // nhead

        self.q_proj = nn.Linear(d_model, d_model)
        self.k_proj = nn.Linear(d_model, d_model)
        self.v_proj = nn.Linear(d_model, d_model)
        self.o_proj = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        self.rotary_q = RotaryEmbedding(self.head_dim)
        self.rotary_k = RotaryEmbedding(self.head_dim)

    def forward(self, q_input, kv_input, kv_mask=None):
        """
        Args:
            q_input:  [B, Lq, D] sequence that produces the queries.
            kv_input: [B, Lk, D] sequence that produces keys and values.
            kv_mask:  optional [B, Lk] mask, True (non-zero) = padding position
                in `kv_input` that must not be attended to.

        Returns:
            [B, Lq, D] tensor: for each query position, the attention-weighted
            mix of `kv_input` values after the output projection.
        """
        B, Lq, D = q_input.shape
        Lk = kv_input.size(1)
        H = self.nhead

        q = self.q_proj(q_input).view(B, Lq, H, self.head_dim)
        k = self.k_proj(kv_input).view(B, Lk, H, self.head_dim)
        v = self.v_proj(kv_input).view(B, Lk, H, self.head_dim)

        cos_q, sin_q = self.rotary_q(Lq, q_input.device)
        cos_k, sin_k = self.rotary_k(Lk, kv_input.device)
        q = apply_rotary_pos_emb(q, cos_q, sin_q)
        k = apply_rotary_pos_emb(k, cos_k, sin_k)

        q = q.permute(0, 2, 1, 3)  # [B,H,Lq,Dh]
        k = k.permute(0, 2, 1, 3)  # [B,H,Lk,Dh]
        v = v.permute(0, 2, 1, 3)  # [B,H,Lk,Dh]

        attn_scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)  # [B,H,Lq,Lk]
        if kv_mask is not None:
            mask = kv_mask[:, None, None, :].to(torch.bool)  # [B,1,1,Lk]
            # Finite fill value instead of -inf: identical weights whenever at
            # least one key is real, but an all-padding key sequence produces
            # finite (uniform) weights rather than NaN.
            attn_scores = attn_scores.masked_fill(mask, torch.finfo(attn_scores.dtype).min)

        attn_probs = torch.softmax(attn_scores, dim=-1)
        attn_probs = self.dropout(attn_probs)

        attn_out = torch.matmul(attn_probs, v)  # [B,H,Lq,Dh]
        # Merge the heads back into the model dimension.
        attn_out = attn_out.permute(0, 2, 1, 3).contiguous().view(B, Lq, D)
        out = self.o_proj(attn_out)
        return out

class TransformerEncoderBlock(nn.Module):
    """
    One post-LayerNorm encoder layer:
    - self-attention with RoPE, followed by residual add and LayerNorm
    - ReLU feed-forward MLP, followed by residual add and LayerNorm
    """
    def __init__(self, d_model=256, nhead=8, dim_feedforward=1024, dropout=0.1):
        super().__init__()
        # Sub-modules are created in a fixed order so that seeded weight
        # initialisation stays reproducible.
        self.self_attn = RoPESelfAttention(d_model, nhead, dropout)
        self.dropout = nn.Dropout(dropout)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

        ffn_layers = [
            nn.Linear(d_model, dim_feedforward),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(dim_feedforward, d_model),
            nn.Dropout(dropout),
        ]
        self.ffn = nn.Sequential(*ffn_layers)

    def forward(self, x, key_padding_mask=None):
        """Apply attention and feed-forward sub-layers to `x`; the output has the shape of `x`."""
        attended = self.self_attn(x, key_padding_mask=key_padding_mask)
        hidden = self.norm1(x + self.dropout(attended))
        return self.norm2(hidden + self.ffn(hidden))

class CrossAttentionBlock(nn.Module):
    """
    Bidirectional cross-attention between the protein and SMILES streams.
    - RoPE cross-attention in both directions
    - optional gated residual: the attention update is scaled element-wise
      by sigmoid(Linear(stream)) before it is added back
    - post-LayerNorm

    The two directions run sequentially: the SMILES stream attends to the
    protein stream after the protein stream has already been updated.
    """
    def __init__(self, d_model=256, nhead=8, dropout=0.1, gated=True):
        super().__init__()
        self.cross_attn_p2s = RoPECrossAttention(d_model, nhead, dropout)
        self.cross_attn_s2p = RoPECrossAttention(d_model, nhead, dropout)
        self.dropout = nn.Dropout(dropout)
        self.norm_p = nn.LayerNorm(d_model)
        self.norm_s = nn.LayerNorm(d_model)
        self.gated = gated
        if gated:
            # Gate layers exist only in gated mode.
            self.gate_p = nn.Linear(d_model, d_model)
            self.gate_s = nn.Linear(d_model, d_model)

    def _residual(self, stream, update, gate_name, norm):
        """Add the (optionally gated) `update` to `stream` and normalise the sum."""
        if self.gated:
            gate = torch.sigmoid(getattr(self, gate_name)(stream))
            update = gate * update
        return norm(stream + self.dropout(update))

    def forward(self, p, s, p_mask=None, s_mask=None):
        """
        p: protein [B,Lp,D], s: smiles [B,Ls,D]
        p_mask / s_mask: optional padding masks for the respective stream.
        Returns the updated (p, s) pair.
        """
        # Protein attends to SMILES
        p_update = self.cross_attn_p2s(p, s, kv_mask=s_mask)
        p = self._residual(p, p_update, 'gate_p', self.norm_p)

        # SMILES attends to the already-updated protein stream
        s_update = self.cross_attn_s2p(s, p, kv_mask=p_mask)
        s = self._residual(s, s_update, 'gate_s', self.norm_s)

        return p, s

class CrossAttentionFusionModel(nn.Module):
    """
    "MSE-best" ablation architecture:
    - Positional encoding: RoPE inside attention (token embeddings only, no
      additive position embedding)
    - Normalisation: post-LayerNorm
    - FFN: ReLU MLP
    - Cross-attention: gated RoPE cross-attention

    `prot_max_len` and `smi_max_len` are accepted for configuration
    compatibility but are not used inside this class.
    """
    def __init__(self,
                 prot_vocab_size,
                 smi_vocab_size,
                 prot_max_len=1000,
                 smi_max_len=100,
                 d_model=256,
                 nhead=8,
                 num_encoder_layers=2,
                 num_cross_layers=1,
                 dim_feedforward=1024,
                 dropout=0.1,
                 padding_idx=0):
        super().__init__()

        self.padding_idx = padding_idx

        def encoder_stack():
            # One independent stack of encoder layers per modality.
            return nn.ModuleList([
                TransformerEncoderBlock(d_model, nhead, dim_feedforward, dropout)
                for _ in range(num_encoder_layers)
            ])

        # Token embeddings only (position information enters via RoPE in attention).
        # Creation order below is kept fixed so seeded initialisation is reproducible.
        self.prot_token_emb = nn.Embedding(prot_vocab_size, d_model, padding_idx=padding_idx)
        self.smi_token_emb  = nn.Embedding(smi_vocab_size,  d_model, padding_idx=padding_idx)

        # Per-modality encoders
        self.prot_encoders = encoder_stack()
        self.smi_encoders = encoder_stack()

        # Gated cross-attention layers
        self.cross_layers = nn.ModuleList([
            CrossAttentionBlock(d_model, nhead, dropout, gated=True)
            for _ in range(num_cross_layers)
        ])

        # Regression head on the concatenated pooled vectors
        head_layers = [
            nn.Linear(d_model*2, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    @staticmethod
    def _masked_mean(hidden, pad_mask):
        """Mean of `hidden` [B,L,D] over non-padding positions; count clamped to at least 1."""
        keep = ~pad_mask.unsqueeze(-1).expand_as(hidden)
        total = (hidden * keep).sum(dim=1)
        count = (~pad_mask).sum(dim=1, keepdim=True).clamp(min=1)
        return total / count

    def forward(self, prot_in, smi_in):
        """
        prot_in: [B,Lp] protein token ids, smi_in: [B,Ls] SMILES token ids.
        Returns one prediction per sample, shape [B].
        """
        # Padding masks (True = pad)
        prot_mask = prot_in == self.padding_idx   # [B,Lp]
        smi_mask  = smi_in == self.padding_idx    # [B,Ls]

        # Embedding lookup
        p_hidden = self.prot_token_emb(prot_in)   # [B,Lp,D]
        s_hidden = self.smi_token_emb(smi_in)     # [B,Ls,D]

        # Modality-specific encoders
        for layer in self.prot_encoders:
            p_hidden = layer(p_hidden, key_padding_mask=prot_mask)
        for layer in self.smi_encoders:
            s_hidden = layer(s_hidden, key_padding_mask=smi_mask)

        # Gated cross-attention
        for layer in self.cross_layers:
            p_hidden, s_hidden = layer(p_hidden, s_hidden, p_mask=prot_mask, s_mask=smi_mask)

        # Masked mean pooling, then the regression head
        pooled = torch.cat(
            [self._masked_mean(p_hidden, prot_mask), self._masked_mean(s_hidden, smi_mask)],
            dim=1,
        )
        return self.fc(pooled).squeeze(-1)

######################
# [BÖLÜM 6: Eğitim / Değerlendirme Fonksiyonları ve Metrikler]
######################
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over `dataloader` and return the mean training loss.

    The returned value is the per-sample mean: each batch loss is weighted by
    its batch size, so a shorter final batch does not get the same weight as
    a full one.

    Args:
        model: Module called as ``model(protein_input, smiles_input)``.
        dataloader: Iterable of dicts with 'protein_input', 'smiles_input'
            and 'label' tensors.
        optimizer: Optimizer over the model's parameters.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the batch mean (the default for `nn.MSELoss`).
        device: Device the batch tensors are moved to.

    Returns:
        Mean loss over all samples as a float, or NaN when the loader yields
        no samples.
    """
    model.train()
    loss_sum = 0.0
    n_samples = 0
    for batch in dataloader:
        prot_in = batch['protein_input'].to(device)
        smi_in  = batch['smiles_input'].to(device)
        labels  = batch['label'].to(device)

        optimizer.zero_grad()
        outputs = model(prot_in, smi_in)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_n = labels.numel()
        loss_sum += loss.item() * batch_n
        n_samples += batch_n

    if n_samples == 0:
        # Nothing was seen; NaN is more honest than a fake 0.0 loss.
        return float('nan')
    return loss_sum / n_samples

@torch.no_grad()
def evaluate(model, dataloader, criterion, device):
    """Evaluate `model` on `dataloader` without gradient tracking.

    The loss is the per-sample mean: each batch loss is weighted by its batch
    size, so with a mean-reducing MSE criterion it equals the MSE computed
    from the returned predictions, even when the last batch is shorter.

    Args:
        model: Module called as ``model(protein_input, smiles_input)``.
        dataloader: Iterable of dicts with 'protein_input', 'smiles_input'
            and 'label' tensors.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the batch mean (the default for `nn.MSELoss`).
        device: Device the batch tensors are moved to.

    Returns:
        (avg_loss, preds, trues): the mean loss as a float (NaN when the
        loader yields no samples) and two 1-D NumPy arrays holding
        predictions and labels in loader order.
    """
    model.eval()
    loss_sum = 0.0
    n_samples = 0
    pred_chunks, true_chunks = [], []
    for batch in dataloader:
        prot_in = batch['protein_input'].to(device)
        smi_in  = batch['smiles_input'].to(device)
        labels  = batch['label'].to(device)

        outputs = model(prot_in, smi_in)
        batch_n = labels.numel()
        loss_sum += criterion(outputs, labels).item() * batch_n
        n_samples += batch_n

        pred_chunks.append(outputs.detach().cpu().numpy().reshape(-1))
        true_chunks.append(labels.detach().cpu().numpy().reshape(-1))

    if n_samples == 0:
        # Nothing was seen; NaN is more honest than a fake 0.0 loss.
        return float('nan'), np.array([]), np.array([])
    return loss_sum / n_samples, np.concatenate(pred_chunks), np.concatenate(true_chunks)

def concordance_index(y_true, y_pred):
    """Concordance index (CI) between true and predicted values.

    Over all pairs with different true values, a pair scores 1 when the
    predictions are ordered the same way as the true values, 0.5 when the two
    predictions are tied, and 0 otherwise. The 0.5 credit for tied predictions
    follows the usual C-index definition; without it a constant predictor
    scores 0 instead of the chance level 0.5.

    Each row of the pair matrix is compared with NumPy, which keeps memory
    linear in the number of samples while avoiding a pure-Python inner loop.

    Args:
        y_true: 1-D sequence of true values.
        y_pred: 1-D sequence of predictions, same length as `y_true`.

    Returns:
        CI in [0, 1], or 0.0 when no pair has distinct true values.

    Raises:
        ValueError: if the two inputs differ in length.
    """
    y_true = np.asarray(y_true, dtype=np.float64).ravel()
    y_pred = np.asarray(y_pred, dtype=np.float64).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same length")

    comparable = 0
    score = 0.0
    for i in range(len(y_true) - 1):
        rest_true = y_true[i + 1:]
        rest_pred = y_pred[i + 1:]

        true_lower = rest_true < y_true[i]     # sample i has the larger true value
        true_higher = rest_true > y_true[i]
        usable = true_lower | true_higher      # pairs with equal true values are skipped

        concordant = (true_lower & (rest_pred < y_pred[i])) | \
                     (true_higher & (rest_pred > y_pred[i]))
        tied = usable & (rest_pred == y_pred[i])

        comparable += int(usable.sum())
        score += float(concordant.sum()) + 0.5 * float(tied.sum())

    return 0.0 if comparable == 0 else score / comparable

def rm2_score(y_true, y_pred):
    """Modified squared correlation coefficient r_m^2.

    r_m^2 = r^2 * (1 - sqrt(|r^2 - r0^2|)), where r^2 is the squared Pearson
    correlation between true and predicted values and r0^2 is the coefficient
    of determination of the regression of `y_true` on `y_pred` forced through
    the origin:

        k    = sum(y_true * y_pred) / sum(y_pred ** 2)
        r0^2 = 1 - sum((y_true - k * y_pred) ** 2) / sum((y_true - mean(y_true)) ** 2)

    The earlier code took r0 as the Pearson correlation between `y_true` and
    an affine function of `y_true`, which is always +/-1. That made r0^2
    identically 1, so the penalty term ignored any offset or scale error in
    the predictions.

    Args:
        y_true: 1-D sequence of true values.
        y_pred: 1-D sequence of predictions, same length as `y_true`.

    Returns:
        r_m^2 as a float; 0.0 when fewer than two samples are given or when
        either input is (numerically) constant, since r^2 is undefined there.

    Raises:
        ValueError: if the two inputs differ in length.
    """
    # float64 avoids precision loss when the inputs are float32 model outputs.
    y_true = np.asarray(y_true, dtype=np.float64).ravel()
    y_pred = np.asarray(y_pred, dtype=np.float64).ravel()
    if y_true.size != y_pred.size:
        raise ValueError("y_true and y_pred must have the same length")
    if y_true.size < 2:
        return 0.0

    ss_true = np.sum((y_true - y_true.mean()) ** 2)
    ss_pred = np.sum((y_pred - y_pred.mean()) ** 2)
    if ss_true < 1e-8 or ss_pred <= 0.0:
        return 0.0

    r, _ = pearsonr(y_true, y_pred)
    r_sq = r ** 2

    # Regression through the origin; sum(y_pred ** 2) > 0 because y_pred is not constant.
    k = np.sum(y_true * y_pred) / np.sum(y_pred * y_pred)
    r0_sq = 1.0 - np.sum((y_true - k * y_pred) ** 2) / ss_true

    return float(r_sq * (1.0 - np.sqrt(abs(r_sq - r0_sq))))

def aupr_score(y_true, y_pred, threshold=7.0):
    """Area under the precision-recall curve after binarising the true values.

    A sample is positive when its true value is strictly greater than
    `threshold`; the raw predictions are used as ranking scores.
    """
    is_positive = np.asarray(y_true) > threshold
    return average_precision_score(is_positive.astype(int), y_pred)

######################
# [SECTION 7: MSE_Best MODEL — training & evaluation]
######################
print("\n===== 7) MSE_Best: Eğitim & Değerlendirme =====")

# Hyper-parameters passed to CrossAttentionFusionModel.
# padding_idx=0 matches the '<pad>' entry, which is first in both vocabularies.
common_cfg = dict(
    prot_vocab_size=len(protein_vocab),
    smi_vocab_size=len(smiles_vocab),
    prot_max_len=1000,
    smi_max_len=100,
    d_model=256,
    nhead=8,
    num_encoder_layers=2,
    num_cross_layers=1,
    dim_feedforward=1024,
    dropout=0.1,
    padding_idx=0
)

EPOCHS = 20
LR = 1e-4
criterion = nn.MSELoss()

name = "MSE_Best_RoPE_PostLN_ReLU_Gated"
print(f"\n--- {name} ---")
set_seed(42)  # reproducibility: re-seed right before weight initialisation
model = CrossAttentionFusionModel(**common_cfg).to(device)
optimizer = optim.Adam(model.parameters(), lr=LR)

# Track the lowest validation loss seen so far; its weights are checkpointed
# to a file in the current working directory.
best_val_loss = float('inf')
best_path = f"best_{name}.pt"

for epoch in range(1, EPOCHS+1):
    t0 = time.time()
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_preds, val_trues = evaluate(model, val_loader, criterion, device)
    val_rmse = sqrt(mean_squared_error(val_trues, val_preds))
    dt = time.time() - t0
    print(f"[{name}] Epoch {epoch:02d}/{EPOCHS} "
          f"TrainLoss={train_loss:.4f} ValLoss={val_loss:.4f} ValRMSE={val_rmse:.4f} ({dt:.1f}s)")
    # Save only when validation loss strictly improves.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), best_path)
        print(f"  -> Best model saved to {best_path}")

# Test: evaluate with the best checkpoint, not the last-epoch weights
model.load_state_dict(torch.load(best_path, map_location=device))
test_loss, test_preds, test_trues = evaluate(model, test_loader, criterion, device)
test_rmse = sqrt(mean_squared_error(test_trues, test_preds))
test_ci   = concordance_index(test_trues, test_preds)
test_rm2  = rm2_score(test_trues, test_preds)
# Labels above 7.0 count as positives for AUPR.
test_aupr = aupr_score(test_trues, test_preds, threshold=7.0)

print(f"[{name}] Test MSE:   {test_loss:.4f}")
print(f"[{name}] Test RMSE:  {test_rmse:.4f}")
print(f"[{name}] Test CI:    {test_ci:.4f}")
print(f"[{name}] Test r_m^2: {test_rm2:.4f}")
print(f"[{name}] Test AUPR:  {test_aupr:.4f}")

print("\n✅ MSE_Best (RoPE + post-LN + ReLU FFN + gated cross-attn) değerlendirmesi tamamlandı.")


===== 1) Ortam Kurulumu ve Drive Bağlantısı =====
Mounted at /content/drive
[31mERROR: Could not find a version that satisfies the requirement rdkit-pypi (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for rdkit-pypi[0m[31m
[0mKullanılan cihaz: cuda

===== 2) Veri Okuma ve İnceleme =====
DataFrame ilk 5 satır:
         ID        Target                                             SMILES  \
0  11314340          AAK1  CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(CC4=CC=CC...   
1  11314340   ABL1(E255K)  CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(CC4=CC=CC...   
2  11314340   ABL1(F317I)  CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(CC4=CC=CC...   
3  11314340  ABL1(F317I)p  CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(CC4=CC=CC...   
4  11314340   ABL1(F317L)  CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(CC4=CC=CC...   

                                            Sequence     Label  
0  MKKFFDSRREQGGSGLGSGSSGGGGSTSGLGSGYIGRVFGIGRQQV...  7.366532  
1  PFWKILNPLLERGTYYYFMGQQPGKVLGDQRRPSLPALHF