In [None]:
# Load the preprocessed dataset and declare which columns feed the model.
df = pd.read_csv('attention-analysis/preprocessed_data_extra.csv')

features = [
    'face_movement',
    'body_movement',
    'eye_openness_rate',
    'eye_direction_x',
    'eye_direction_y',
    'mouth_openness_rate',
    'yaw_angle',
    'pitch_angle',
    'roll_angle',
]
INPUT_SIZE = len(features)  # number of feature columns fed to the model per time step
SEQ_LEN = 30                # number of consecutive rows per input window

# Rescale every feature column in place; the fitted scaler is kept because it
# is persisted next to the model at the end of the script.
# NOTE(review): the scaler is fitted on every row before the train/test split
# that happens further down, so test-set min/max values influence the
# preprocessing. Confirm whether that is acceptable for the reported metrics.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[features])
df[features] = scaled_values

# Build sliding windows of SEQ_LEN consecutive rows per user. Each window is
# labelled with the 'isAttentive' value of its LAST row.
X_seq, y_seq = [], []
for user_id in df['id'].unique():
    user_df = df[df['id'] == user_id]
    # Pull the arrays out once per user instead of slicing the DataFrame on
    # every window; the resulting windows are identical.
    user_features = user_df[features].values
    user_labels = user_df['isAttentive'].values
    # "+ 1" so the final window (the one ending on the user's last row) is
    # included; without it one valid sample per user is silently dropped and
    # a user with exactly SEQ_LEN rows contributes nothing. Users with fewer
    # than SEQ_LEN rows still yield an empty range.
    for start in range(len(user_df) - SEQ_LEN + 1):
        end = start + SEQ_LEN
        X_seq.append(user_features[start:end])
        y_seq.append(int(user_labels[end - 1]))

X_seq, y_seq = np.array(X_seq), np.array(y_seq)

# RandomUnderSampler is given 2-D input here, so flatten each window to one
# row, balance the classes, then restore the (samples, SEQ_LEN, INPUT_SIZE)
# layout.
# NOTE(review): undersampling happens before the train/test split, so the
# test set is class-balanced as well — confirm this is intended.
X_flat = X_seq.reshape((X_seq.shape[0], -1))
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X_flat, y_seq)
X_seq_balanced = X_resampled.reshape((-1, SEQ_LEN, INPUT_SIZE))
y_seq_balanced = y_resampled

# Two stratified 80/20 splits: first carve off the test set, then split the
# remainder into training and validation sets.
# NOTE(review): the windows built above overlap heavily within a user, so a
# random split can place near-identical windows on both sides — consider a
# per-user (grouped) split; confirm before trusting the test metrics.
split_options = {'test_size': 0.2, 'random_state': 42}

X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_seq_balanced,
    y_seq_balanced,
    stratify=y_seq_balanced,
    **split_options,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    stratify=y_train_val,
    **split_options,
)

class SequenceDataset(Dataset):
    """In-memory dataset pairing feature arrays with integer class labels.

    Args:
        X: array-like of samples; stored as a float32 tensor.
        y: array-like of labels; stored as an int64 (long) tensor.
    """

    def __init__(self, X, y):
        # Convert once up front so item access is plain tensor indexing.
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of samples (size of the first axis of X)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

class Attention(nn.Module):
    """Learned attention pooling over the second axis of a 3-D tensor.

    A small two-layer network scores every step, the scores are normalised
    with a softmax across steps, and the steps are averaged with those
    weights.

    Args:
        hidden_size: size of the last dimension of the tensors passed to
            ``forward``.
    """

    def __init__(self, hidden_size):
        super().__init__()
        bottleneck = hidden_size // 2
        self.attention_net = nn.Sequential(
            nn.Linear(hidden_size, bottleneck),
            nn.Tanh(),
            nn.Linear(bottleneck, 1),
        )

    def forward(self, gru_outputs):
        """Collapse ``(batch, steps, hidden_size)`` into ``(batch, hidden_size)``."""
        # One scalar score per step: (batch, steps, 1) -> (batch, steps).
        scores = self.attention_net(gru_outputs).squeeze(2)
        # Normalise across the step axis so the weights of each sample sum to 1.
        weights = F.softmax(scores, dim=1)
        # (batch, hidden, steps) @ (batch, steps, 1) -> (batch, hidden, 1).
        states_by_feature = gru_outputs.transpose(1, 2)
        pooled = torch.bmm(states_by_feature, weights.unsqueeze(2))
        return pooled.squeeze(2)

class BiGRUAttentionClassifier(nn.Module):
    """Bidirectional GRU encoder, attention pooling, then a linear classifier.

    Args:
        input_size: number of features per time step.
        hidden_size: GRU hidden size per direction.
        num_layers: number of stacked GRU layers.
        dropout: dropout passed to the GRU; forced to 0 when there is only
            one layer.
        num_classes: number of output logits (default 2).
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, num_classes=2):
        super().__init__()
        # Only pass a non-zero dropout to the GRU when layers are stacked.
        inter_layer_dropout = dropout if num_layers > 1 else 0
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=inter_layer_dropout,
        )
        # The GRU is bidirectional, so its output feature size is doubled.
        encoded_size = hidden_size * 2
        self.attention = Attention(encoded_size)
        self.classifier = nn.Linear(encoded_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch-first input tensor ``x``."""
        hidden_states, _ = self.gru(x)
        pooled = self.attention(hidden_states)
        logits = self.classifier(pooled)
        return logits

# Hyperparameters used to build and train the final model.
# NOTE(review): presumably the result of an earlier hyperparameter search
# that is not part of this cell — confirm the source of these values.
best_params = dict(
    learning_rate=0.0007582263943937126,
    hidden_size=64,
    num_layers=2,
    dropout=0.12655376117645975,
    batch_size=32,
)
print("en iyi parametreler:")
print(best_params)

print("\n--- En İyi Parametrelerle Final Model Eğitiliyor ---")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train on the training split only. Previously X_val was concatenated into
# the training data while the validation loss (and therefore the LR
# scheduler) was computed on that same X_val, so the "validation" metric was
# measured on samples the model had been trained on.
final_train_dataset = SequenceDataset(X_train, y_train)
val_dataset = SequenceDataset(X_val, y_val)
test_dataset = SequenceDataset(X_test, y_test)

# Only the training loader is shuffled; evaluation order does not matter.
final_train_loader = DataLoader(final_train_dataset, batch_size=best_params['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_params['batch_size'])
test_loader = DataLoader(test_dataset, batch_size=best_params['batch_size'])

final_model = BiGRUAttentionClassifier(
    input_size=INPUT_SIZE,
    hidden_size=best_params['hidden_size'],
    num_layers=best_params['num_layers'],
    dropout=best_params['dropout']
).to(device)

optimizer = torch.optim.Adam(final_model.parameters(), lr=best_params['learning_rate'])
criterion = nn.CrossEntropyLoss()
# Multiply the learning rate by 0.2 after 10 epochs without improvement in
# validation loss. The scheduler's `verbose` argument is deprecated since
# PyTorch 2.2 (and may be absent in newer releases), so learning-rate changes
# are reported explicitly inside the training loop instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.2, patience=10)

train_losses, val_losses = [], []
FINAL_EPOCHS = 100

for epoch in range(FINAL_EPOCHS):
    # ---- training pass ----
    final_model.train()
    total_train_loss = 0
    for xb, yb in final_train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        pred = final_model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
    # Mean of the per-batch losses for this epoch.
    avg_train_loss = total_train_loss / len(final_train_loader)
    train_losses.append(avg_train_loss)

    # ---- validation pass (no gradient tracking) ----
    final_model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = final_model(xb)
            loss = criterion(pred, yb)
            total_val_loss += loss.item()
    avg_val_loss = total_val_loss / len(val_loader)
    val_losses.append(avg_val_loss)

    # Let the scheduler react to the held-out loss and report any change it
    # makes to the learning rate.
    previous_lr = optimizer.param_groups[0]['lr']
    scheduler.step(avg_val_loss)
    current_lr = optimizer.param_groups[0]['lr']
    if current_lr != previous_lr:
        print(f"Epoch {epoch+1:03d}: learning rate {previous_lr:.3e} -> {current_lr:.3e}")

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1:03d}/{FINAL_EPOCHS} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

# Plot the per-epoch training and validation loss curves on one figure.
loss_fig, loss_ax = plt.subplots(figsize=(12, 6))
loss_ax.plot(train_losses, label='Eğitim Kaybı (Training Loss)')
loss_ax.plot(val_losses, label='Doğrulama Kaybı (Validation Loss)')
loss_ax.set_title('Eğitim ve Doğrulama Kaybı Grafiği (Bi-GRU + Attention)')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.grid(True)
plt.show()

# Run the trained model over the test loader and collect, per sample, the true
# label, the predicted class and the softmax probability of class 1.
final_model.eval()
y_true, y_pred, y_prob = [], [], []
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        logits = final_model(xb)
        class1_prob = torch.softmax(logits, dim=1)[:, 1]
        predicted = logits.argmax(dim=1)
        # Labels were never moved to the device, so no .cpu() is needed here.
        y_true.extend(yb.numpy())
        y_pred.extend(predicted.cpu().numpy())
        y_prob.extend(class1_prob.cpu().numpy())

# Text summary: per-class precision/recall/F1, then the raw confusion matrix.
report_text = classification_report(y_true, y_pred, target_names=['Dikkatsiz (0)', 'Dikkatli (1)'])
print(report_text)
print("\nConfusion Matrix (Text):")
print(confusion_matrix(y_true, y_pred))

import seaborn as sns

# Draw the test-set confusion matrix as an annotated heatmap.
cm = confusion_matrix(y_true, y_pred)
tick_names = ['Dikkatsiz (0)', 'Dikkatli (1)']
cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # integer counts in the cells
    cmap='Blues',
    xticklabels=tick_names,
    yticklabels=tick_names,
    ax=cm_ax,
)
cm_ax.set_xlabel("Tahmin Edilen Etiket")
cm_ax.set_ylabel("Gerçek Etiket")
cm_ax.set_title("Confusion Matrix (Test Seti)")
plt.show()

from sklearn.metrics import roc_auc_score, roc_curve

# ROC curve and AUC computed from the class-1 probabilities on the test set.
fpr, tpr, _ = roc_curve(y_true, y_prob)
auc_score = roc_auc_score(y_true, y_prob)

roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC eğrisi (AUC = {auc_score:.3f})')
# Diagonal reference line, labelled as random guessing in the legend.
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Rastgele Tahmin')
roc_ax.set_xlabel('False Positive Oranı')
roc_ax.set_ylabel('True Positive Oranı')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Eğrisi')
roc_ax.legend(loc="lower right")
plt.show()

# Persist the trained weights and the fitted feature scaler side by side.
print("\n--- Model ve Scaler kaydediliyor... ---")
model_path = "bigru_attention_model.pt"
scaler_path = "minmax_scaler_bigru.pkl"
# Only the state_dict is saved, so loading requires re-creating the model
# class with the same constructor arguments first.
torch.save(final_model.state_dict(), model_path)
joblib.dump(scaler, scaler_path)
print(f"Model '{model_path}' olarak kaydedildi.")
print(f"Scaler '{scaler_path}' olarak kaydedildi.")

--- Final Test Raporu ---
               precision    recall  f1-score   support

Dikkatsiz (0)       0.91      0.82      0.86       144
 Dikkatli (1)       0.83      0.92      0.87       143

     accuracy                           0.87       287
    macro avg       0.87      0.87      0.87       287
 weighted avg       0.87      0.87      0.87       287


Confusion Matrix (Text):
[[118  26]
 [ 12 131]]