In [None]:
df=pd.read_csv('attention-analysis/preprocessed_data_extra.csv')
df = df[df['id'] == 'yunus']  # Sadece Yunus

features = [
    'face_movement', 'body_movement', 'eye_openness_rate',
    'eye_direction_x', 'eye_direction_y', 'mouth_openness_rate',
    'yaw_angle', 'pitch_angle', 'roll_angle'
]

SEQ_LEN = 30

# 4. Sekans oluştur
X_seq, y_seq = [], []
for i in range(len(df) - SEQ_LEN):
    seq = df[features].iloc[i:i+SEQ_LEN].values
    label = df['isAttentive'].iloc[i+SEQ_LEN-1]
    X_seq.append(seq)
    y_seq.append(int(label))

X_seq, y_seq = np.array(X_seq), np.array(y_seq)

# 5. Veri Dengeleme (undersampling)
from imblearn.under_sampling import RandomUnderSampler
X_flat = X_seq.reshape((X_seq.shape[0], -1))
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X_flat, y_seq)
X_seq_balanced = X_resampled.reshape((-1, SEQ_LEN, len(features)))
y_seq_balanced = y_resampled

# 6. Train-Test Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_seq_balanced, y_seq_balanced, test_size=0.2, stratify=y_seq_balanced, random_state=42)

# 7. PyTorch Dataset
import torch
from torch.utils.data import Dataset, DataLoader

class SequenceDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self): return len(self.X)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

train_loader = DataLoader(SequenceDataset(X_train, y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(SequenceDataset(X_test, y_test), batch_size=32)

# 8. BiLSTM Model
import torch.nn as nn

class BiLSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=1, num_classes=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size * 2, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # son zaman adımı
        return self.classifier(out)

# 9. Eğitim
device = torch.device("cuda")
model = BiLSTMClassifier(input_size=len(features)).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

from sklearn.utils.class_weight import compute_class_weight
import numpy as np

class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
weights = torch.tensor(class_weights, dtype=torch.float32).to(device)
criterion = nn.CrossEntropyLoss(weight=weights)


for epoch in range(50):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1:02d} - Loss: {total_loss:.4f}")

# 10. Değerlendirme
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns

model.eval()
y_true, y_pred, y_prob = [], [], []

with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        outputs = model(xb)
        probs = torch.softmax(outputs, dim=1)[:,1].cpu().numpy()
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        y_true.extend(yb.numpy())
        y_pred.extend(preds)
        y_prob.extend(probs)

print(classification_report(y_true, y_pred))

# Confusion matrix
plt.figure(figsize=(6, 4))
sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt='d', cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

# ROC curve
fpr, tpr, _ = roc_curve(y_true, y_prob)
plt.figure(figsize=(6, 4))
plt.plot(fpr, tpr, label=f"AUC = {roc_auc_score(y_true, y_prob):.2f}")
plt.plot([0, 1], [0, 1], '--', color='gray')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.show()

--- Final Test Raporu ---
               precision    recall  f1-score   support

Dikkatsiz (0)       0.92      0.73      0.81       144
 Dikkatli (1)       0.77      0.94      0.85       143

     accuracy                           0.83       287
    macro avg       0.85      0.83      0.83       287
 weighted avg       0.85      0.83      0.83       287


Confusion Matrix (Text):
[[105  39]
 [  9 134]]