In [1]:
import pandas as pd

## Get the full data generated by a model

In [4]:
full_data = pd.read_csv("/home/max/Studium/Leipzig/Semester6/Math_and_ML/math-ml/results/accuracy_exp/all_baseline_deepseek/evaluation_results.csv")

In [5]:
full_data.head()

Unnamed: 0,prompt_id,entropy,latency,tokens_used,correct,llm_answer,ground_truth,avg_au,avg_eu,cosine
0,prompt0,0.900461,0.841162,7,no,8.0,18,2.181641,0.038239,0.462646
1,prompt1,0.921795,0.805534,7,no,2.0,3,2.179688,0.038635,0.462646
2,prompt2,0.43903,0.929708,11,no,35000.0,70000,1.088477,0.036298,0.600122
3,prompt3,0.224657,0.834156,10,no,180.0,540,1.060872,0.038472,0.58138
4,prompt4,0.78317,0.74756,8,no,35.0,20,2.121094,0.037048,0.420288


## Only take entropy, cosine and correct fields

In [14]:
data = full_data[["entropy", "cosine", "correct"]]

In [15]:
data.head()

Unnamed: 0,entropy,cosine,correct
0,0.900461,0.462646,no
1,0.921795,0.462646,no
2,0.43903,0.600122,no
3,0.224657,0.58138,no
4,0.78317,0.420288,no


## Remove buggy prompts

In [19]:
buggy_ids = data[data['correct'] == 'buggy']
print(len(buggy_ids))
buggy_ids.head()

15


Unnamed: 0,entropy,cosine,correct
35,,,buggy
38,,,buggy
46,,,buggy
183,,,buggy
230,,,buggy


In [20]:
print(f"Length before removing buggy samples:{len(data)}")
data = data[data['correct'] != 'buggy']
print(f"Length after removing buggy samples:{len(data)}")

Length before removing buggy samples:1319
Length after removing buggy samples:1304


## Build a classifier on the data predicting correctness using entropy + cosine of each sample

In [21]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [22]:
# Convert 'correct' to 0 (no) and 1 (yes)
data['correct'] = data['correct'].map({'no': 0, 'yes': 1})
data.head()

Unnamed: 0,entropy,cosine,correct
0,0.900461,0.462646,0
1,0.921795,0.462646,0
2,0.43903,0.600122,0
3,0.224657,0.58138,0
4,0.78317,0.420288,0


In [26]:
# Features and labels
X = data[['entropy', 'cosine']].values
y = data['correct'].values
print(X[:5])
print(y[:5])

[[0.90046125 0.46264648]
 [0.92179498 0.46264648]
 [0.43903042 0.60012207]
 [0.22465748 0.58138021]
 [0.78317043 0.42028809]]
[0 0 0 0 0]


In [28]:
#train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"{len(X_train)=}")
print(f"{len(X_test)=}")
print(f"{len(y_train)=}")
print(f"{len(y_test)=}")

len(X_train)=1043
len(X_test)=261
len(y_train)=1043
len(y_test)=261


In [31]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # for classification
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
print(f"{X_train_tensor.shape=}")
print(f"{y_train_tensor.shape=}")
print(f"{X_test_tensor.shape=}")
print(f"{y_test_tensor.shape=}")

X_train_tensor.shape=torch.Size([1043, 2])
y_train_tensor.shape=torch.Size([1043])
X_test_tensor.shape=torch.Size([261, 2])
y_test_tensor.shape=torch.Size([261])


In [None]:
import torch.nn as nn
import torch.nn.functional as F

In [33]:
#define NN
class SimpleClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 16)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 2)  # Output layer: 2 logits for binary classification

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return self.fc4(x)  # raw logits

In [36]:
from torch.utils.data import TensorDataset, DataLoader

# Hyperparameters
batch_size = 16
lr = 0.001
epochs = 500

# Dataset and loader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Model, loss, optimizer
model = SimpleClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Training
for epoch in range(epochs):
    model.train()
    for xb, yb in train_loader:
        preds = model(xb)
        loss = criterion(preds, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs} - Loss: {loss.item():.4f}")

Epoch 10/500 - Loss: 0.0336
Epoch 20/500 - Loss: 0.0384
Epoch 30/500 - Loss: 0.0375
Epoch 40/500 - Loss: 0.0281
Epoch 50/500 - Loss: 0.0406
Epoch 60/500 - Loss: 0.0342
Epoch 70/500 - Loss: 0.0648
Epoch 80/500 - Loss: 0.0343
Epoch 90/500 - Loss: 0.0343
Epoch 100/500 - Loss: 0.0626
Epoch 110/500 - Loss: 1.0671
Epoch 120/500 - Loss: 0.0302
Epoch 130/500 - Loss: 0.0206
Epoch 140/500 - Loss: 0.0370
Epoch 150/500 - Loss: 0.0352
Epoch 160/500 - Loss: 0.0270
Epoch 170/500 - Loss: 0.0428
Epoch 180/500 - Loss: 0.0235
Epoch 190/500 - Loss: 0.0360
Epoch 200/500 - Loss: 1.1463
Epoch 210/500 - Loss: 0.0250
Epoch 220/500 - Loss: 0.0332
Epoch 230/500 - Loss: 0.0928
Epoch 240/500 - Loss: 0.0418
Epoch 250/500 - Loss: 0.0298
Epoch 260/500 - Loss: 1.1746
Epoch 270/500 - Loss: 0.0362
Epoch 280/500 - Loss: 0.0190
Epoch 290/500 - Loss: 0.0248
Epoch 300/500 - Loss: 0.0228
Epoch 310/500 - Loss: 0.0364
Epoch 320/500 - Loss: 0.0289
Epoch 330/500 - Loss: 0.0252
Epoch 340/500 - Loss: 0.0367
Epoch 350/500 - Loss: 0

In [38]:
#Eval
model.eval()
with torch.no_grad():
    test_preds = model(X_test_tensor)
    predicted = torch.argmax(test_preds, dim=1)
    accuracy = (predicted == y_test_tensor).float().mean().item()

print(f"Test Accuracy: {accuracy:.2%}")

Test Accuracy: 96.17%
