# Usability Scoring Model (UICrit Dataset)
Train a small neural-network regression model that predicts the normalized usability score from tabular comment features.

In [9]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split


In [3]:
# Read the aggregated UICrit CSV (which carries the source features) into a DataFrame
csv_path = '../eval/uicrit/aggregated_uicrit.csv'
df = pd.read_csv(csv_path)

# Define Dataset
class UsabilityDataset(Dataset):
    """Map-style dataset yielding ``(features, target)`` tensor pairs from a DataFrame.

    Args:
        df: DataFrame holding the feature columns and the target column.
        feature_cols: Ordered column names used as model inputs. Defaults to
            ``DEFAULT_FEATURE_COLS`` (the five columns this notebook trains on).
        target_col: Name of the regression-target column. Defaults to
            ``DEFAULT_TARGET_COL``.

    Raises:
        KeyError: If any requested column is absent from ``df``.
    """

    DEFAULT_FEATURE_COLS = ('comment_count', 'human', 'llm', 'both', 'weighted_comment_score')
    DEFAULT_TARGET_COL = 'usability_score_norm'

    def __init__(self, df, feature_cols=None, target_col=None):
        if feature_cols is None:
            feature_cols = self.DEFAULT_FEATURE_COLS
        if target_col is None:
            target_col = self.DEFAULT_TARGET_COL

        self.feature_cols = list(feature_cols)
        self.target_col = target_col

        # Fail early with a message that names exactly which columns are missing.
        missing = [c for c in self.feature_cols + [target_col] if c not in df.columns]
        if missing:
            raise KeyError(f"columns missing from DataFrame: {missing}")

        # Materialise as float32 arrays once so __getitem__ is a cheap row lookup.
        self.X = df[self.feature_cols].values.astype('float32')
        self.y = df[target_col].values.astype('float32')

    def __len__(self):
        """Return the number of samples (rows)."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``idx``-th sample as ``(feature_tensor, target_tensor)``."""
        return torch.tensor(self.X[idx]), torch.tensor(self.y[idx])


In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split stable
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)

# Wrap each partition in a Dataset, then batch it (only training batches are shuffled)
train_dataset, test_dataset = UsabilityDataset(train_df), UsabilityDataset(test_df)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=16)

In [5]:
# Define Model
class UsabilityRegressor(nn.Module):
    """Single-hidden-layer MLP that maps a feature vector to one scalar score.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer (default 32).
    """

    def __init__(self, input_dim, hidden_dim=32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        """Return one prediction per input row.

        Only the trailing size-1 output dimension is removed. A bare
        ``squeeze()`` would also drop the batch dimension when the batch holds
        a single sample, producing a 0-d tensor that no longer lines up with a
        ``(1,)``-shaped target and whose ``tolist()`` is a float, not a list.
        """
        return self.net(x).squeeze(-1)


In [6]:
# Train
# Seed PyTorch's global RNG before building the model so that weight
# initialisation is repeatable across Restart & Run All. A shuffling DataLoader
# created without its own generator also draws from this global RNG, so the
# batch order becomes repeatable as well.
torch.manual_seed(42)

model = UsabilityRegressor(input_dim=5)  # 5 = number of feature columns fed by UsabilityDataset
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train(model, loader, epochs=10, loss_fn=None, opt=None):
    """Fit ``model`` on the batches from ``loader`` and print the mean loss per epoch.

    Args:
        model: Module to optimise; switched to training mode before the loop.
        loader: Iterable yielding ``(X_batch, y_batch)`` pairs.
        epochs: Number of full passes over ``loader`` (default 10).
        loss_fn: Loss callable. When omitted, the notebook-level ``criterion``
            is used.
        opt: Optimizer bound to ``model``'s parameters. When omitted, the
            notebook-level ``optimizer`` is used.

    Raises:
        ValueError: If ``loader`` yields no batches (previously this surfaced
            as a ZeroDivisionError while averaging the loss).
    """
    # Fall back to the notebook globals so the existing call
    # ``train(model, train_loader)`` behaves exactly as before.
    loss_fn = criterion if loss_fn is None else loss_fn
    opt = optimizer if opt is None else opt

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for X_batch, y_batch in loader:
            opt.zero_grad()
            outputs = model(X_batch)
            loss = loss_fn(outputs, y_batch)
            loss.backward()
            opt.step()
            total_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            raise ValueError("loader yielded no batches; nothing to train on")
        # Mean of the per-batch losses, as reported before.
        print(f"Epoch {epoch+1}, Loss: {total_loss / num_batches:.4f}")

# Run the optimisation loop over the training batches
train(model=model, loader=train_loader)

Epoch 1, Loss: 0.1215
Epoch 2, Loss: 0.0626
Epoch 3, Loss: 0.0520
Epoch 4, Loss: 0.0440
Epoch 5, Loss: 0.0376
Epoch 6, Loss: 0.0322
Epoch 7, Loss: 0.0275
Epoch 8, Loss: 0.0230
Epoch 9, Loss: 0.0200
Epoch 10, Loss: 0.0176


In [7]:
# Evaluate
model.eval()
predictions, targets = [], []
with torch.no_grad():  # no gradients needed for inference
    for X_batch, y_batch in test_loader:
        # Flatten to 1-D before converting: if the model output has been
        # squeezed down to a 0-d tensor (single-sample batch), tolist() would
        # return a bare float and list.extend() would raise TypeError.
        preds = model(X_batch).reshape(-1)
        predictions.extend(preds.tolist())
        targets.extend(y_batch.reshape(-1).tolist())

# Save results
results = pd.DataFrame({
    'Predicted': predictions,
    'True': targets
})
results.to_csv('predictions.csv', index=False)
results

Unnamed: 0,Predicted,True
0,0.786097,0.904762
1,0.977309,0.666667
2,0.783258,0.857143
3,0.741901,0.952381
4,0.872619,0.619048
...,...,...
195,0.830797,0.761905
196,0.700506,1.095238
197,0.784754,0.714286
198,0.803704,0.904762
