In [53]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
import re

df = pd.read_csv("../data/df_eda.csv")

# --- Basic Linguistic Features (already present in df) ---
# ['response_length', 'num_sentences', 'num_tokens', 'num_questions',
#  'contains_question', 'contains_question_words', 'sentence_similarity']

# --- Normalize features within each conversation_id (Z-score) ---
# Each value is centred on its conversation's mean and divided by the conversation's
# sample standard deviation (plus a small epsilon). A conversation with a single row has
# an undefined (NaN) sample std; it is treated as 0 here so the z-score is 0, not NaN.
length_features = ['response_length', 'sentence_similarity', 'num_questions']
for feature in length_features:
    per_conversation = df.groupby('conversation_id')[feature]
    centered = df[feature] - per_conversation.transform('mean')
    spread = per_conversation.transform('std').fillna(0.0)
    df[f'{feature}_z'] = centered / (spread + 1e-6)

# --- Add interaction feature ---
# Questions per sentence; the epsilon keeps the ratio finite when num_sentences is 0.
df['question_density'] = df['num_questions'] / (df['num_sentences'] + 1e-6)
# sentence_similarity, zeroed out for rows whose contains_question flag is 0.
df['question_similarity_interaction'] = df['contains_question'].astype(int) * df['sentence_similarity']

# --- Boilerplate / top n-grams as binary flags ---
# Phrases that are turned into binary `has_<phrase>` flag columns further down.
# Order matters: it determines the order in which the flag columns are added to df.
# NOTE(review): several entries (e.g. "let try", "let closer look") read like n-grams
# taken from stop-word-filtered text — verify they can actually occur in the raw
# `response` text they are matched against.
boilerplate_phrases = [
    "great job",
    "let try",
    "total number",
    "looks like",
    "right track",
    "closer look",
    "let closer",
    "small mistake",
    "let closer look",
    "let think",
    "good try",
    "let look",
    "double check",
    "tutor response",
    "job let",
    "great job let",
    "total cost",
    "think small",
    "effort let",
    "let double",
    "let double check",
    "response maximum",
    "tutor response maximum",
    "great start",
    "make sure",
    "maximum sentence",
    "response maximum sentence",
    "great try",
    "assistant tutor",
    "assistant tutor response",
]

# Preprocessing
def contains_phrase(text, phrase):
    """Return 1 if `phrase` occurs in `text` as a whole-word match, else 0.

    Matching is case-insensitive: both `text` and `phrase` are lowercased before
    the search, and the phrase is regex-escaped and wrapped in word boundaries.

    Args:
        text: The text to search. Non-string values (e.g. NaN or None from a
            DataFrame column with missing entries) are treated as "no match".
        phrase: The literal phrase to look for.

    Returns:
        int: 1 when the phrase is found, 0 otherwise.
    """
    # Missing responses arrive as float NaN / None and have no `.lower()`.
    if not isinstance(text, str):
        return 0
    pattern = rf'\b{re.escape(phrase.lower())}\b'
    return int(re.search(pattern, text.lower()) is not None)

# Add one 0/1 column per boilerplate phrase, named "has_" + the phrase with
# spaces replaced by underscores.
for phrase in boilerplate_phrases:
    flag_column = "has_" + phrase.replace(' ', '_')
    # The current phrase is bound as a default argument so the callable does not
    # depend on the loop variable after this iteration.
    df[flag_column] = df['response'].apply(
        lambda response_text, target=phrase: contains_phrase(response_text, target)
    )

# --- Aggregated cross-tutor features ---
# Per-conversation summary statistics, broadcast back onto every row of that conversation.
agg_features = ['sentence_similarity', 'response_length', 'num_questions']
aggregations = df.groupby('conversation_id')[agg_features].agg(['mean', 'std', 'min', 'max'])
# Flatten the (feature, statistic) column MultiIndex into "<feature>_<statistic>".
aggregations.columns = ['_'.join(col) for col in aggregations.columns]
# The sample std of a single-response conversation is NaN; use 0 ("no spread") so the
# aggregate columns do not inject NaNs into the downstream feature matrix.
std_columns = [f'{feature}_std' for feature in agg_features]
aggregations[std_columns] = aggregations[std_columns].fillna(0.0)
df = df.merge(aggregations, left_on='conversation_id', right_index=True, how='left')

# --- One-hot encoding tutor for tutor-aware training ---
# Replace the `tutor` column with one indicator column per tutor value (`tutor_<value>`).
df = df.pipe(pd.get_dummies, columns=['tutor'], prefix='tutor')

# Write the engineered feature table to disk without the row index.
final_features_path = "../data/final_features.csv"
df.to_csv(final_features_path, index=False)


In [54]:
# Rebuild a single categorical tutor column from the one-hot `tutor_*` columns.
# `tutor_label` itself starts with "tutor_", so it is excluded explicitly; otherwise
# re-running this cell would feed the string label column into idxmax.
tutor_columns = [
    col for col in df.columns
    if col.startswith('tutor_') and col != 'tutor_label'
]
# idxmax returns the name of the hot column; strip only the leading "tutor_" prefix
# (an unanchored replace would also remove "tutor_" occurring inside a tutor name).
df['tutor_label'] = df[tutor_columns].idxmax(axis=1).str.replace(r'^tutor_', '', regex=True)

In [65]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

# Remove TF-IDF components left by a previous execution of this cell so that
# re-running it does not append a second set of identically named `tfidf_*` columns.
df = df.drop(columns=[col for col in df.columns if col.startswith('tfidf_')])

# TF-IDF vectorization (uni- and bi-grams, English stop words removed, 300 terms max).
# Missing responses are vectorized as empty documents instead of raising.
# NOTE(review): the vectorizer and the SVD below are fit on every row of df, including
# rows that the later train/validation split assigns to validation. Fit on the training
# rows only if validation scores must be leakage-free.
tfidf = TfidfVectorizer(ngram_range=(1, 2), stop_words='english', max_features=300)
tfidf_matrix = tfidf.fit_transform(df['response'].fillna('').astype(str))

# Dimensionality reduction (optional but recommended)
svd = TruncatedSVD(n_components=50, random_state=42)
tfidf_reduced = svd.fit_transform(tfidf_matrix)

# Add back to df; both frames get a fresh 0..n-1 index so the column-wise concat
# lines rows up by position.
tfidf_df = pd.DataFrame(tfidf_reduced, columns=[f"tfidf_{i}" for i in range(tfidf_reduced.shape[1])])
df = pd.concat([df.reset_index(drop=True), tfidf_df.reset_index(drop=True)], axis=1)


In [49]:
import torch.nn as nn

class TutorClassifier(nn.Module):
    """Multi-task MLP: a shared two-layer encoder feeding five linear heads.

    The encoder maps `input_dim -> hidden_dim -> hidden_dim // 2` with ReLU and
    dropout after each linear layer. Four heads emit 3 logits each
    (mistake_identification, mistake_location, providing_guidance, actionability)
    and a fifth head emits `num_tutors` logits.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the first encoder layer; the second is half of it.
        dropout: Dropout probability used after both encoder activations.
        num_tutors: Number of output logits of the tutor head.
    """

    def __init__(self, input_dim, hidden_dim=128, dropout=0.3, num_tutors=9):
        super().__init__()

        encoded_dim = hidden_dim // 2
        logits_per_task = 3

        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, encoded_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        ]
        self.shared = nn.Sequential(*encoder_layers)

        # One linear head per annotation dimension.
        self.head_mistake_identification = nn.Linear(encoded_dim, logits_per_task)
        self.head_mistake_location = nn.Linear(encoded_dim, logits_per_task)
        self.head_providing_guidance = nn.Linear(encoded_dim, logits_per_task)
        self.head_actionability = nn.Linear(encoded_dim, logits_per_task)

        # Optional tutor prediction head
        self.head_tutor = nn.Linear(encoded_dim, num_tutors)

    def forward(self, x):
        """Encode `x` and return a dict mapping each head name to its logits."""
        encoded = self.shared(x)
        head_names = (
            'mistake_identification',
            'mistake_location',
            'providing_guidance',
            'actionability',
            'tutor',  # optional
        )
        return {name: getattr(self, f'head_{name}')(encoded) for name in head_names}


In [50]:
from torch.utils.data import Dataset
import torch

class TutorDataset(Dataset):
    """Map-style dataset that serves feature vectors and per-task labels from a DataFrame.

    Args:
        dataframe: Source table holding the feature, label and (optionally) tutor columns.
        feature_cols: List of column names converted to a float32 feature matrix.
        label_cols: List of column names converted to an int64 label matrix. The same
            names, in the same order, are used as the keys of each sample's
            `labels` dict.
        tutor_col: Optional column name; when given, its values are encoded as integer
            category codes and returned under the `tutor` key.
    """

    def __init__(self, dataframe, feature_cols, label_cols, tutor_col=None):
        self.features = dataframe[feature_cols].values.astype('float32')
        self.labels = dataframe[label_cols].values.astype('int64')
        # Keep the label names so __getitem__ keys each label by the column it came
        # from, instead of assuming a fixed set and order of four names.
        self.label_names = list(label_cols)
        self.tutors = None
        if tutor_col:
            self.tutors = dataframe[tutor_col].astype('category').cat.codes.values.astype('int64')

    def __len__(self):
        """Number of rows (samples) in the dataset."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return a dict with `features`, a `labels` dict, and `tutor` when configured."""
        row_labels = self.labels[idx]
        label_dict = {
            name: torch.tensor(row_labels[position], dtype=torch.long)
            for position, name in enumerate(self.label_names)
        }

        sample = {
            'features': torch.tensor(self.features[idx]),
            'labels': label_dict
        }

        if self.tutors is not None:
            sample['tutor'] = torch.tensor(self.tutors[idx], dtype=torch.long)

        return sample


In [None]:
import numpy as np
import pandas as pd

# Read the embedding matrix from disk and wrap it in a DataFrame whose columns are
# named bert_0, bert_1, ... (one per embedding dimension).
bert_embeddings = np.load("../data/embeddings/X_bert_mean.npy")  # shape: (num_rows, embedding_dim)

bert_dim = bert_embeddings.shape[1]
bert_column_names = ['bert_' + str(position) for position in range(bert_dim)]
bert_df = pd.DataFrame(data=bert_embeddings, columns=bert_column_names)


Loaded BERT embeddings: (2476, 768)
2476


In [86]:
from torch.utils.data import DataLoader, random_split
from sklearn.model_selection import train_test_split


label_cols = ['mistake_identification', 'mistake_location', 'providing_guidance', 'actionability']
tutor_col = 'tutor_label'
# Identifier columns must never be model inputs, even when they are stored as numbers.
id_cols = ['conversation_id', 'response_id']

# Final feature columns: every float64/int64 column of df that is neither a label nor
# an identifier.
# NOTE(review): `bert_df` is a separate frame; no visible cell joins it into `df`, so
# BERT columns are not part of these features. Join it into df first if they are wanted.
# NOTE(review): the features are passed to the model unscaled — no scaler is applied in
# the visible cells; consider standardizing on the training rows.
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
excluded_cols = set(label_cols) | set(id_cols)
feature_cols = [col for col in numeric_cols if col not in excluded_cols]

# Initialize dataset
dataset = TutorDataset(df, feature_cols, label_cols, tutor_col=tutor_col)

# Train/Val Split (80/20, fixed seed); only the training loader shuffles.
train_dataset, val_dataset = train_test_split(dataset, test_size=0.2, random_state=42)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report
import numpy as np

In [75]:
from sklearn.utils.class_weight import compute_class_weight

# "Balanced" class weights for every label column, stored as float tensors so they
# can be handed to a weighted loss. Weights are computed from all rows of df.
class_weights = {
    dim: torch.tensor(
        compute_class_weight('balanced', classes=np.unique(df[dim]), y=df[dim]),
        dtype=torch.float,
    )
    for dim in label_cols
}


In [76]:
# One weighted cross-entropy criterion per task, keyed the same way as `class_weights`.
loss_fns = {}
for task_name, task_weights in class_weights.items():
    loss_fns[task_name] = nn.CrossEntropyLoss(weight=task_weights)

In [77]:
def evaluate_model(model, val_loader, use_tutor_head=False):
    """Print a classification report per task head for the batches in `val_loader`.

    The model is switched to eval mode and run without gradient tracking. For each of
    the four task heads the arg-max class is compared against the batch labels; when
    `use_tutor_head` is true the same is done for the `tutor` head.

    Args:
        model: Callable returning a dict of logits keyed by head name.
        val_loader: Iterable of batches with `features`, `labels` (dict keyed by task)
            and, when `use_tutor_head` is true, `tutor`.
        use_tutor_head: Whether to also evaluate the tutor head.
    """
    model.eval()
    task_names = ['mistake_identification', 'mistake_location', 'providing_guidance', 'actionability']
    targets = {task: [] for task in task_names}
    predictions = {task: [] for task in task_names}

    tutor_targets, tutor_predictions = [], []

    with torch.no_grad():
        for batch in val_loader:
            inputs = batch['features']
            gold = batch['labels']
            tutor_gold = batch['tutor'] if use_tutor_head else None

            logits = model(inputs)
            task_logits = {task: logits[task] for task in task_names}
            tutor_logits = logits['tutor'] if use_tutor_head else None

            for task in task_names:
                predicted = task_logits[task].argmax(dim=1).cpu().numpy()
                predictions[task].extend(predicted)
                targets[task].extend(gold[task].cpu().numpy())

            if use_tutor_head and tutor_logits is not None:
                tutor_predictions.extend(tutor_logits.argmax(dim=1).cpu().numpy())
                tutor_targets.extend(tutor_gold.cpu().numpy())

    for task in task_names:
        print(f"\n Classification Report for {task}")
        print(classification_report(targets[task], predictions[task], digits=3, zero_division=0))

    if use_tutor_head:
        print("\n Tutor Classification Report")
        print(classification_report(tutor_targets, tutor_predictions, digits=3, zero_division=0))


In [67]:


def train_model(model, train_loader, val_loader, num_epochs=10, lr=1e-3, use_tutor_head=False,
                task_loss_fns=None):
    """Train `model` with Adam on the summed per-task losses and evaluate after every epoch.

    Args:
        model: Module returning a dict of logits keyed by head name.
        train_loader: Iterable of training batches with `features`, `labels` (dict keyed
            by task) and, when `use_tutor_head` is true, `tutor`.
        val_loader: Validation batches, passed to `evaluate_model` after each epoch.
        num_epochs: Number of passes over `train_loader`.
        lr: Adam learning rate.
        use_tutor_head: When true, an unweighted cross-entropy loss on the `tutor` head
            is added to the task losses.
        task_loss_fns: Optional dict mapping each task name to its loss function. When
            omitted, the notebook-level `loss_fns` dict is used, so that cell must have
            been executed before this function is called.
    """
    task_names = ['mistake_identification', 'mistake_location', 'providing_guidance', 'actionability']
    if task_loss_fns is None:
        # Fall back to the global criteria; resolved at call time, not definition time.
        task_loss_fns = loss_fns

    optimizer = optim.Adam(model.parameters(), lr=lr)
    tutor_loss_fn = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        for batch in train_loader:
            x = batch['features']
            y = batch['labels']

            outputs = model(x)

            # Total loss = sum of the task criteria (+ tutor loss when enabled).
            loss = sum(task_loss_fns[dim](outputs[dim], y[dim]) for dim in task_names)
            if use_tutor_head:
                loss = loss + tutor_loss_fn(outputs['tutor'], batch['tutor'])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # An empty loader yields NaN instead of a ZeroDivisionError.
        num_batches = len(train_loader)
        mean_train_loss = train_loss / num_batches if num_batches else float('nan')
        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {mean_train_loss:.4f}")

        evaluate_model(model, val_loader, use_tutor_head)


In [87]:
# Seed PyTorch's global RNG so weight initialization, dropout masks and the shuffling
# of the training DataLoader are repeatable across runs of this cell.
torch.manual_seed(42)
model = TutorClassifier(len(feature_cols))
train_model(model, train_loader, val_loader, num_epochs=10, lr=1e-3, use_tutor_head=False)

Epoch 1/10 - Train Loss: 9.3328

 Classification Report for mistake_identification
              precision    recall  f1-score   support

           0      0.000     0.000     0.000        67
           1      0.065     0.970     0.122        33
           2      0.000     0.000     0.000       396

    accuracy                          0.065       496
   macro avg      0.022     0.323     0.041       496
weighted avg      0.004     0.065     0.008       496


 Classification Report for mistake_location
              precision    recall  f1-score   support

           0      1.000     0.014     0.028       142
           1      0.071     1.000     0.132        35
           2      0.000     0.000     0.000       319

    accuracy                          0.075       496
   macro avg      0.357     0.338     0.053       496
weighted avg      0.291     0.075     0.017       496


 Classification Report for providing_guidance
              precision    recall  f1-score   support

        

It's worse when I add BERT embeddings :(
