<a href="https://colab.research.google.com/github/freida20git/SubjectiveQA-Rater/blob/main/Inference_QArater.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel
import matplotlib.pyplot as plt
import numpy as np
from transformers import RobertaModel

In [None]:
!pip install transformers

In [None]:
!pip install safetensors

In [3]:
# Names of the regression targets, in the order used to label model outputs
# (the plotting helper pairs the i-th prediction with the i-th name).
target_columns = [
    "question_conversational",
    "question_opinion_seeking",
    "Q_INTEREST",
    "QA_INSTRUCTIONS",
    "QA_PROCEDURE",
    "QA_REASON_EXP",
    "ANSWERS_QUESTION",
    "QUESTION_QUALITY",
    "ANSWER_QUALITY",
]

In [4]:
# NOTE(review): this loads the BERT (uncased) tokenizer, while both model classes
# defined in this notebook load "roberta-base" encoders. Inference must use the
# same tokenizer the checkpoint was trained with — confirm against the training
# code before changing either side.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [5]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
# Simple RoBERTa with non-shared weights (separate encoders for question and answer):

class SimpleRoBERTa(nn.Module):
    """Two-encoder RoBERTa regressor with non-shared weights.

    The question and the answer are each encoded by their own ``roberta-base``
    model. Every side is reduced to one vector by mean pooling over the tokens
    selected by its attention mask, the two vectors are concatenated, and a
    single linear layer maps the result to ``out_dim`` scores.

    Args:
        dropout: Dropout probability applied to the concatenated vector.
        out_dim: Number of regression outputs.
    """

    def __init__(self, dropout=0.2, out_dim=7):
        super().__init__()
        self.roberta_q = RobertaModel.from_pretrained("roberta-base")
        self.roberta_a = RobertaModel.from_pretrained("roberta-base")
        self.dropout = nn.Dropout(dropout)
        # 1536 = 768 * 2: the question and answer vectors are concatenated.
        self.regressor = nn.Linear(1536, out_dim)
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise the regression weights and zero the bias."""
        nn.init.xavier_uniform_(self.regressor.weight)
        nn.init.zeros_(self.regressor.bias)

    @staticmethod
    def _masked_mean(hidden_states, attention_mask):
        """Average ``hidden_states`` over dim 1, counting only positions where the mask is 1.

        The token count is clamped to at least 1 so that a row whose mask is
        entirely zero produces a zero vector instead of NaN (0 / 0). Rows with
        at least one unmasked token are unaffected by the clamp.
        """
        mask = attention_mask.unsqueeze(-1).to(hidden_states.dtype)
        token_sum = (hidden_states * mask).sum(dim=1)
        token_count = mask.sum(dim=1).clamp(min=1.0)
        return token_sum / token_count

    def forward(self, input_ids_q, attention_mask_q, input_ids_a, attention_mask_a):
        """Return the regression scores for a batch of question/answer pairs.

        Args:
            input_ids_q, attention_mask_q: Tokenised question batch.
            input_ids_a, attention_mask_a: Tokenised answer batch.

        Returns:
            Tensor with one row of ``out_dim`` raw (unbounded) scores per pair.
        """
        q_out = self.roberta_q(input_ids=input_ids_q, attention_mask=attention_mask_q).last_hidden_state
        a_out = self.roberta_a(input_ids=input_ids_a, attention_mask=attention_mask_a).last_hidden_state

        # Mean pooling that averages only the real (unmasked) tokens.
        q_vec = self._masked_mean(q_out, attention_mask_q)
        a_vec = self._masked_mean(a_out, attention_mask_a)

        # Combine (concatenate) the two sides, then regress.
        combined = torch.cat([q_vec, a_vec], dim=1)
        return self.regressor(self.dropout(combined))


In [7]:
# Siamese (shared weights) RoBERTa + CrossAttention :

# Optional Cross-Attention Layer
class CrossAttention(nn.Module):
    """Multi-head attention (8 heads, batch-first) from one sequence onto another.

    Args:
        hidden_size: Embedding size of the query and key sequences.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embed_dim=hidden_size,
            num_heads=8,
            batch_first=True,
        )

    def forward(self, query, key, key_mask):
        """Attend from ``query`` to ``key``; the key sequence is also used as the values.

        ``key_mask`` is an attention mask whose non-zero entries mark real key
        tokens. It is inverted here because ``key_padding_mask`` expects True
        at the positions that must be ignored.
        """
        ignored_positions = key_mask.bool().logical_not()
        attended, _weights = self.attn(query, key, key, key_padding_mask=ignored_positions)
        return attended

# Siamese RoBERTa with Shared Encoder, Cross-Attention (optional), and CLS pooling
class SiameseRobertaSharedCLS(nn.Module):
    """Siamese regressor: one shared encoder for question and answer, first-token pooling.

    The fused feature vector is the concatenation of
    ``[question, answer]`` first-token embeddings, plus the first-token output
    of question->answer cross-attention when ``use_cross_attention`` is set,
    plus the answer->question counterpart when ``use_single_cross`` is False.

    Args:
        model_name: Hugging Face model identifier passed to ``AutoModel``.
        num_targets: Number of regression outputs.
        use_cross_attention: Whether to add cross-attention features.
        use_single_cross: With cross-attention enabled, use only the
            question->answer direction (True) or both directions (False).
    """

    def __init__(self, model_name='roberta-base', num_targets=9, use_cross_attention=True, use_single_cross=True):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        self.use_cross_attention = use_cross_attention
        self.use_single_cross = use_single_cross

        hidden_size = self.encoder.config.hidden_size

        # Count how many hidden_size-wide vectors end up concatenated in forward().
        fused_parts = 2  # question + answer
        if use_cross_attention:
            self.cross_attn_q2a = CrossAttention(hidden_size)
            fused_parts += 1
            if not use_single_cross:
                self.cross_attn_a2q = CrossAttention(hidden_size)
                fused_parts += 1
        fusion_dim = hidden_size * fused_parts

        # The head ends in a plain Linear layer: no Sigmoid is applied, so the
        # outputs are not constrained to the 0-1 range.
        self.regressor = nn.Sequential(
            nn.Linear(fusion_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, num_targets),
        )

    def cls_pooling(self, model_output):
        """Return the embedding of the first token of every sequence.

        The first token is ``[CLS]`` for BERT-style tokenizers and ``<s>`` for
        RoBERTa-style tokenizers.
        """
        return model_output[:, 0]

    def forward(self, input_ids_q, attention_mask_q, input_ids_a, attention_mask_a):
        """Encode both sides with the shared encoder and regress on the fused vector."""
        q_states = self.encoder(input_ids=input_ids_q, attention_mask=attention_mask_q).last_hidden_state
        a_states = self.encoder(input_ids=input_ids_a, attention_mask=attention_mask_a).last_hidden_state

        features = [self.cls_pooling(q_states), self.cls_pooling(a_states)]

        if self.use_cross_attention:
            # Question tokens attend over the answer; padded answer tokens are masked out.
            q2a_states = self.cross_attn_q2a(q_states, a_states, attention_mask_a)
            features.append(self.cls_pooling(q2a_states))

            if not self.use_single_cross:
                # Second direction: answer tokens attend over the question.
                a2q_states = self.cross_attn_a2q(a_states, q_states, attention_mask_q)
                features.append(self.cls_pooling(a2q_states))

        return self.regressor(torch.cat(features, dim=1))


Update `model_path` to the location of your trained checkpoint file:

In [None]:
# Location of the saved state-dict checkpoint that the model-loading cells pass
# to torch.load. Adjust it to wherever your .pth file actually lives.
model_path="/content/RoBERTa_5epochs.pth"

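For the Siamese net (run either this cell or the simple RoBERTa cell below, not both — each one loads the checkpoint at `model_path` into a different architecture):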

In [None]:
# Build the Siamese architecture with one output per target column so the
# predictions line up with the labels used when plotting.
# NOTE(review): the cross-attention flags must match the configuration the
# checkpoint was trained with — confirm against the training code.
model = SiameseRobertaSharedCLS(num_targets=len(target_columns), use_cross_attention=True, use_single_cross=True)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while being loaded.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
model.to(device)
# Switch to inference mode (disables dropout).
model.eval()

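For the simple RoBERTa net (run this cell instead of the Siamese cell above when your checkpoint was trained with `SimpleRoBERTa`):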

In [None]:
# Build the two-encoder architecture with one output per target column.
model = SimpleRoBERTa(out_dim=len(target_columns))
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while being loaded.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
model.to(device)
# Switch to inference mode (disables dropout).
model.eval()

In [None]:
def predict(model, question_title, question_body, answer):
    """Score a single question/answer pair with ``model``.

    The question side is the text ``"Title: <title> [SEP] <body>"``; the answer
    text is used unchanged. Each side is encoded separately with the
    notebook-level ``tokenizer`` (truncated and padded to 256 tokens) and moved
    to the notebook-level ``device`` before the forward pass.

    Args:
        model: Module called as ``model(q_ids, q_mask, a_ids, a_mask)``.
        question_title: Title of the question.
        question_body: Body text of the question.
        answer: Answer text.

    Returns:
        NumPy array holding the model output with its leading batch dimension
        squeezed away.
    """
    def _encode(text):
        # Fixed-length encoding of one text; returns (input_ids, attention_mask) on `device`.
        encoded = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=256)
        return encoded["input_ids"].to(device), encoded["attention_mask"].to(device)

    model.eval()  # inference mode: no dropout
    with torch.no_grad():  # no gradients are needed for scoring
        question_text = "Title: " + question_title + " [SEP] " + question_body
        q_ids, q_mask = _encode(question_text)
        a_ids, a_mask = _encode(answer)

        scores = model(q_ids, q_mask, a_ids, a_mask)
        return scores.squeeze(0).cpu().numpy()

In [None]:

def plot_predictions(preds, title):
    """Draw a labelled bar chart of one prediction vector.

    Args:
        preds: Sequence (for example a 1-D NumPy array) holding one score per
            entry of the notebook-level ``target_columns`` list, in the same order.
        title: Title shown above the chart.

    Raises:
        ValueError: If ``preds`` does not contain exactly one value per target column.
    """
    scores = [float(score) for score in preds]
    if len(scores) != len(target_columns):
        raise ValueError(
            f"Expected {len(target_columns)} predictions (one per target column), got {len(scores)}."
        )

    plt.figure(figsize=(14, 6))
    bars = plt.bar(target_columns, scores, color='skyblue')
    plt.xticks(rotation=90)

    # The models in this notebook end in a plain linear layer (no sigmoid), so
    # scores are not guaranteed to lie in [0, 1]. Keep the default 0-1.1 view
    # and widen it only when a bar would otherwise be clipped.
    lowest = min(scores, default=0.0)
    highest = max(scores, default=1.0)
    lower = 0.0 if lowest >= 0.0 else lowest - 0.1
    upper = 1.1 if highest <= 1.0 else highest + 0.1
    plt.ylim(lower, upper)

    plt.ylabel("Predicted Score")
    plt.title(title)
    plt.grid(True, axis='y')

    # Value labels: above positive bars, below negative ones.
    for bar, score in zip(bars, scores):
        height = bar.get_height()
        is_up = height >= 0
        plt.text(bar.get_x() + bar.get_width() / 2., height + (0.02 if is_up else -0.02),
                 f'{score:.2f}', ha='center', va='bottom' if is_up else 'top', fontsize=8)

    plt.tight_layout()
    plt.show()

You can change the question–answer pairs below to compare any examples you like:

In [None]:
# Example 1: a clear, factual question with an on-topic answer.
qa_first = dict(
    question_title="What is the difference between supervised and unsupervised learning?",
    question_body="I've been studying machine learning and I keep seeing the terms 'supervised' and 'unsupervised' learning. Can someone explain the difference in simple terms, with an example if possible?",
    answer="Sure! In supervised learning, the model is trained on labeled data—meaning the correct answers are provided. For example, predicting house prices based on known sales data. In unsupervised learning, the data is unlabeled and the model tries to find structure in the input, like clustering customers into groups based on purchasing behavior.",
)

# Example 2: a vague question paired with an off-topic, opinionated answer.
qa_contrast = dict(
    question_title="What’s new in the latest iOS update?",
    question_body="I heard there was an update. Is it good? What’s the deal with it? ",
    answer="Android is actually better in my opinion. I don’t like iPhones at all.",
)

# Score each pair with the currently loaded model, then chart the two results.
pred_first = predict(model, **qa_first)
pred_contrast = predict(model, **qa_contrast)

plot_predictions(pred_first, "A good, factual, clear QA pair")
plot_predictions(pred_contrast, "A vague, conversational, opinion-seeking QA pair")
