In [1]:
import pandas as pd
from docx import Document

scores_path = r"C:\Users\KVIRDI\Downloads\scores.csv"
scores = pd.read_csv(scores_path)

print("Scores shape:", scores.shape)
print("First few columns:", scores.columns.tolist()[:10])

Scores shape: (6696, 76)
First few columns: ['question_ind', 'section', 'tfidf', 'embedding', 'attention_0_0', 'attention_0_1', 'attention_0_2', 'attention_0_3', 'attention_0_4', 'attention_0_5']


In [2]:
questions_path = r"C:\Users\KVIRDI\Downloads\RSM_Questions.docx"
doc = Document(questions_path)

questions = pd.DataFrame(columns=['question', 'section'])

In [None]:
text = [p.text for p in doc.paragraphs if p.text]

for i in range(0, len(text), 2):
    question = text[i]
    section_line = text[i + 1]

    section_comps = section_line.strip(' ').split(' ')

    # Same multi-label logic you had before
    if len(section_comps) > 2:
        if section_comps[0] == 'Appendix':
            label = 'Appendix 6,Appendix 7'
        else:
            label = '1.3.4.1,2.2.1'
    else:
        label = section_comps[-1]

    questions.loc[len(questions), :] = [question, label]

print("Number of questions:", len(questions))
print(questions.head())


Number of questions: 54
                                            question  section
0  What is the non-fixed radiation contamination ...  2.4.6.2
1  What procedures should be followed if the non-...  2.4.6.2
2  When should the wipe tests be done after using...  2.4.6.1
3  When is leak testing of sealed sources or devi...    2.4.8
4  What is the schedule for leak testing the seal...    2.4.8


In [4]:
text = [p.text for p in doc.paragraphs if p.text]

for i in range(0, len(text), 2):
    question = text[i]
    section_line = text[i + 1]

    section_comps = section_line.strip(' ').split(' ')

    if len(section_comps) > 2:
        if section_comps[0] == 'Appendix':
            label = 'Appendix 6,Appendix 7'
        else:
            label = '1.3.4.1,2.2.1'
    else:
        label = section_comps[-1]

    questions.loc[len(questions), :] = [question, label]

print("Number of questions:", len(questions))
print(questions.head())

questions["gold_sections"] = questions["section"].apply(
    lambda s: [x.strip() for x in str(s).split(",")]
)

print(questions[["question", "section", "gold_sections"]].head())


Number of questions: 108
                                            question  section
0  What is the non-fixed radiation contamination ...  2.4.6.2
1  What procedures should be followed if the non-...  2.4.6.2
2  When should the wipe tests be done after using...  2.4.6.1
3  When is leak testing of sealed sources or devi...    2.4.8
4  What is the schedule for leak testing the seal...    2.4.8
                                            question  section gold_sections
0  What is the non-fixed radiation contamination ...  2.4.6.2     [2.4.6.2]
1  What procedures should be followed if the non-...  2.4.6.2     [2.4.6.2]
2  When should the wipe tests be done after using...  2.4.6.1     [2.4.6.1]
3  When is leak testing of sealed sources or devi...    2.4.8       [2.4.8]
4  What is the schedule for leak testing the seal...    2.4.8       [2.4.8]


Model Accuracies

In [None]:
def top1_accuracy(scores_df, questions_df, method_col):
  
    correct = 0
    total = 0

    valid_q_indices = sorted(scores_df["question_ind"].unique())

    for q_idx in valid_q_indices:
        if q_idx >= len(questions_df):
            print(f"Skipping q_idx {q_idx}: not present in questions_df")
            continue

        q_scores = scores_df[scores_df["question_ind"] == q_idx]

        if q_scores.empty:
            print(f"Warning: no scores for question_ind {q_idx}, skipping.")
            continue

        best_row = q_scores.sort_values(method_col, ascending=False).iloc[0]
        pred_section = str(best_row["section"]).strip()

        gold_list = questions_df.loc[q_idx, "gold_sections"]

        if pred_section in gold_list:
            correct += 1

        total += 1

    if total == 0:
        return float("nan")

    return correct / total



In [6]:
method_cols = ["tfidf", "embedding"]

attention_cols = [c for c in scores.columns if c.startswith("attention_")]
method_cols = method_cols + attention_cols

results = []
for m in method_cols:
    acc = top1_accuracy(scores, questions, m)
    results.append({"method": m, "top1_accuracy": acc})

accuracy_df = pd.DataFrame(results).sort_values("top1_accuracy", ascending=False)

out_path = r"C:\Users\KVIRDI\Downloads\model_accuracies.csv"
accuracy_df.to_csv(out_path, index=False)
print(f"\nSaved accuracies to: {out_path}")


Saved accuracies to: C:\Users\KVIRDI\Downloads\model_accuracies.csv


Model Predictions: Using the best attention model on average

In [None]:
def get_top_predictions(scores_df, questions_df, method_cols):
  
    records = []
    valid_q_indices = sorted(scores_df["question_ind"].unique())

    for q_idx in valid_q_indices:
        if q_idx >= len(questions_df):
            continue

        gold_list = questions_df.loc[q_idx, "gold_sections"]
        question_text = questions_df.loc[q_idx, "question"]

        q_scores = scores_df[scores_df["question_ind"] == q_idx]

        for m in method_cols:
            if m not in scores_df.columns:
                continue

            qs = q_scores.sort_values(m, ascending=False)

            if qs.empty:
                continue

            best_row = qs.iloc[0]
            pred_section = str(best_row["section"]).strip()
            is_correct = pred_section in gold_list

            records.append({
                "question_ind": q_idx,
                "question": question_text,
                "method": m,
                "pred_section": pred_section,
                "is_correct": is_correct,
                "gold_sections": gold_list
            })

    return pd.DataFrame(records)

core_methods = ["tfidf", "embedding", "attention_2_3"]

predictions_df = get_top_predictions(scores, questions, core_methods)
predictions_df.head(10)

output_path = r"C:\Users\KVIRDI\Downloads\model_predictions.csv"
predictions_df.to_csv(output_path, index=False)
print(f"Saved predictions to: {output_path}")


Saved predictions to: C:\Users\KVIRDI\Downloads\model_predictions.csv


Model Predictions: Using the best attention model for a given question

In [None]:

def get_top_predictions_with_best_attention(scores_df, questions_df, baseline_methods):
    
    records = []
    valid_q_indices = sorted(scores_df["question_ind"].unique())

    attention_cols = [c for c in scores_df.columns if c.startswith("attention_")]

    for q_idx in valid_q_indices:
        if q_idx >= len(questions_df):
            continue

        gold_list = questions_df.loc[q_idx, "gold_sections"]
        question_text = questions_df.loc[q_idx, "question"]

        q_scores = scores_df[scores_df["question_ind"] == q_idx]

        for m in baseline_methods:
            if m not in scores_df.columns:
                continue

            qs = q_scores.sort_values(m, ascending=False)
            if qs.empty:
                continue

            best_row = qs.iloc[0]
            pred_section = str(best_row["section"]).strip()
            is_correct = pred_section in gold_list

            records.append({
                "question_ind": q_idx,
                "question": question_text,
                "method": m,
                "pred_section": pred_section,
                "is_correct": is_correct,
                "gold_sections": gold_list
            })

        if attention_cols:
            best_head_name = None
            best_head_score = None
            best_head_section = None

            for att in attention_cols:
                qs_att = q_scores.sort_values(att, ascending=False)
                if qs_att.empty:
                    continue

                top_row = qs_att.iloc[0]
                top_score = top_row[att]
                top_section = str(top_row["section"]).strip()

                if (best_head_score is None) or (top_score > best_head_score):
                    best_head_score = top_score
                    best_head_section = top_section
                    best_head_name = att

            if best_head_name is not None:
                is_correct = best_head_section in gold_list

                records.append({
                    "question_ind": q_idx,
                    "question": question_text,
                    "method": best_head_name, 
                    "pred_section": best_head_section,
                    "is_correct": is_correct,
                    "gold_sections": gold_list
                })

    return pd.DataFrame(records)

baseline_methods = ["tfidf", "embedding"]

predictions_df = get_top_predictions_with_best_attention(scores, questions, baseline_methods)

output_path = r"C:\Users\KVIRDI\Downloads\model_predictions_best_attention.csv"
predictions_df.to_csv(output_path, index=False)

print(f"Saved predictions to: {output_path}")
predictions_df.head(12)


Saved predictions to: C:\Users\KVIRDI\Downloads\model_predictions_best_attention.csv


Unnamed: 0,question_ind,question,method,pred_section,is_correct,gold_sections
0,0,What is the non-fixed radiation contamination ...,tfidf,2.4.6.2,True,[2.4.6.2]
1,0,What is the non-fixed radiation contamination ...,embedding,2.4.6.4,False,[2.4.6.2]
2,0,What is the non-fixed radiation contamination ...,attention_2_1,Appendix 15,False,[2.4.6.2]
3,1,What procedures should be followed if the non-...,tfidf,2.4.14.2,False,[2.4.6.2]
4,1,What procedures should be followed if the non-...,embedding,2.4.6.4,False,[2.4.6.2]
5,1,What procedures should be followed if the non-...,attention_2_3,Appendix 15,False,[2.4.6.2]
6,2,When should the wipe tests be done after using...,tfidf,2.4.23,False,[2.4.6.1]
7,2,When should the wipe tests be done after using...,embedding,2.4.6.4,False,[2.4.6.1]
8,2,When should the wipe tests be done after using...,attention_1_3,Appendix 15,False,[2.4.6.1]
9,3,When is leak testing of sealed sources or devi...,tfidf,2.3.8,False,[2.4.8]
