In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import classification_report, f1_score, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from datasets import load_dataset
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)

In [3]:
dataset_dict = load_dataset("ailsntua/QEvasion")

train_ds = dataset_dict["train"]
test_ds  = dataset_dict["test"]

print(train_ds)
print(train_ds.column_names)

Dataset({
    features: ['title', 'date', 'president', 'url', 'question_order', 'interview_question', 'interview_answer', 'gpt3.5_summary', 'gpt3.5_prediction', 'question', 'annotator_id', 'annotator1', 'annotator2', 'annotator3', 'inaudible', 'multiple_questions', 'affirmative_questions', 'index', 'clarity_label', 'evasion_label'],
    num_rows: 3448
})
['title', 'date', 'president', 'url', 'question_order', 'interview_question', 'interview_answer', 'gpt3.5_summary', 'gpt3.5_prediction', 'question', 'annotator_id', 'annotator1', 'annotator2', 'annotator3', 'inaudible', 'multiple_questions', 'affirmative_questions', 'index', 'clarity_label', 'evasion_label']


In [4]:
train_ds = train_ds.remove_columns(["title", "date", "url", "annotator_id", "annotator1", "annotator2", "annotator3"])


In [5]:
train_df = train_ds.to_pandas()
train_df.head()

Unnamed: 0,president,question_order,interview_question,interview_answer,gpt3.5_summary,gpt3.5_prediction,question,inaudible,multiple_questions,affirmative_questions,index,clarity_label,evasion_label
0,Joseph R. Biden,1,Q. Of the Biden administration. And accused th...,"Well, look, first of all, theI am sincere abou...",The question consists of 2 parts: \n1. How wou...,Question part: 1. How would you respond to the...,How would you respond to the accusation that t...,False,False,False,0,Clear Reply,Explicit
1,Joseph R. Biden,1,Q. Of the Biden administration. And accused th...,"Well, look, first of all, theI am sincere abou...",The question consists of 2 parts: \n1. How wou...,Question part: 1. How would you respond to the...,Do you think President Xi is being sincere abo...,False,False,False,1,Ambivalent,General
2,Joseph R. Biden,2,Q. No worries. Do you believe the country's sl...,"Look, I think China has a difficult economic p...",The question consists of two parts:\n\n1. Q1: ...,Question part: Q1 - Do you believe the country...,Do you believe the country's slowdown and gro...,False,False,False,2,Ambivalent,Partial/half-answer
3,Joseph R. Biden,2,Q. No worries. Do you believe the country's sl...,"Look, I think China has a difficult economic p...",The question consists of two parts:\n\n1. Q1: ...,Question part: Q1 - Do you believe the country...,Are you worried about the meeting between Pre...,False,False,False,3,Ambivalent,Dodging
4,Joseph R. Biden,3,"Q. I can imagine. It is evening, I'd like to r...","Well, I hope I get to see Mr. Xi sooner than l...",The question consists of 3 parts:\n1. Is the P...,Question part: 1. Is the President's engagemen...,Is the President's engagement with Asian coun...,False,False,False,4,Clear Reply,Explicit


In [6]:
test_ds = test_ds.remove_columns(
    ["title", "date", "url", "annotator_id", "annotator1", "annotator2", "annotator3"]
)

test_df = test_ds.to_pandas()
test_df.head()

Unnamed: 0,president,question_order,interview_question,interview_answer,gpt3.5_summary,gpt3.5_prediction,question,inaudible,multiple_questions,affirmative_questions,index,clarity_label,evasion_label
0,,5,"Q. What about the redline, sir?","Well, the world has made it clear that these t...",,,Inquiring about the status or information reg...,False,False,True,0,Ambivalent,
1,,2,Q. Will you invite them to the White House to ...,I think that anytime and anyplace that they ar...,,,Will you invite them to the White House to neg...,False,False,False,1,Ambivalent,
2,,1,"Q. Harsh. Mr. President, Japan has dropped the...",I think that the purpose of the U.N. Security ...,,,Why was it necessary for Japan to drop the thr...,False,False,False,2,Ambivalent,
3,,2,Q. The Lebanese Prime Minister is demanding a ...,I'll let Condi talk about the details of what ...,,,When will we see this resolution?,False,False,False,3,Ambivalent,
4,,7,"Q. Thank you, Mr. President. Back on Iraq, a g...","No, I don't consider it a credible report; nei...",,,Updating the figure of Iraqi deaths,False,False,True,4,Ambivalent,


In [7]:
if "index" in train_df.columns:
    train_df = train_df.drop(columns=["index"])

In [8]:
train_df = train_ds.to_pandas()
test_df  = test_ds.to_pandas()

train_df.head()


Unnamed: 0,president,question_order,interview_question,interview_answer,gpt3.5_summary,gpt3.5_prediction,question,inaudible,multiple_questions,affirmative_questions,index,clarity_label,evasion_label
0,Joseph R. Biden,1,Q. Of the Biden administration. And accused th...,"Well, look, first of all, theI am sincere abou...",The question consists of 2 parts: \n1. How wou...,Question part: 1. How would you respond to the...,How would you respond to the accusation that t...,False,False,False,0,Clear Reply,Explicit
1,Joseph R. Biden,1,Q. Of the Biden administration. And accused th...,"Well, look, first of all, theI am sincere abou...",The question consists of 2 parts: \n1. How wou...,Question part: 1. How would you respond to the...,Do you think President Xi is being sincere abo...,False,False,False,1,Ambivalent,General
2,Joseph R. Biden,2,Q. No worries. Do you believe the country's sl...,"Look, I think China has a difficult economic p...",The question consists of two parts:\n\n1. Q1: ...,Question part: Q1 - Do you believe the country...,Do you believe the country's slowdown and gro...,False,False,False,2,Ambivalent,Partial/half-answer
3,Joseph R. Biden,2,Q. No worries. Do you believe the country's sl...,"Look, I think China has a difficult economic p...",The question consists of two parts:\n\n1. Q1: ...,Question part: Q1 - Do you believe the country...,Are you worried about the meeting between Pre...,False,False,False,3,Ambivalent,Dodging
4,Joseph R. Biden,3,"Q. I can imagine. It is evening, I'd like to r...","Well, I hope I get to see Mr. Xi sooner than l...",The question consists of 3 parts:\n1. Is the P...,Question part: 1. Is the President's engagemen...,Is the President's engagement with Asian coun...,False,False,False,4,Clear Reply,Explicit


In [9]:
train_df.columns
train_df[["clarity_label", "evasion_label"]].value_counts()


clarity_label    evasion_label      
Clear Reply      Explicit               1052
Ambivalent       Dodging                 706
                 Implicit                488
                 General                 386
                 Deflection              381
Clear Non-Reply  Declining to answer     145
                 Claims ignorance        119
                 Clarification            92
Ambivalent       Partial/half-answer      79
Name: count, dtype: int64

## combine question  + answers into input text

In [10]:
train_df["text"] = train_df["question"] + " [SEP] " + train_df["interview_answer"]
test_df["text"]  = test_df["question"]  + " [SEP] " + test_df["interview_answer"]


## clarity

In [11]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(
    train_df,
    test_size=0.1,
    stratify=train_df["clarity_label"],
    random_state=42
)

In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, f1_score
from sklearn.pipeline import Pipeline

In [13]:
tfidf_lr_clarity = Pipeline([
    ("tfidf", TfidfVectorizer(
        ngram_range=(1,2),
        min_df=2,
        max_features=50000,
        sublinear_tf=True
    )),
    ("clf", LogisticRegression(
        max_iter=2000,
        class_weight="balanced",
        solver="liblinear"
    ))
])

In [14]:
tfidf_lr_clarity.fit(train_df["text"], train_df["clarity_label"])

val_preds = tfidf_lr_clarity.predict(val_df["text"])
print("=== Clarity Logistic Regression Baseline ===")
print(classification_report(val_df["clarity_label"], val_preds))
print("Macro-F1:", f1_score(val_df["clarity_label"], val_preds, average="macro"))

=== Clarity Logistic Regression Baseline ===
                 precision    recall  f1-score   support

     Ambivalent       0.67      0.85      0.75       204
Clear Non-Reply       0.57      0.44      0.50        36
    Clear Reply       0.58      0.32      0.41       105

       accuracy                           0.65       345
      macro avg       0.61      0.54      0.56       345
   weighted avg       0.63      0.65      0.62       345

Macro-F1: 0.5559602998627389


## evasion

In [15]:
tfidf_lr_evasion = Pipeline([
    ("tfidf", TfidfVectorizer(
        ngram_range=(1,2),
        min_df=2,
        max_features=50000,
        sublinear_tf=True
    )),
    ("clf", LogisticRegression(
        max_iter=2000,
        class_weight="balanced",
        solver="liblinear"
    ))
])

tfidf_lr_evasion.fit(train_df["text"], train_df["evasion_label"])

val_preds = tfidf_lr_evasion.predict(val_df["text"])
print("=== Evasion Logistic Regression Baseline ===")
print(classification_report(val_df["evasion_label"], val_preds))
print("Macro-F1:", f1_score(val_df["evasion_label"], val_preds, average="macro"))


=== Evasion Logistic Regression Baseline ===
                     precision    recall  f1-score   support

   Claims ignorance       0.62      0.36      0.45        14
      Clarification       0.73      0.89      0.80         9
Declining to answer       0.44      0.62      0.52        13
         Deflection       0.29      0.37      0.32        43
            Dodging       0.46      0.35      0.40        65
           Explicit       0.51      0.55      0.53       105
            General       0.25      0.26      0.25        39
           Implicit       0.21      0.18      0.20        49
Partial/half-answer       0.00      0.00      0.00         8

           accuracy                           0.40       345
          macro avg       0.39      0.40      0.39       345
       weighted avg       0.40      0.40      0.39       345

Macro-F1: 0.3863315173936872
