In [5]:
import pandas as pd
import numpy as np
import torch
import json
import os
from transformers import T5Tokenizer, T5ForConditionalGeneration, BertTokenizer, BertModel
from datasets import Dataset
from sklearn.metrics import f1_score
from rouge_score import rouge_scorer
import nltk
from bert_score import score as bert_score
from nltk.translate.meteor_score import meteor_score

# Fetch the WordNet corpora up front (presumably needed by nltk's meteor_score
# imported above — confirm). nltk reports "already up-to-date" when the data
# is present, so re-running the cell is harmless.
nltk.download('wordnet')
nltk.download('omw-1.4')

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def convert_to_serializable(obj):
    """Recursively replace NumPy values inside ``obj`` with builtin Python types.

    Args:
        obj: Any value. Dicts, lists and tuples are walked recursively.

    Returns:
        ``obj`` with NumPy floats, integers and booleans converted to
        ``float``/``int``/``bool`` and NumPy arrays converted to (nested)
        lists. Dicts and lists are rebuilt, tuples come back as plain
        tuples, and every other value is returned unchanged.
    """
    # np.floating covers every float width (float16, longdouble, ...), not
    # only float32/float64.
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    # np.bool_ is not an np.integer subclass and json cannot encode it.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        # tolist() already yields builtin scalars for numeric dtypes; the
        # recursive call takes care of object arrays holding NumPy scalars.
        return convert_to_serializable(obj.tolist())
    if isinstance(obj, dict):
        return {k: convert_to_serializable(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_to_serializable(item) for item in obj)
    return obj

def save_json(data, file_name):
    """Write ``data`` to ``file_name`` as JSON indented by four spaces.

    The data is passed through ``convert_to_serializable`` first so that the
    NumPy scalars it handles become builtin numbers before encoding.
    """
    cleaned = convert_to_serializable(data)
    with open(file_name, "w") as out_file:
        json.dump(cleaned, out_file, indent=4)

def save_predictions_to_csv(predictions, references, filename):
    """Store predictions next to their references in a two-column CSV.

    The file has a header row with the columns ``predictions`` and
    ``ground_truth`` (in that order) and no index column.
    """
    columns = {'predictions': predictions, 'ground_truth': references}
    pd.DataFrame(columns).to_csv(filename, index=False)

def get_bert_embeddings(texts, tokenizer, model):
    """Encode ``texts`` with ``model`` and return its last hidden state.

    The texts are tokenized as one padded batch (truncated to 512 tokens),
    moved to the module-level ``device`` and run without gradient tracking.

    Returns:
        ``last_hidden_state`` as a NumPy array on the CPU — presumably shaped
        (batch, tokens, hidden); confirm against the model in use.
    """
    encoded = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
    encoded = encoded.to(device)
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    return hidden.cpu().numpy()

def calculate_yisi(preds, refs, tokenizer, model):
    """Average an embedding-similarity score over prediction/reference pairs.

    Each text is embedded on its own and mean-pooled over its tokens; the
    score for a pair is the cosine similarity of the two pooled vectors fed
    through an F1-style formula.

    NOTE(review): the same similarity is used for both "precision" and
    "recall", so the F1 expression reduces to (almost exactly) the cosine
    similarity itself.

    Returns:
        The mean score across all pairs.
    """
    def _pooled(text):
        # First (only) item of the batch, averaged over the token axis.
        return get_bert_embeddings([text], tokenizer, model)[0].mean(axis=0)

    scores = []
    for hyp, gold in zip(preds, refs):
        hyp_vec = _pooled(hyp)
        gold_vec = _pooled(gold)
        # The epsilon keeps the division defined for all-zero vectors.
        sim = np.dot(hyp_vec, gold_vec) / (np.linalg.norm(hyp_vec) * np.linalg.norm(gold_vec) + 1e-8)
        scores.append(2 * sim * sim / (sim + sim + 1e-8))
    return np.mean(scores)

def calculate_moverscore(preds, refs, tokenizer, model):
    """Average a distance-based similarity over prediction/reference pairs.

    Each text is embedded on its own and mean-pooled over its tokens; a pair
    scores ``1 / (1 + d)`` where ``d`` is the Euclidean distance between the
    two pooled vectors, so identical embeddings score 1.

    Returns:
        The mean score across all pairs.
    """
    def _pooled(text):
        # First (only) item of the batch, averaged over the token axis.
        return get_bert_embeddings([text], tokenizer, model)[0].mean(axis=0)

    similarities = []
    for hyp, gold in zip(preds, refs):
        hyp_vec = _pooled(hyp)
        gold_vec = _pooled(gold)
        gap = np.linalg.norm(hyp_vec - gold_vec)
        similarities.append(1 / (1 + gap))
    return np.mean(similarities)

def calculate_metrics(model, tokenizer, dataset):
    """Generate a prediction for every example and score it against the reference.

    Args:
        model: Generation model; put into eval mode and called via ``generate``.
        tokenizer: Tokenizer matching ``model`` (encodes posts, decodes outputs).
        dataset: Iterable of examples exposing 'post' (input text) and
            'new_topic' (reference text).

    Returns:
        ``(metrics, preds, refs)``: a dict with the keys "F1", "ROUGE-1",
        "ROUGE-L", "METEOR", "BERTScore", "YiSi" and "MoverScore", plus the
        generated strings and the references in dataset order.
    """
    model.eval()
    preds = []
    refs = []

    # A separate BERT encoder backs the two embedding-based scores below.
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)

    with torch.no_grad():
        for example in dataset:
            encoded = tokenizer(example['post'], return_tensors="pt", max_length=512, truncation=True).to(device)
            generated = model.generate(**encoded)
            pred = tokenizer.decode(generated[0], skip_special_tokens=True).strip()
            print(pred)
            preds.append(pred)
            refs.append(example['new_topic'])

    # The whole strings are passed to f1_score as class labels.
    f1 = f1_score(refs, preds, average='weighted')

    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    rouge_scores = []
    for ref, pred in zip(refs, preds):
        rouge_scores.append(scorer.score(ref, pred))
    rouge1 = np.mean([entry['rouge1'].fmeasure for entry in rouge_scores])
    rougeL = np.mean([entry['rougeL'].fmeasure for entry in rouge_scores])

    # METEOR is computed on whitespace-split tokens.
    meteor_values = [meteor_score([ref.split()], pred.split()) for ref, pred in zip(refs, preds)]
    meteor = np.mean(meteor_values)

    _, _, bert_f = bert_score(preds, refs, lang="en", verbose=False)
    bert_f1 = bert_f.mean().item()

    yisi = calculate_yisi(preds, refs, bert_tokenizer, bert_model)
    moverscore = calculate_moverscore(preds, refs, bert_tokenizer, bert_model)

    metrics = {
        "F1": f1,
        "ROUGE-1": rouge1,
        "ROUGE-L": rougeL,
        "METEOR": meteor,
        "BERTScore": bert_f1,
        "YiSi": yisi,
        "MoverScore": moverscore,
    }
    return metrics, preds, refs

def test_multiple_files(test_file_paths, model_dir="t5_trained_model"):
    """Evaluate the saved T5 model on each CSV and write its results to disk.

    Args:
        test_file_paths: Paths of CSV files (read as ISO-8859-1) containing at
            least a 'post' column; 'new_topic' is read by ``calculate_metrics``.
        model_dir: Directory holding the trained tokenizer and model.

    For an input named ``<name>.csv`` this writes
    ``test_predictions_<name>.csv`` and ``test_metrics_<name>.json`` into the
    current working directory.
    """
    print(f"Loading tokenizer and model from {model_dir}...")
    tokenizer = T5Tokenizer.from_pretrained(model_dir)
    model = T5ForConditionalGeneration.from_pretrained(model_dir).to(device)

    for test_file_path in test_file_paths:
        print(f"\n🔍 Testing on: {test_file_path}")
        frame = pd.read_csv(test_file_path, encoding="ISO-8859-1")
        # Posts are trimmed and lower-cased before being fed to the model.
        frame['post'] = frame['post'].str.strip().str.lower()

        metrics, preds, refs = calculate_metrics(model, tokenizer, Dataset.from_pandas(frame))

        # Output names are derived from the input file name without extension.
        stem, _ = os.path.splitext(os.path.basename(test_file_path))
        pred_file = f"test_predictions_{stem}.csv"
        metric_file = f"test_metrics_{stem}.json"

        save_predictions_to_csv(preds, refs, pred_file)
        save_json(metrics, metric_file)

        print(f"✅ Saved: {pred_file}, {metric_file}")

# 👇 Add your test file paths here
if __name__ == "__main__":
    # Evaluation CSVs, processed in the order listed.
    test_files = ["tse_implicit.csv", "vast_filtered_im.csv"]
    test_multiple_files(test_files)

[nltk_data] Downloading package wordnet to C:\Users\CSE
[nltk_data]     RGUKT\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\CSE
[nltk_data]     RGUKT\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Using device: cuda
Loading tokenizer and model from t5_trained_model...

🔍 Testing on: tse_implicit.csv
genesis
evolution
big bang
genetic tests
id
fossils
a teaching of religion
congressional report
a good read
evolution
albert einstein
fundamentalist
bible
dr. rihards
comment sections
a loss
id
denial
god
smoking guns
evolution
earth's geology
expansion of the universe
common descent
creationists
supernatural
christians
freudian slip
'creationist'
aclu
science
proof
evolution
big bang theory
kent hovind
christians
evolution
religious beliefs
a good discussion
bible
debate
genome
createdebate argument award
scientific evidence
bible
evolution
id
atheists
creation ex nihilo
intelligent design
universe
aig
id
a true statement
evolution
ncse
god
mathematical postulates
evolution
evolution
id
evolution
invisible gnomes
id proponents
intelligent design
id
a bad world
free will
non-existence
fossils
evolution
carter
evolution
science
evolution
evolution
philosophers
evolution
the bible
id
n

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Saved: test_predictions_tse_implicit.csv, test_metrics_tse_implicit.json

🔍 Testing on: vast_filtered_im.csv
health care law
train stations
ratings system
facebook
middle east
money management
casinos
tip
computerized writing
online courses
a revivified c.i.a.
isis
offshore drilling
company
flag burning
hate
christians
palestinian authority
detroit
bike lanes
aid workers
legalization
immigration
cost-conscious consumers
facts
gay people
legalizing sex trafficking
eating
oil production
homosexuality
religious beliefs
happiness
john addams
pageant experience
equal rights
modern society
lgbt advocates
police
surf breaks
terrorist
immunization
higher education
computer
tax
rim
israel
fear
tenure
marijuana
marijuana
marriage
mental health
public schools
israel
christians
prostitution
students
marijuana
immigration
tax breaks
insurance
real estate
financial services
noncompete agreements
medicare
aca
nato
immigration
judicial restraint
home schoolers
college admissions
tip
economic climate

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Saved: test_predictions_vast_filtered_im.csv, test_metrics_vast_filtered_im.json


In [6]:
import os
import pandas as pd
import google.generativeai as genai
from sklearn.metrics import classification_report, precision_score, f1_score
import time

# Read the Gemini API key from the environment instead of embedding it in the
# notebook: a key stored in source leaks through version control and shared
# exports. Any key that has ever been committed should be revoked and replaced.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running this cell.")
genai.configure(api_key=GOOGLE_API_KEY)

# Load Gemini model
model = genai.GenerativeModel('gemini-1.5-flash')

# Prediction CSVs to annotate: the loop below reads each one, adds a
# 'predicted_stance' column and writes the file back in place.
csv_files = [
    'test_predictions_tse_explicit.csv',
    'test_predictions_tse_implicit.csv',
    'test_predictions_vast_filtered_im.csv'
]

# Gold and predicted labels accumulated across all files for the combined report.
all_true_labels = []
all_pred_labels = []

def get_gemini_stance(post, keyphrase):
    """Ask the Gemini model for the stance of ``post`` towards ``keyphrase``.

    Returns:
        "FAVOR", "AGAINST" or "NONE". A reply that is not exactly one of
        these labels (after trimming and upper-casing) and any exception
        raised while querying the model both yield "NONE".
    """
    valid_labels = ('FAVOR', 'AGAINST', 'NONE')
    time.sleep(5)  # Optional delay to prevent rate limits
    prompt = f"""You are a stance detection assistant. Your task is to determine the stance of the following post towards a given keyphrase. Only respond with one of the following: FAVOR, AGAINST, or NONE.

Post: "{post}"
Keyphrase: "{keyphrase}"

Stance:"""
    try:
        reply = model.generate_content(prompt)
        response_text = reply.text.strip().upper()
        print(f"[Gemini]: {response_text}")

        # Only an exact label is accepted; anything else counts as NONE.
        answer = response_text.strip()
        return answer if answer in valid_labels else "NONE"
    except Exception as e:
        print(f"❌ Error: {e}")
        return "NONE"

# Go through each CSV file
def _gemini_stance_for_row(row):
    """Return the Gemini stance of one row's 'post' towards its 'predictions' keyphrase."""
    return get_gemini_stance(row['post'], row['predictions'])


for file in csv_files:
    print(f"\n📂 Processing file: {file}")
    df = pd.read_csv(file)

    # Fail early when a column used below is missing.
    if not {'post', 'predictions', 'GT Stance'} <= set(df.columns):
        raise ValueError(f"CSV {file} must have 'post', 'predictions', and 'GT Stance' columns.")

    # One model call per row.
    df['predicted_stance'] = df.apply(_gemini_stance_for_row, axis=1)

    # The input file is overwritten with the extra column.
    df.to_csv(file, index=False)
    print(f"✅ Saved updated file with predictions to: {file}")

    # Per-file metrics; the labels also feed the combined report.
    y_true = df['GT Stance']
    y_pred = df['predicted_stance']
    all_true_labels += y_true.tolist()
    all_pred_labels += y_pred.tolist()

    print(f"\n📊 Metrics for: {file}")
    print(classification_report(y_true, y_pred, labels=["FAVOR", "AGAINST", "NONE"], zero_division=0))
    print(f"Precision (macro): {precision_score(y_true, y_pred, average='macro', zero_division=0):.4f}")
    print(f"F1 Score (macro): {f1_score(y_true, y_pred, average='macro', zero_division=0):.4f}")

# Metrics over the rows of every processed file taken together.
print("\n🧮 Overall Combined Results:")
print(classification_report(all_true_labels, all_pred_labels, labels=["FAVOR", "AGAINST", "NONE"], zero_division=0))
print(f"Overall Precision (macro): {precision_score(all_true_labels, all_pred_labels, average='macro', zero_division=0):.4f}")
print(f"Overall F1 Score (macro): {f1_score(all_true_labels, all_pred_labels, average='macro', zero_division=0):.4f}")



📂 Processing file: test_predictions_tse_explicit.csv
[Gemini]: AGAINST
[Gemini]: FAVOR
[Gemini]: AGAINST
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: AGAINST
[Gemini]: NONE
[Gemini]: NONE
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: NONE
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: AGAINST
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: AGAINST
[Gemini]: FAVOR
[Gemini]: AGAINST
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: AGAINST
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: NONE
[Gemini]: FAVOR
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: AGAINST
[Gemini]: NONE
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: NONE
[Gemini]: NONE
[Gemini]: FAVOR
[Gemini]: NONE
[Gemini]: NONE
[Gemini]: NONE
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: NONE
[Gemini]: FAVOR
[Gemini]: FAVOR
[Gemini]: NONE
[Gemini]: NONE
[Gemini]: FAVOR
[Gemini]: NONE
[Gemini]

Ollama


In [4]:
import os
import pandas as pd
from sklearn.metrics import classification_report, precision_score, f1_score
import ollama
import time

# Prediction CSVs to annotate: the loop below reads each one, adds a
# 'predicted_stance' column and writes the file back in place.
csv_files = [
    'test_predictions_tse_explicit.csv',
    'test_predictions_tse_implicit.csv',
    'test_predictions_vast_filtered_im.csv'
]

# Gold and predicted labels accumulated across all files for the combined report.
all_true_labels = []
all_pred_labels = []

def get_ollama_stance(post, keyphrase):
    """Ask the local Ollama 'llama3' model for the stance of ``post`` towards ``keyphrase``.

    Returns:
        "FAVOR", "AGAINST" or "NONE". A reply that is not exactly one of
        these labels (after trimming and upper-casing) and any exception
        raised while querying the model both yield "NONE".
    """
    valid_labels = ('FAVOR', 'AGAINST', 'NONE')
    time.sleep(1)  # Short pause between requests to the local model
    prompt = f"""You are a stance detection assistant. Your task is to determine the stance of the following post towards a given keyphrase. Only respond with one of the following: FAVOR, AGAINST, or NONE.

Post: "{post}"
Keyphrase: "{keyphrase}"

Stance:"""
    try:
        # 'llama3' must be installed locally; change the name to use another model.
        reply = ollama.generate(model='llama3', prompt=prompt)
        response_text = reply['response'].strip().upper()
        print(f"[Ollama]: {response_text}")

        # Only an exact label is accepted; anything else counts as NONE.
        answer = response_text.strip()
        return answer if answer in valid_labels else "NONE"
    except Exception as e:
        print(f"❌ Error: {e}")
        return "NONE"

# Go through each CSV file
def _ollama_stance_for_row(row):
    """Return the Ollama stance of one row's 'post' towards its 'predictions' keyphrase."""
    return get_ollama_stance(row['post'], row['predictions'])


for file in csv_files:
    print(f"\n📂 Processing file: {file}")
    if not os.path.exists(file):
        raise FileNotFoundError(f"CSV file {file} not found.")

    df = pd.read_csv(file)

    # Fail early when a column used below is missing.
    if not {'post', 'predictions', 'GT Stance'} <= set(df.columns):
        raise ValueError(f"CSV {file} must have 'post', 'predictions', and 'GT Stance' columns.")

    # One model call per row.
    df['predicted_stance'] = df.apply(_ollama_stance_for_row, axis=1)

    # The input file is overwritten with the extra column.
    df.to_csv(file, index=False)
    print(f"✅ Saved updated file with predictions to: {file}")

    # Per-file metrics; the labels also feed the combined report.
    y_true = df['GT Stance']
    y_pred = df['predicted_stance']
    all_true_labels += y_true.tolist()
    all_pred_labels += y_pred.tolist()

    print(f"\n📊 Metrics for: {file}")
    print(classification_report(y_true, y_pred, labels=["FAVOR", "AGAINST", "NONE"], zero_division=0))
    print(f"Precision (macro): {precision_score(y_true, y_pred, average='macro', zero_division=0):.4f}")
    print(f"F1 Score (macro): {f1_score(y_true, y_pred, average='macro', zero_division=0):.4f}")

# Metrics over the rows of every processed file taken together.
print("\n🧮 Overall Combined Results:")
print(classification_report(all_true_labels, all_pred_labels, labels=["FAVOR", "AGAINST", "NONE"], zero_division=0))
print(f"Overall Precision (macro): {precision_score(all_true_labels, all_pred_labels, average='macro', zero_division=0):.4f}")
print(f"Overall F1 Score (macro): {f1_score(all_true_labels, all_pred_labels, average='macro', zero_division=0):.4f}")


📂 Processing file: test_predictions_tse_explicit.csv
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: NONE
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: NONE
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: FAVOR
[Ollama]: AGAINST
[Ollama]: FAVOR
[Ollama]: FAVOR
[Oll

In [1]:
import os
import pandas as pd
from sklearn.metrics import classification_report, precision_score, f1_score
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import time

# Prediction CSVs to annotate: the loop below reads each one, adds a
# 'predicted_stance_ BERTTWEET' column and writes the file back in place.
csv_files = [
  'test_predictions_vast_filtered_ex.csv'
]

# Gold and predicted labels accumulated across all files for the combined report.
all_true_labels = []
all_pred_labels = []

# Load the pretrained BERTweet encoder with a 3-way classification head.
# NOTE(review): the checkpoint carries no classifier weights (transformers
# warns that they are newly initialised), so predictions from this model are
# effectively arbitrary until it has been fine-tuned.
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
model = AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base", num_labels=3)  # 3 labels: FAVOR, AGAINST, NONE

# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Class index <-> stance label, in both directions.
label_map = {0: "FAVOR", 1: "AGAINST", 2: "NONE"}
reverse_label_map = {"FAVOR": 0, "AGAINST": 1, "NONE": 2}

def get_bertweet_stance(post, keyphrase):
    """Classify the stance of ``post`` towards ``keyphrase`` with the loaded BERTweet model.

    The two texts are joined as "<post> [SEP] <keyphrase>", truncated to 128
    tokens and scored by the module-level ``model``.

    Returns:
        The ``label_map`` entry for the highest-scoring class
        ("FAVOR", "AGAINST" or "NONE").
    """
    encoded = tokenizer(
        f"{post} [SEP] {keyphrase}",
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    # Put every input tensor on the same device as the model.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    model.eval()
    with torch.no_grad():
        scores = model(**encoded).logits
        class_id = torch.argmax(scores, dim=1).item()

    return label_map[class_id]

# Go through each CSV file
def _bertweet_stance_for_row(row):
    """Return the BERTweet stance of one row's 'post' towards its 'predictions' keyphrase."""
    return get_bertweet_stance(row['post'], row['predictions'])


for file in csv_files:
    print(f"\n📂 Processing file: {file}")
    if not os.path.exists(file):
        raise FileNotFoundError(f"CSV file {file} not found.")

    df = pd.read_csv(file)

    # Fail early when a column used below is missing.
    if not {'post', 'predictions', 'GT Stance'} <= set(df.columns):
        raise ValueError(f"CSV {file} must have 'post', 'predictions', and 'GT Stance' columns.")

    # One forward pass per row.
    df['predicted_stance_ BERTTWEET'] = df.apply(_bertweet_stance_for_row, axis=1)

    # The input file is overwritten with the extra column.
    df.to_csv(file, index=False)
    print(f"✅ Saved updated file with predictions to: {file}")

    # Per-file metrics; the labels also feed the combined report.
    y_true = df['GT Stance']
    y_pred = df['predicted_stance_ BERTTWEET']
    all_true_labels += y_true.tolist()
    all_pred_labels += y_pred.tolist()

    print(f"\n📊 Metrics for: {file}")
    print(classification_report(y_true, y_pred, labels=["FAVOR", "AGAINST", "NONE"], zero_division=0))
    print(f"Precision (macro): {precision_score(y_true, y_pred, average='macro', zero_division=0):.4f}")
    print(f"F1 Score (macro): {f1_score(y_true, y_pred, average='macro', zero_division=0):.4f}")

# Metrics over the rows of every processed file taken together.
print("\n🧮 Overall Combined Results:")
print(classification_report(all_true_labels, all_pred_labels, labels=["FAVOR", "AGAINST", "NONE"], zero_division=0))
print(f"Overall Precision (macro): {precision_score(all_true_labels, all_pred_labels, average='macro', zero_division=0):.4f}")
print(f"Overall F1 Score (macro): {f1_score(all_true_labels, all_pred_labels, average='macro', zero_division=0):.4f}")

  from .autonotebook import tqdm as notebook_tqdm
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📂 Processing file: test_predictions_vast_filtered_ex.csv
✅ Saved updated file with predictions to: test_predictions_vast_filtered_ex.csv

📊 Metrics for: test_predictions_vast_filtered_ex.csv
              precision    recall  f1-score   support

       FAVOR       0.37      0.05      0.08      1371
     AGAINST       0.63      0.02      0.04      1728
        NONE       0.01      0.95      0.01        21

    accuracy                           0.04      3120
   macro avg       0.34      0.34      0.05      3120
weighted avg       0.52      0.04      0.06      3120

Precision (macro): 0.3382
F1 Score (macro): 0.0464

🧮 Overall Combined Results:
              precision    recall  f1-score   support

       FAVOR       0.37      0.05      0.08      1371
     AGAINST       0.63      0.02      0.04      1728
        NONE       0.01      0.95      0.01        21

    accuracy                           0.04      3120
   macro avg       0.34      0.34      0.05      3120
weighted avg       0.

In [12]:
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
import pandas as pd
from torch.utils.data import Dataset

# Custom Dataset class
class StanceDataset(Dataset):
    """Map-style dataset that tokenizes (post, keyphrase) pairs for stance classification.

    Each item comes from one dataframe row: the 'post' and 'ground_truth'
    columns are joined as "<post> [SEP] <keyphrase>", tokenized to exactly
    ``max_length`` tokens, and paired with the integer id of the row's
    'GT Stance' label.
    """

    def __init__(self, dataframe, tokenizer, max_length=128):
        """Store the source frame, the tokenizer and the fixed sequence length.

        Args:
            dataframe: Frame with 'post', 'ground_truth' and 'GT Stance' columns.
            tokenizer: Callable tokenizer returning 'input_ids' and
                'attention_mask' tensors.
            max_length: Length every sequence is padded/truncated to.
        """
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Stance label -> class index used as the training target.
        self.label_map = {"FAVOR": 0, "AGAINST": 1, "NONE": 2}

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the tokenized pair and label tensor for row ``idx``.

        Raises:
            KeyError: If the row's 'GT Stance' value is not FAVOR, AGAINST or NONE.
        """
        # Fetch the row once instead of re-indexing the frame per column.
        row = self.data.iloc[idx]
        post = str(row['post'])
        keyphrase = str(row['ground_truth'])  # Adjust column name if needed
        label = self.label_map[row['GT Stance']]

        encoding = self.tokenizer(
            f"{post} [SEP] {keyphrase}",
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )
        # Drop only the leading batch axis; a bare squeeze() would also
        # collapse the sequence axis whenever it has length 1.
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(label, dtype=torch.long)
        }

# Build the training set from the stance-annotated CSV.
# NOTE(review): the file is named like a test-prediction file — confirm it is
# not also used for evaluation, otherwise the reported scores are inflated.
df_train = pd.read_csv("test_predictions_vast_filtered_ex.csv")  # Your training data
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
train_dataset = StanceDataset(df_train, tokenizer)

# Pretrained BERTweet encoder with a fresh 3-way classification head.
model = AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base", num_labels=3)

# Training configuration: 3 epochs, batch size 16, no evaluation pass.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./bertweet_stance",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    evaluation_strategy="no",  # No eval dataset is supplied, so evaluation is off
    save_strategy="epoch",     # A checkpoint is still written after each epoch
    # load_best_model_at_end is left unset because nothing is evaluated
)

# The Trainer only receives a training set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

# Run fine-tuning.
trainer.train()

# Persist model and tokenizer side by side so both can later be reloaded from
# the "bertweet_stance_finetuned" directory.
model.save_pretrained("bertweet_stance_finetuned")
tokenizer.save_pretrained("bertweet_stance_finetuned")

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x0000028C0488CEE0>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 28b99f8ae30, raw_cell="from transformers import Trainer, TrainingArgument.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/c%3A/Users/CSE%20RGUKT/Desktop/Stance/Stance_Testing.ipynb#X11sZmlsZQ%3D%3D>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
10,1.1412
20,1.1047
30,1.0504
40,0.9558
50,0.9099
60,0.8278
70,0.7734
80,0.7656
90,0.7475
100,0.7454


('bertweet_stance_finetuned\\tokenizer_config.json',
 'bertweet_stance_finetuned\\special_tokens_map.json',
 'bertweet_stance_finetuned\\vocab.txt',
 'bertweet_stance_finetuned\\bpe.codes',
 'bertweet_stance_finetuned\\added_tokens.json')

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x0000028C0488CEE0>> (for post_run_cell), with arguments args (<ExecutionResult object at 28b9d0037f0, execution_count=12 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 28b99f8ae30, raw_cell="from transformers import Trainer, TrainingArgument.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/c%3A/Users/CSE%20RGUKT/Desktop/Stance/Stance_Testing.ipynb#X11sZmlsZQ%3D%3D> result=('bertweet_stance_finetuned\\tokenizer_config.json', 'bertweet_stance_finetuned\\special_tokens_map.json', 'bertweet_stance_finetuned\\vocab.txt', 'bertweet_stance_finetuned\\bpe.codes', 'bertweet_stance_finetuned\\added_tokens.json')>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

In [13]:
import os
import pandas as pd
from sklearn.metrics import classification_report, precision_score, f1_score
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import time

# Prediction CSVs to annotate: the loop below reads each one, adds a
# 'predicted_stance_ BERTTWEET' column and writes the file back in place.
csv_files = [
    'test_predictions_tse_explicit.csv',
    'test_predictions_tse_implicit.csv',
    'test_predictions_vast_filtered_im.csv'
]

# Gold and predicted labels accumulated across all files for the combined report.
all_true_labels = []
all_pred_labels = []

# Load the fine-tuned tokenizer and classifier from the local
# "bertweet_stance_finetuned" directory (written by the fine-tuning cell)
# rather than the raw "vinai/bertweet-base" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bertweet_stance_finetuned")
model = AutoModelForSequenceClassification.from_pretrained("bertweet_stance_finetuned")


# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Class index <-> stance label, in both directions.
label_map = {0: "FAVOR", 1: "AGAINST", 2: "NONE"}
reverse_label_map = {"FAVOR": 0, "AGAINST": 1, "NONE": 2}

def get_bertweet_stance(post, keyphrase):
    """Classify the stance of ``post`` towards ``keyphrase`` with the loaded BERTweet model.

    The two texts are joined as "<post> [SEP] <keyphrase>", truncated to 128
    tokens and scored by the module-level ``model``.

    Returns:
        The ``label_map`` entry for the highest-scoring class
        ("FAVOR", "AGAINST" or "NONE").
    """
    encoded = tokenizer(
        f"{post} [SEP] {keyphrase}",
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    # Put every input tensor on the same device as the model.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    model.eval()
    with torch.no_grad():
        scores = model(**encoded).logits
        class_id = torch.argmax(scores, dim=1).item()

    return label_map[class_id]

# Go through each CSV file
def _bertweet_stance_for_row(row):
    """Return the BERTweet stance of one row's 'post' towards its 'predictions' keyphrase."""
    return get_bertweet_stance(row['post'], row['predictions'])


for file in csv_files:
    print(f"\n📂 Processing file: {file}")
    if not os.path.exists(file):
        raise FileNotFoundError(f"CSV file {file} not found.")

    df = pd.read_csv(file)

    # Fail early when a column used below is missing.
    if not {'post', 'predictions', 'GT Stance'} <= set(df.columns):
        raise ValueError(f"CSV {file} must have 'post', 'predictions', and 'GT Stance' columns.")

    # One forward pass per row.
    df['predicted_stance_ BERTTWEET'] = df.apply(_bertweet_stance_for_row, axis=1)

    # The input file is overwritten with the extra column.
    df.to_csv(file, index=False)
    print(f"✅ Saved updated file with predictions to: {file}")

    # Per-file metrics; the labels also feed the combined report.
    y_true = df['GT Stance']
    y_pred = df['predicted_stance_ BERTTWEET']
    all_true_labels += y_true.tolist()
    all_pred_labels += y_pred.tolist()

    print(f"\n📊 Metrics for: {file}")
    print(classification_report(y_true, y_pred, labels=["FAVOR", "AGAINST", "NONE"], zero_division=0))
    print(f"Precision (macro): {precision_score(y_true, y_pred, average='macro', zero_division=0):.4f}")
    print(f"F1 Score (macro): {f1_score(y_true, y_pred, average='macro', zero_division=0):.4f}")

# Metrics over the rows of every processed file taken together.
print("\n🧮 Overall Combined Results:")
print(classification_report(all_true_labels, all_pred_labels, labels=["FAVOR", "AGAINST", "NONE"], zero_division=0))
print(f"Overall Precision (macro): {precision_score(all_true_labels, all_pred_labels, average='macro', zero_division=0):.4f}")
print(f"Overall F1 Score (macro): {f1_score(all_true_labels, all_pred_labels, average='macro', zero_division=0):.4f}")

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x0000028C0488CEE0>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 28b57545c90, raw_cell="import os
import pandas as pd
from sklearn.metrics.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/c%3A/Users/CSE%20RGUKT/Desktop/Stance/Stance_Testing.ipynb#X13sZmlsZQ%3D%3D>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given


📂 Processing file: test_predictions_tse_explicit.csv
✅ Saved updated file with predictions to: test_predictions_tse_explicit.csv

📊 Metrics for: test_predictions_tse_explicit.csv
              precision    recall  f1-score   support

       FAVOR       0.52      0.51      0.52       801
     AGAINST       0.44      0.64      0.52       702
        NONE       0.00      0.00      0.00       301

    accuracy                           0.48      1804
   macro avg       0.32      0.38      0.35      1804
weighted avg       0.40      0.48      0.43      1804

Precision (macro): 0.3204
F1 Score (macro): 0.3455

📂 Processing file: test_predictions_tse_implicit.csv
✅ Saved updated file with predictions to: test_predictions_tse_implicit.csv

📊 Metrics for: test_predictions_tse_implicit.csv
              precision    recall  f1-score   support

       FAVOR       0.44      0.33      0.38       516
     AGAINST       0.46      0.74      0.56       502
        NONE       0.00      0.00      0.00  

TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given