# DeepSeek fallacy detection


In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import torch
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

In [3]:
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from sklearn.metrics import accuracy_score, f1_score
from transformers import EvalPrediction

def compute_metrics(eval_pred: EvalPrediction):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)

    f1 = f1_score(labels, predictions, average="weighted")
    f1_class_0 = f1_score(labels, predictions, pos_label=0, average="binary")
    f1_class_1 = f1_score(labels, predictions, pos_label=1, average="binary")
    accuracy = accuracy_score(labels, predictions)

    return {
        "accuracy": accuracy,
        "f1": f1,
        "f1_class_0": f1_class_0,
        "f1_class_1": f1_class_1}

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
import torch
import torch.nn as nn
from transformers import BitsAndBytesConfig


# === MODEL ===
# 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",  
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True 
)

# Upload the model
model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True)


class ModifiedModelForBinaryClassification(nn.Module):
    def __init__(self, original_model):
        super().__init__()
        self.transformer = original_model.model
        self.lm_head = nn.Linear(self.transformer.config.hidden_size, 2)

    def forward(self, input_ids, attention_mask=None, labels=None):
        outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state
        pooled_output = hidden_states.mean(dim=1)
        pooled_output = pooled_output.to(dtype=self.lm_head.weight.dtype)
        logits = self.lm_head(pooled_output)

        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, 2), labels.view(-1))
            return loss, logits

        return logits  


model = ModifiedModelForBinaryClassification(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

ModifiedModelForBinaryClassification(
  (transformer): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
   

In [None]:
from datasets import Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer

model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token 

df = pd.read_csv("/content/drive/MyDrive/UPV master/HAIA/train_afd.csv")
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["Etiqueta"], random_state=42)
train_df = train_df.rename(columns={"Etiqueta": "labels"})
val_df = val_df.rename(columns={"Etiqueta": "labels"})

# === TOKENIZATION ===
def tokenize_function(examples):
    prompt = "Your task is to detect the type of fallacy in the Text. The label should be 1 (it is a fallacy) or 0 (it is not a fallacy) ▶ Text Snippet: "
    text_with_prompt = [prompt + text for text in examples["Texto"]]
    return tokenizer(text_with_prompt, padding="max_length", truncation=True, max_length=128)

train_dataset = Dataset.from_pandas(train_df).map(tokenize_function, batched=True)
val_dataset = Dataset.from_pandas(val_df).map(tokenize_function, batched=True)
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

Map:   0%|          | 0/13694 [00:00<?, ? examples/s]

Map:   0%|          | 0/3424 [00:00<?, ? examples/s]

In [None]:
from tqdm import tqdm  
from torch.utils.data import DataLoader


model.eval()
dataloader = DataLoader(val_dataset, batch_size=8)
predictions = []
with torch.no_grad():
    for batch in tqdm(dataloader, desc="Inferencia", total=len(dataloader)):  
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        input_ids = input_ids.to(dtype=torch.long) 
        attention_mask = attention_mask.to(dtype=torch.bfloat16)  
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs
        preds = torch.argmax(logits, dim=-1).cpu().numpy()
        predictions.extend(preds)

val_df['predicted_label'] = predictions

Inferencia: 100%|██████████| 428/428 [49:05<00:00,  6.88s/it]


In [19]:
val_df

Unnamed: 0,Texto,labels,predicted_label
5653,I also believe in the Golden Rule.,0.0,0
14867,Joe Biden will be a president who brings our c...,0.0,0
11257,"Well, first of all, I think it's important for...",0.0,0
13847,Can they rely on us?,0.0,0
5853,"I -- of course there's a lot -- look, global w...",0.0,0
...,...,...,...
3827,The good news is we're making progress.,0.0,0
11975,Governor Romney doesn't have a five-point plan.,1.0,0
4399,But it's not good enough.,0.0,0
8984,We're not talking about third-world drugs.,0.0,0


In [None]:
from sklearn.metrics import accuracy_score, f1_score

accuracy = accuracy_score(val_df['labels'], val_df['predicted_label'])
f1 = f1_score(val_df['labels'], val_df['predicted_label'], average='weighted')
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.8814
F1 Score: 0.8563


In [None]:
from datasets import concatenate_datasets

# === TRAIN WITH ALL THE DATASET ===
full_train_df = concatenate_datasets([train_dataset, val_dataset]).map(tokenize_function, batched=True)
full_train_df.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

Map:   0%|          | 0/17118 [00:00<?, ? examples/s]

In [None]:
# === TEST PREPROCESSING ===
test_df = pd.read_csv("/content/drive/MyDrive/UPV master/HAIA/test_afd.csv")
test_df = test_df.rename(columns={"Etiqueta": "labels"})
test_dataset = Dataset.from_pandas(test_df).map(tokenize_function, batched=True)
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataloader = DataLoader(test_dataset, batch_size=8)


# === PREDICTION ===
model.eval()
predictions = []
with torch.no_grad():
    for batch in tqdm(test_dataloader, desc="Inferencia test", total=len(test_dataloader)):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        input_ids = input_ids.to(dtype=torch.long)
        attention_mask = attention_mask.to(dtype=torch.bfloat16)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs
        preds = torch.argmax(logits, dim=-1).cpu().numpy()
        predictions.extend(preds)


test_df["predicted_label"] = predictions

Map:   0%|          | 0/2175 [00:00<?, ? examples/s]

Inferencia test: 100%|██████████| 272/272 [31:12<00:00,  6.88s/it]


In [28]:
test_df

Unnamed: 0,Texto,labels,predicted_label
0,We got to take a look at what I was left when ...,,0
1,We had an economy that was in free fall.,,0
2,The pandemic was so badly handled.,,0
3,Many people were dying.,,0
4,"All he said was, it's not that serious.",,0
...,...,...,...
2170,She gave a lot of it away to the Taliban.,,0
2171,She gave it to Afghanistan.,,0
2172,What these people have done to our country and...,,0
2173,Many of them are criminals and they're destroy...,,0


In [None]:
label_counts = test_df['predicted_label'].value_counts()
print(label_counts)

predicted_label
0    2141
1      34
Name: count, dtype: int64


In [30]:
test_df.to_csv("predicciones.csv", index=False)