<a href="https://colab.research.google.com/github/beyza720/CENG463-HW2/blob/main/task1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers datasets torch
!pip install evaluate



In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score

from google.colab import drive
drive.mount('/content/drive')

train_path = '/content/drive/My Drive/Colab Notebooks/orientation-fr-train.tsv'
train_orientation_data = pd.read_csv(train_path, sep='\t')

class_0 = train_orientation_data[train_orientation_data['label'] == 0]
class_1 = train_orientation_data[train_orientation_data['label'] == 1]

class_0_oversampled = resample(class_0, replace=True, n_samples=len(class_1), random_state=42)
balanced_train_orientation_data = pd.concat([class_1, class_0_oversampled])

train_plus_val_orientation, test_orientation = train_test_split(
    balanced_train_orientation_data,
    test_size=0.1,
    stratify=balanced_train_orientation_data['label'],
    random_state=42
)

train_orientation, val_orientation = train_test_split(
    train_plus_val_orientation,
    test_size=0.1,
    stratify=train_plus_val_orientation['label'],
    random_state=42
)

print(len(train_orientation))
print(len(val_orientation))
print(len(test_orientation))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
4090
455
505


In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import Dataset
from transformers import TrainingArguments
import numpy as np
import evaluate
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from transformers import Trainer

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-multilingual-cased")
model = AutoModelForSequenceClassification.from_pretrained(
    "google-bert/bert-base-multilingual-cased", num_labels=2
)

train_dataset = Dataset.from_pandas(train_orientation)
val_dataset = Dataset.from_pandas(val_orientation)
test_dataset = Dataset.from_pandas(test_orientation)

def tokenize_function(examples):
    return tokenizer(examples["text_en"], padding="max_length", truncation=True)

tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_val_dataset = val_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    report_to="none",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=256,
    save_steps=1000,
    save_total_limit=2
)

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
    acc = accuracy_score(labels, predictions)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

test_preds = trainer.predict(tokenized_test_dataset)
print(test_preds.metrics)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4090 [00:00<?, ? examples/s]

Map:   0%|          | 0/455 [00:00<?, ? examples/s]

Map:   0%|          | 0/505 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6928,0.723508,0.538462,0.635396,0.538462,0.439217
2,0.6126,0.537491,0.736264,0.739827,0.736264,0.735225
3,0.408,0.508911,0.786813,0.787726,0.786813,0.786626


{'test_loss': 0.4859007000923157, 'test_accuracy': 0.7722772277227723, 'test_precision': 0.7745958595859587, 'test_recall': 0.7722772277227723, 'test_f1': 0.7718245394612652, 'test_runtime': 3.5762, 'test_samples_per_second': 141.213, 'test_steps_per_second': 8.948}


In [None]:
# the following part is for the casual language model

In [None]:
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

login(token="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")
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

causal_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)


Device set to use cuda:0


In [None]:
def create_prompt(text):
    examples = train_orientation.sample(n=30)
    prompt = "Analyze parliamentary speeches and classify as left-wing (0) or right-wing (1).\n\n"

    for _, row in examples.iterrows():
        prompt += f"Speech: {row['text']}\nClassification: {row['label']}\n\n"

    prompt += f"""Now classify this speech:

Speech: {text}

Key indicators:
Left-wing (0): Social welfare, workers' rights, public services
Right-wing (1): Free market, traditional values, limited government

Classification (ONLY 0 or 1):"""

    return prompt

test_texts = test_orientation['text'].tolist()
test_labels = test_orientation['label'].tolist()

predicted_labels = []
for text in test_texts:
    prompt = create_prompt(text)
    output = causal_pipeline(prompt,
        max_new_tokens=3,
        temperature=0.1,
        top_p=0.9,
        do_sample=False,
        num_return_sequences=1
    )
    pred_text = output[0]['generated_text'].strip()

    if '0' in pred_text[-5:]:
        predicted_labels.append(0)
    elif '1' in pred_text[-5:]:
        predicted_labels.append(1)
    else:
        predicted_labels.append(0)

metrics = {
    'accuracy': accuracy_score(test_labels, predicted_labels),
    'precision': precision_score(test_labels, predicted_labels, average='weighted'),
    'recall': recall_score(test_labels, predicted_labels, average='weighted'),
    'f1': f1_score(test_labels, predicted_labels, average='weighted')
}

print("\nCausal Model Results:")
for metric, value in metrics.items():
    print(f"{metric.capitalize()}: {value:.4f}")

print("\nClassification Report:")
print(classification_report(test_labels, predicted_labels))


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for


Causal Model Results:
Accuracy: 0.5347
Precision: 0.5379
Recall: 0.5347
F1: 0.5234

Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.69      0.60       253
           1       0.55      0.38      0.45       252

    accuracy                           0.53       505
   macro avg       0.54      0.53      0.52       505
weighted avg       0.54      0.53      0.52       505



In [None]:
def create_prompt(text):
   examples = train_orientation.sample(n=100)
   prompt = "Analyze parliamentary speeches and classify as left-wing (0) or right-wing (1).\n\n"

   for _, row in examples.iterrows():
       prompt += f"Speech: {row['text_en']}\nClassification: {row['label']}\n\n"

   prompt += f"""Now classify this speech:

Speech: {text}

Key indicators:
Left-wing (0): Social welfare, workers' rights, public services
Right-wing (1): Free market, traditional values, limited government

Classification (ONLY 0 or 1):"""

   return prompt

test_texts = test_orientation['text_en'].tolist()
test_labels = test_orientation['label'].tolist()

predicted_labels = []
for text in test_texts:
   prompt = create_prompt(text)
   output = causal_pipeline(prompt,
       max_new_tokens=3,
       temperature=0.1,
       top_p=0.9,
       do_sample=False,
       num_return_sequences=1
   )
   pred_text = output[0]['generated_text'].strip()

   if '0' in pred_text[-5:]:
       predicted_labels.append(0)
   elif '1' in pred_text[-5:]:
       predicted_labels.append(1)
   else:
       predicted_labels.append(0)

metrics = {
   'accuracy': accuracy_score(test_labels, predicted_labels),
   'precision': precision_score(test_labels, predicted_labels, average='weighted'),
   'recall': recall_score(test_labels, predicted_labels, average='weighted'),
   'f1': f1_score(test_labels, predicted_labels, average='weighted')
}

print("\nCausal Model Results:")
for metric, value in metrics.items():
   print(f"{metric.capitalize()}: {value:.4f}")

print("\nClassification Report:")
print(classification_report(test_labels, predicted_labels))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for


Causal Model Results:
Accuracy: 0.5010
Precision: 0.5005
Recall: 0.5010
F1: 0.3859

Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.93      0.65       253
           1       0.50      0.07      0.12       252

    accuracy                           0.50       505
   macro avg       0.50      0.50      0.39       505
weighted avg       0.50      0.50      0.39       505

