In [1]:
!pip install transformers datasets torch evaluate huggingface_hub

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m8

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, BertTokenizer, BertModel, AutoModelForCausalLM, pipeline
from datasets import Dataset
from google.colab import userdata
import torch
import numpy
from huggingface_hub import login
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
power_data = pd.read_csv("/content/power-lv-train.tsv", sep="\t")
dataset = Dataset.from_pandas(power_data)


In [4]:
login(userdata.get('h_token'))

In [5]:
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    offload_folder="auto",
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    offload_folder="auto",
    device_map="auto",
    torch_dtype=torch.float16
)

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [6]:
def inference(batch, lang="text"):
    results = []
    for text in tqdm(batch[lang], desc=f"Classifying texts ({lang})"):
        prompt = (
           f"Classify the following political statement into one of two categories: 'opposition' or 'governing'. "
            f"Respond with only the category, and say 'the answer is 'opposition.' or 'the answer is 'governing.'."
            f"Statement: {text}\n\n"
            f"Answer:"
        )

        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.7, num_return_sequences=1)

        raw_prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        prediction = raw_prediction.split("Answer:")[1].strip().lower()

        if "governing" in prediction.lower():
            results.append(0)
        else:
            results.append(1)

    return {"predictions": results}



en_predictions = []
batch_size = 10

for i in range(0, len(dataset), batch_size):
    batch = dataset[i : i + batch_size]
    predictions = inference(batch, "text_en")
    en_predictions.extend(predictions["predictions"])

lv_predictions = []
for i in range(0, len(dataset), batch_size):
    batch = dataset[i : i + batch_size]
    predictions = inference(batch)
    lv_predictions.extend(predictions["predictions"])


Classifying texts (text_en):   0%|          | 0/10 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Classifying texts (text_en):  10%|█         | 1/10 [00:03<00:34,  3.85s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Classifying texts (text_en):  20%|██        | 2/10 [00:05<00:22,  2.76s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Classifying texts (text_en):  30%|███       | 3/10 [00:07<00:16,  2.41s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Classifying texts (text_en):  40%|████      | 4/10 [00:09<00:13,  2.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Classifying texts (text_en):  50%|█████     | 5/10 [00:11<00:10,  2.17s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Classifying texts (text_en):  60%|██████    | 6/10 [00:12<00:05,  1.48s/it]Setting `pad_token_id` to `eos_token_id`:1280

In [7]:
print("Classification Report for Latvian:")
print(classification_report(dataset["label"], lv_predictions))

print("Classification Report for English:")
print(classification_report(dataset["label"], en_predictions))

Classification Report for Latvian:
              precision    recall  f1-score   support

           0       0.75      0.27      0.39       944
           1       0.35      0.82      0.49       466

    accuracy                           0.45      1410
   macro avg       0.55      0.54      0.44      1410
weighted avg       0.62      0.45      0.43      1410

Classification Report for English:
              precision    recall  f1-score   support

           0       0.72      0.57      0.64       944
           1       0.39      0.56      0.46       466

    accuracy                           0.57      1410
   macro avg       0.56      0.57      0.55      1410
weighted avg       0.61      0.57      0.58      1410
