In [None]:
from datasets import load_dataset
from transformers import (AutoTokenizer, 
                          DataCollatorWithPadding, 
                          AutoModelForSequenceClassification, 
                          TrainingArguments, 
                          Trainer, 
                          create_optimizer, 
                          pipeline)
import evaluate
import numpy as np
import torch

In [4]:
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


CUDA available: True


In [None]:
imdb = load_dataset("imdb")
imdb.shape

{'train': (25000, 2), 'test': (25000, 2), 'unsupervised': (50000, 2)}

In [21]:
small_train = imdb["train"].shuffle().select(range(1000)) 
small_test = imdb["test"].shuffle().select(range(200)) 

In [6]:
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

In [7]:
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True)

In [22]:
tokenized_train = small_train.map(preprocess_function, batched=True)
tokenized_test = small_test.map(preprocess_function, batched=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [11]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
accuracy = evaluate.load("accuracy")

In [12]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [13]:
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert/distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [15]:
def monitor_gpu_memory():
    """Моніторинг використання пам'яті GPU"""
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated(0) / 1024**3
        reserved = torch.cuda.memory_reserved(0) / 1024**3
        print(f"GPU Memory - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB")
        return allocated, reserved
    return 0, 0
monitor_gpu_memory()

GPU Memory - Allocated: 0.25 GB, Reserved: 0.29 GB


(0.250485897064209, 0.28515625)

In [17]:
training_args = TrainingArguments(
    output_dir="my_model",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    push_to_hub=False,
    load_best_model_at_end=True,
    dataloader_num_workers=0,
    fp16=True, 
    gradient_accumulation_steps=4,  
    warmup_steps=500,
    report_to=None,                
    dataloader_pin_memory=False,
)

In [23]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [None]:
import time

print("\nПочаток тренування...")
start_time = time.time()

try:
    trainer.train()
    training_time = time.time() - start_time
    print(f"\nТренування завершено за {training_time:.2f} секунд ({training_time/3600:.2f} годин)")
except RuntimeError as e:
    if "out of memory" in str(e):
        print("Помилка: Недостатньо пам'яті GPU!")
    else:
        print(f"Помилка тренування: {e}")


Початок тренування...


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.691675,0.52
2,No log,0.651151,0.665



Тренування завершено за 4781.75 секунд (1.33 годин)


In [29]:
trainer.save_model()

In [34]:
text = "This was a masterpiece. Not completely faithful to the books, but enthralling from beginning to end. Might be my favorite of the three."

In [35]:
classifier = pipeline("sentiment-analysis", model="my_model")
classifier(text)

Device set to use cuda:0


[{'label': 'POSITIVE', 'score': 0.5079233646392822}]