In [None]:
import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# Optional: pin the visible GPU(s), e.g. "0" for one card or "0,1" for several.
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Используем устройство: {device}")

# Load the IMDB dataset.
dataset = load_dataset("imdb")

# BERT tokenizer plus a sequence-classification model with two output labels;
# the model is placed on the selected device as soon as it is created.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
).to(device)

# Tokenization
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Uses the notebook-level ``tokenizer`` with truncation enabled and
    ``padding="max_length"``.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw texts.

    Returns:
        Whatever the tokenizer returns for those texts.
    """
    raw_texts = examples["text"]
    return tokenizer(raw_texts, truncation=True, padding="max_length")

# Tokenize every split in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Keep small, deterministically shuffled subsets (1000 examples each) so that
# training and evaluation finish quickly.
small_train_dataset, small_eval_dataset = (
    tokenized_datasets[split_name].shuffle(seed=42).select(range(1000))
    for split_name in ("train", "test")
)

# Training configuration.
# The per-epoch evaluation schedule must be passed to the constructor: assigning
# `training_args.evaluation_strategy` afterwards bypasses the argument
# post-processing, and recent transformers releases read `eval_strategy`
# instead (the recorded run logged no validation loss during training).
# The keyword was renamed from `evaluation_strategy` to `eval_strategy`, so use
# whichever field the installed version defines.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)
# NOTE(review): `report_to` is left at its default; the recorded run prompted
# for a wandb API key. Pass report_to="none" if the notebook must run
# non-interactively.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    save_strategy="no",
    logging_dir='./logs',
    logging_steps=50,
    seed=42,
    **{_eval_strategy_kwarg: "epoch"},
)
# Build the Trainer.
# Newer transformers releases accept the tokenizer as `processing_class` and
# warn about the older `tokenizer=` keyword, so pass it under whichever name
# the installed Trainer signature exposes.
import inspect

_tokenizer_kwarg = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    **{_tokenizer_kwarg: tokenizer},
)

# Training
trainer.train()

# Evaluation (kept as the last expression so the notebook displays the metrics)
trainer.evaluate()


Используем устройство: cuda


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

  trainer = Trainer(


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33m8tem[0m ([33m8tem-mirea[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
50,0.6914
100,0.3642


{'eval_loss': 0.3219926953315735,
 'eval_runtime': 27.784,
 'eval_samples_per_second': 35.992,
 'eval_steps_per_second': 4.499,
 'epoch': 1.0}

In [None]:
import torch.nn as nn
import math

class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Queries, keys and values are all projected from the same input ``x``.

    Args:
        d_model: Size of the last dimension of the input and of the output.
        num_heads: Number of attention heads; must be positive and divide
            ``d_model`` evenly.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide
            ``d_model``.
    """

    def __init__(self, d_model=512, num_heads=8):
        super().__init__()
        # Without this check the per-head reshape in forward() either fails
        # with an obscure error or silently mixes features across positions.
        if num_heads <= 0 or d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive "
                f"num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_k = d_model // num_heads

        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)

    def _split_heads(self, projected, batch_size):
        """Reshape ``(batch, seq, d_model)`` to ``(batch, heads, seq, d_k)``."""
        return projected.view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)

    def forward(self, x, mask=None):
        """Apply self-attention to ``x``.

        Args:
            x: Input tensor whose first dimension is the batch and whose last
                dimension is ``d_model``.
            mask: Optional tensor broadcastable to
                ``(batch, num_heads, seq, seq)``. Positions where the mask is
                0 / False are excluded from attention. A query row that is
                masked everywhere yields NaN, so keep at least one visible key
                per query.

        Returns:
            Tensor of shape ``(batch, seq, d_model)``.
        """
        batch_size = x.size(0)

        Q = self._split_heads(self.W_q(x), batch_size)
        K = self._split_heads(self.W_k(x), batch_size)
        V = self._split_heads(self.W_v(x), batch_size)

        # Scaled dot-product scores: (batch, heads, seq, seq).
        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float("-inf"))
        attention = torch.softmax(scores, dim=-1)
        output = torch.matmul(attention, V)

        # Merge the heads back into a single d_model-sized feature dimension.
        output = output.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        return self.W_o(output)

In [None]:
class TransformerEncoderLayer(nn.Module):
    """Encoder block: self-attention followed by a feed-forward network.

    Each sub-layer is wrapped in a residual connection and followed by a
    ``LayerNorm``.

    Args:
        d_model: Feature size of the input and output.
        num_heads: Number of heads passed to ``MultiHeadAttention``.
        ff_dim: Hidden size of the feed-forward network.
    """

    def __init__(self, d_model, num_heads, ff_dim=2048):
        super().__init__()
        # Self-attention sub-layer and its normalization.
        self.attention = MultiHeadAttention(d_model, num_heads)
        self.norm1 = nn.LayerNorm(d_model)

        # Feed-forward sub-layer: d_model -> ff_dim -> d_model with a ReLU
        # in between, and its normalization.
        expand = nn.Linear(d_model, ff_dim)
        activation = nn.ReLU()
        contract = nn.Linear(ff_dim, d_model)
        self.ff = nn.Sequential(expand, activation, contract)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        """Run ``x`` through both sub-layers and return the normalized result."""
        attended = self.norm1(x + self.attention(x))
        return self.norm2(attended + self.ff(attended))

In [None]:
import torch
import torch.nn.functional as F

def predict_sentiment(texts, model, tokenizer, device):
    """Classify ``texts`` with ``model`` and return predictions and probabilities.

    Args:
        texts: Texts passed to ``tokenizer`` in a single batch.
        model: Callable model with an ``eval()`` method whose output exposes
            ``.logits``.
        tokenizer: Callable returning a mapping of tensors when invoked with
            ``padding=True, truncation=True, return_tensors="pt"``.
        device: Device the input tensors are moved to before the forward pass.

    Returns:
        Tuple ``(preds, probs)`` of NumPy arrays: the argmax class index for
        each text and the softmax of the logits over the last dimension.
    """
    # Put the model in evaluation mode before the forward pass.
    model.eval()

    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    # Move every input tensor to the requested device (GPU or CPU).
    batch = {}
    for name, tensor in encoded.items():
        batch[name] = tensor.to(device)

    # No gradients are needed for prediction.
    with torch.no_grad():
        logits = model(**batch).logits
        probs = torch.softmax(logits, dim=-1)
        preds = probs.argmax(dim=-1)

    return preds.cpu().numpy(), probs.cpu().numpy()

# Example usage
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Используем устройство: {device}")

# `model` and `tokenizer` are expected to be loaded already.
model.to(device)

texts = [
    "This movie was fantastic! I really enjoyed it.",
    "Terrible film. Waste of time.",
    "It was okay, not the best but not the worst.",
]

preds, probs = predict_sentiment(texts, model, tokenizer, device)

# Report the predicted label and the probability assigned to it for each text.
for text, pred, prob in zip(texts, preds, probs):
    if pred == 1:
        label = "Positive"
    else:
        label = "Negative"
    confidence = prob[pred]
    print(f"Text: {text}\nPrediction: {label} (confidence: {confidence:.2f})\n")


Используем устройство: cuda
Text: This movie was fantastic! I really enjoyed it.
Prediction: Positive (confidence: 0.97)

Text: Terrible film. Waste of time.
Prediction: Negative (confidence: 0.96)

Text: It was okay, not the best but not the worst.
Prediction: Negative (confidence: 0.87)



In [None]:
# Take the first 10 evaluation examples.
test_samples = small_eval_dataset.select(range(10))

# Use the original review text instead of decoding `input_ids`: decoding gives
# lower-cased, re-spaced text cut off at the tokenizer's maximum length
# (e.g. "< br / >"), not the actual review. The "text" column is still present
# because the tokenizing `map` call did not remove any columns.
texts = list(test_samples["text"])

# Ground-truth labels.
labels = list(test_samples["label"])

# Pick the device (GPU when available) and make sure the model lives on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Model predictions, with inputs moved to the same device.
preds, probs = predict_sentiment(texts, model, tokenizer, device)

# Print the true label next to the prediction and its probability.
for text, true_label, pred, prob in zip(texts, labels, preds, probs):
    true_str = "Positive" if true_label == 1 else "Negative"
    pred_str = "Positive" if pred == 1 else "Negative"
    confidence = prob[pred]
    print(f"Text: {text}\nTrue: {true_str}, Predicted: {pred_str} (confidence: {confidence:.2f})\n")


Text: < br / > < br / > when i unsuspectedly rented a thousand acres, i thought i was in for an entertaining king lear story and of course michelle pfeiffer was in it, so what could go wrong? < br / > < br / > very quickly, however, i realized that this story was about a thousand other things besides just acres. i started crying and couldn ' t stop until long after the movie ended. thank you jane, laura and jocelyn, for bringing us such a wonderfully subtle and compassionate movie! thank you cast, for being involved and portraying the characters with such depth and gentleness! < br / > < br / > i recognized the angry sister ; the runaway sister and the sister in denial. i recognized the abusive husband and why he was there and then the father, oh oh the father... all superbly played. i also recognized myself and this movie was an eye - opener, a relief, a chance to face my own truth and finally doing something about it. i truly hope a thousand acres has had the same effect on some othe