In [None]:
!pip install transformers
!pip install numpy==1.26.4

Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-1.26.4


In [None]:
import pandas as pd
import torch
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from datasets import Dataset
from transformers import (
    RobertaTokenizerFast,
    RobertaForSequenceClassification,
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
)
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


def tokenize(example):
    """Tokenize the ``review`` field of a dataset example (or batch of examples).

    Uses whichever module-level ``tokenizer`` is bound at call time. Truncation
    is enabled and no explicit maximum length is passed.
    """
    reviews = example["review"]
    encoded = tokenizer(reviews, truncation=True)
    return encoded

def compute_metrics(eval_pred):
    """Compute accuracy from a ``(logits, labels)`` evaluation pair.

    The predicted class for each row is the index of the largest logit along
    the last axis.

    Returns:
        dict: ``{"accuracy": <accuracy_score result>}``.
    """
    raw_scores, references = eval_pred
    predicted = torch.tensor(raw_scores).argmax(dim=-1)
    return {"accuracy": accuracy_score(references, predicted)}

def predict_sentiment(texts, batch_size=128, max_length=512):
    """Classify ``texts`` in batches with the module-level ``model`` and ``tokenizer``.

    Relies on the globals ``tokenizer``, ``model`` and ``device`` being set up by
    the calling cell (the model is expected to already live on ``device``).

    Args:
        texts: Sliceable sequence of strings (e.g. a list) to classify.
        batch_size: Number of texts tokenized and scored per forward pass.
        max_length: Maximum token length passed to the tokenizer; longer inputs
            are truncated.

    Returns:
        tuple: ``(all_preds, all_probs)`` where ``all_preds`` holds one predicted
        class index per text and ``all_probs`` one NumPy row of softmax
        probabilities per text. Both are empty lists when ``texts`` is empty.
    """
    all_preds = []
    all_probs = []
    for i in tqdm(range(0, len(texts), batch_size), desc="Predicting"):
        # The loop body must be indented under the ``for``; otherwise the
        # function does not even parse.
        batch = texts[i:i+batch_size]
        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True,
                           max_length=max_length).to(device)

        # Inference only: no autograd bookkeeping needed.
        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
            preds = torch.argmax(probs, dim=1)

        # Move results to host memory before accumulating them.
        all_preds.extend(preds.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())

    return all_preds, all_probs

### DistilBERT

In [None]:
# Fine-tune DistilBERT on the IMDB reviews.
# NOTE: `Dataset` must be `datasets.Dataset` here. A later cell in this notebook
# rebinds the name to `torch.utils.data.Dataset`, so re-run the imports cell
# before re-running this one after that cell has executed.

# Load the reviews, shuffle them reproducibly and encode the sentiment as 0/1.
df_all = pd.read_csv('/content/IMDB Dataset.csv')

df_all = df_all.sample(frac=1, random_state=42).reset_index(drop=True)
df_all["label"] = df_all["sentiment"].map({"negative": 0, "positive": 1})

# Hold out 10% of the rows for validation.
train_df, val_df = train_test_split(df_all, test_size=0.1, random_state=42)
train_dataset = Dataset.from_pandas(train_df[["review", "label"]])
val_dataset = Dataset.from_pandas(val_df[["review", "label"]])

# `tokenize` (defined above) reads this module-level `tokenizer`.
# NOTE(review): the tokenizer comes from the base checkpoint while the model below
# comes from the SST-2 fine-tuned checkpoint; presumably both share one vocabulary - confirm.
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

train_dataset = train_dataset.map(tokenize, batched=True)
val_dataset = val_dataset.map(tokenize, batched=True)

# Drop the raw text once it is tokenized and expose the rest as torch tensors.
train_dataset = train_dataset.remove_columns(["review"])
val_dataset = val_dataset.remove_columns(["review"])
train_dataset.set_format("torch")
val_dataset.set_format("torch")


model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english", num_labels=2
)

# Evaluate and checkpoint on the same step interval so that
# `load_best_model_at_end` can pick the checkpoint with the best accuracy.
training_args = TrainingArguments(
    dataloader_num_workers=2,
    report_to=[],
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=200,
    save_strategy="steps",
    save_steps=200,
    num_train_epochs=2,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=64,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    #use_cpu=True,
    fp16=True,
)

# Pads each batch to its longest sequence (tokenization above only truncates).
data_collator = DataCollatorWithPadding(tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # `tokenizer=` is deprecated on Trainer; `processing_class` is its replacement.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()


# Persist model weights and tokenizer files side by side for later reloading.
model.save_pretrained("./finetuned-distilbert-sentiment")
tokenizer.save_pretrained("./finetuned-distilbert-sentiment")

  trainer = Trainer(


Step,Training Loss,Validation Loss,Accuracy
200,0.2392,0.183361,0.9292
400,0.1902,0.17966,0.934
600,0.0824,0.188229,0.9356
800,0.1001,0.17569,0.9378


('./finetuned-distilbert-sentiment/tokenizer_config.json',
 './finetuned-distilbert-sentiment/special_tokens_map.json',
 './finetuned-distilbert-sentiment/vocab.txt',
 './finetuned-distilbert-sentiment/added_tokens.json',
 './finetuned-distilbert-sentiment/tokenizer.json')

### RoBERTa

In [None]:
# Fine-tune RoBERTa on the IMDB reviews.
# NOTE: `Dataset` must be `datasets.Dataset` here. A later cell in this notebook
# rebinds the name to `torch.utils.data.Dataset`, so re-run the imports cell
# before re-running this one after that cell has executed.

# Load the reviews, shuffle them reproducibly and encode the sentiment as 0/1.
df_all = pd.read_csv('/content/IMDB Dataset.csv')

df_all = df_all.sample(frac=1, random_state=42).reset_index(drop=True)
df_all["label"] = df_all["sentiment"].map({"negative": 0, "positive": 1})

# Hold out 10% of the rows for validation.
train_df, val_df = train_test_split(df_all, test_size=0.1, random_state=42)
train_dataset = Dataset.from_pandas(train_df[["review", "label"]])
val_dataset = Dataset.from_pandas(val_df[["review", "label"]])

# `tokenize` (defined above) reads this module-level `tokenizer`.
tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")

train_dataset = train_dataset.map(tokenize, batched=True)
val_dataset = val_dataset.map(tokenize, batched=True)

# Drop the raw text once it is tokenized and expose the rest as torch tensors.
train_dataset = train_dataset.remove_columns(["review"])
val_dataset = val_dataset.remove_columns(["review"])
train_dataset.set_format("torch")
val_dataset.set_format("torch")


model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

# Evaluate and checkpoint on the same step interval so that
# `load_best_model_at_end` can pick the checkpoint with the best accuracy.
training_args = TrainingArguments(
    report_to=[],
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=416,
    save_strategy="steps",
    save_steps=416,
    num_train_epochs=2,
    per_device_train_batch_size=54,
    per_device_eval_batch_size=54,
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    fp16=True,
    dataloader_num_workers=2,
)

# Pads each batch to its longest sequence (tokenization above only truncates).
data_collator = DataCollatorWithPadding(tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # `tokenizer=` is deprecated on Trainer; `processing_class` is its replacement.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()


# Persist model weights and tokenizer files side by side for later reloading.
model.save_pretrained("./finetuned-roberta-sentiment")
tokenizer.save_pretrained("./finetuned-roberta-sentiment")

  trainer = Trainer(


Step,Training Loss,Validation Loss,Accuracy
416,0.1951,0.160606,0.9384
832,0.1547,0.141089,0.9496
1248,0.083,0.181984,0.9396
1664,0.0797,0.152651,0.9536


('./finetuned-roberta-sentiment/tokenizer_config.json',
 './finetuned-roberta-sentiment/special_tokens_map.json',
 './finetuned-roberta-sentiment/vocab.json',
 './finetuned-roberta-sentiment/merges.txt',
 './finetuned-roberta-sentiment/added_tokens.json',
 './finetuned-roberta-sentiment/tokenizer.json')

In [None]:
# @title Evaluate RoBERTa

# Rebuild the shuffled frame and 0/1 labels exactly as in the fine-tuning cell so
# that the split below reproduces the same validation rows.
df_all = pd.read_csv('/content/IMDB Dataset.csv')

df_all = df_all.sample(frac=1, random_state=42).reset_index(drop=True)
df_all["label"] = df_all["sentiment"].map({"negative": 0, "positive": 1})

# Load from the directory the fine-tuning cell saves to ("finetuned", with a "d");
# the previous "finetune-roberta-sentiment" path is never written by this notebook.
model_path = "./finetuned-roberta-sentiment"
tokenizer = RobertaTokenizerFast.from_pretrained(model_path)
model = RobertaForSequenceClassification.from_pretrained(model_path)

# `predict_sentiment` reads the module-level `tokenizer`, `model` and `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

train_df, val_df = train_test_split(df_all, test_size=0.1, random_state=42)

preds, probs = predict_sentiment(val_df["review"].tolist())

true_labels = val_df["label"].tolist()
acc = accuracy_score(true_labels, preds)
print(f"\n✅ Accuracy: {acc:.4f}")
print(classification_report(true_labels, preds, target_names=["negative", "positive"]))

Predicting: 100%|██████████| 40/40 [2:31:57<00:00, 227.94s/it]


✅ Accuracy: 0.9536
              precision    recall  f1-score   support

    negative       0.96      0.95      0.95      2512
    positive       0.95      0.96      0.95      2488

    accuracy                           0.95      5000
   macro avg       0.95      0.95      0.95      5000
weighted avg       0.95      0.95      0.95      5000






In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from tokenizers import Tokenizer, models, trainers, pre_tokenizers

import os

# === CUDA settings ===
torch.backends.cudnn.benchmark = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Data ===
df = pd.read_csv("/content/IMDB Dataset.csv")
df['label'] = df['sentiment'].map({'negative': 0, 'positive': 1})
train_df, val_df = train_test_split(df, test_size=0.05, random_state=42)

# === Tokenizer ===
SPECIAL_TOKENS = ["[PAD]", "[UNK]", "[CLS]"]
TOKENIZER_VOCAB_SIZE = 8000
# The cache file name encodes the requested vocabulary size so that a tokenizer
# trained with a different size is never silently reused from disk.
TOKENIZER_PATH = f"my_tokenizer_{TOKENIZER_VOCAB_SIZE}.json"
if not os.path.exists(TOKENIZER_PATH):
    tokenizer = Tokenizer(models.BPE(unk_token="[UNK]"))
    tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
    # Named `bpe_trainer` so it does not overwrite the Hugging Face `trainer`
    # object created by the fine-tuning cells.
    bpe_trainer = trainers.BpeTrainer(vocab_size=TOKENIZER_VOCAB_SIZE, special_tokens=SPECIAL_TOKENS)
    # Train on the training split only.
    tokenizer.train_from_iterator(train_df["review"].tolist(), bpe_trainer)
    tokenizer.save(TOKENIZER_PATH)

# Always reload from disk so the cached and freshly trained paths behave the same.
tokenizer = Tokenizer.from_file(TOKENIZER_PATH)
vocab_size = tokenizer.get_vocab_size()

# === Hyperparameters ===
MAX_LEN = 512
BATCH_SIZE = 128
EPOCHS = 4
D_MODEL = 384
N_HEADS = 8
N_LAYERS = 4

# === Dataset ===
class IMDBDataset(Dataset):
    """Map-style dataset that tokenizes a review on access and pads it to ``MAX_LEN``.

    Reads the module-level ``tokenizer`` and ``MAX_LEN`` inside ``__getitem__``.
    """

    def __init__(self, texts, labels):
        """Store parallel sequences of review texts and integer labels."""
        self.texts = texts
        self.labels = labels

    def __len__(self):
        """Number of stored texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``label`` tensors for one item."""
        cls_id = tokenizer.token_to_id("[CLS]")
        pad_id = tokenizer.token_to_id("[PAD]")

        # [CLS] followed by at most MAX_LEN - 1 token ids of the review.
        token_ids = [cls_id, *tokenizer.encode(self.texts[idx]).ids[:MAX_LEN - 1]]
        n_real = len(token_ids)
        n_pad = MAX_LEN - n_real

        return {
            "input_ids": torch.tensor(token_ids + [pad_id] * n_pad),
            # 1 for real tokens, 0 for padding.
            "attention_mask": torch.tensor([1] * n_real + [0] * n_pad),
            "label": torch.tensor(self.labels[idx]),
        }

# Wrap each split's review texts and labels in the tokenizing dataset.
train_dataset = IMDBDataset(
    texts=train_df["review"].tolist(),
    labels=train_df["label"].tolist(),
)
val_dataset = IMDBDataset(
    texts=val_df["review"].tolist(),
    labels=val_df["label"].tolist(),
)

# Only the training loader shuffles; both use two worker processes.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=2,
)

# === Positional Encoding ===
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a batch-first tensor.

    Even feature columns hold ``sin(position * freq)`` and odd columns hold
    ``cos(position * freq)``. The table is registered as the buffer ``pe`` with
    shape ``(1, max_len, d_model)``, so it follows the module across devices and
    is not a trainable parameter.

    Args:
        d_model: Feature dimension of the inputs (even or odd).
        max_len: Number of positions precomputed in the table.
    """

    def __init__(self, d_model, max_len=512):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-torch.log(torch.tensor(10000.0)) / d_model))
        angles = position.float() * div_term
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns, so
        # the cosine block is trimmed to fit; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1)]

# === Model ===
class MiniTransformer(nn.Module):
    """Small Transformer-encoder classifier that reads its prediction from position 0.

    Pipeline: token embedding -> sinusoidal positional encoding -> stacked
    encoder layers -> LayerNorm and dropout on the first position -> linear head.
    """

    def __init__(self, vocab_size, d_model, nhead, num_layers, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)

        # Batch-first GELU encoder layer with a 4x wide feed-forward block.
        layer_kwargs = dict(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=4 * d_model,
            dropout=0.2,
            activation='gelu',
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_kwargs),
            num_layers=num_layers,
        )

        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(0.2)
        self.cls_fc = nn.Linear(d_model, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return class logits for a batch of token ids and its 1/0 attention mask."""
        hidden = self.positional_encoding(self.embedding(input_ids))
        # The encoder expects True where a key position must be ignored, i.e. the
        # inverse of the attention mask.
        padding_mask = attention_mask.bool().logical_not()
        hidden = self.transformer(hidden, src_key_padding_mask=padding_mask)
        pooled = self.norm(hidden[:, 0])
        return self.cls_fc(self.dropout(pooled))

# === Initialization ===
from itertools import islice

model = MiniTransformer(vocab_size, D_MODEL, N_HEADS, N_LAYERS, 2).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=0.01)
criterion = nn.CrossEntropyLoss()
# Mixed precision is only active on CUDA; on CPU both autocast and the scaler are
# disabled. `torch.amp.*` replaces the deprecated `torch.cuda.amp.*` entry points.
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

QUICK_VAL_EVERY = 300    # run a quick validation every N training steps
QUICK_VAL_BATCHES = 10   # number of validation batches used by the quick check


def _evaluate_accuracy(loader, max_batches=None):
    """Return the accuracy of the global ``model`` on ``loader``.

    Args:
        loader: DataLoader yielding dicts with ``input_ids``, ``attention_mask``
            and ``label`` tensors.
        max_batches: If given, only the first ``max_batches`` batches are pulled
            from the loader; ``None`` evaluates every batch.

    The model is switched to eval mode and left there; callers that continue
    training must call ``model.train()`` afterwards.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        # islice stops pulling batches once the limit is reached instead of
        # materializing the whole loader first.
        for eval_batch in islice(loader, max_batches):
            ids = eval_batch["input_ids"].to(device)
            mask = eval_batch["attention_mask"].to(device)
            lbls = eval_batch["label"].to(device)
            with torch.amp.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(ids, mask)
            predicted.extend(torch.argmax(outputs, dim=1).cpu().numpy())
            expected.extend(lbls.cpu().numpy())
    return accuracy_score(expected, predicted)


# === Training ===
for epoch in range(EPOCHS):
    model.train()
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
    for step, batch in enumerate(loop):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()
        with torch.amp.autocast(device_type=device.type, enabled=use_amp):
            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)

        # Scale the loss before backward; the scaler unscales before the optimizer
        # step and adapts its scale factor afterwards.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # === Quick mini-validation every QUICK_VAL_EVERY steps ===
        if (step + 1) % QUICK_VAL_EVERY == 0:
            val_acc = _evaluate_accuracy(val_loader, max_batches=QUICK_VAL_BATCHES)
            loop.set_postfix(loss=loss.item(), val_acc=val_acc)
            model.train()

    # === Full validation after each epoch ===
    epoch_acc = _evaluate_accuracy(val_loader)
    print(f"\n✅ Epoch {epoch+1} Validation Accuracy: {epoch_acc:.4f}\n")

  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
Epoch 1: 100%|██████████| 372/372 [02:47<00:00,  2.22it/s, loss=0.493, val_acc=0.803]
  with torch.cuda.amp.autocast():



✅ Epoch 1 Validation Accuracy: 0.8256



  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
Epoch 2: 100%|██████████| 372/372 [02:47<00:00,  2.21it/s, loss=0.362, val_acc=0.848]
  with torch.cuda.amp.autocast():



✅ Epoch 2 Validation Accuracy: 0.8608



  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
Epoch 3: 100%|██████████| 372/372 [02:47<00:00,  2.22it/s, loss=0.28, val_acc=0.866]
  with torch.cuda.amp.autocast():



✅ Epoch 3 Validation Accuracy: 0.8780



  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
Epoch 4: 100%|██████████| 372/372 [02:47<00:00,  2.22it/s, loss=0.297, val_acc=0.878]
  with torch.cuda.amp.autocast():



✅ Epoch 4 Validation Accuracy: 0.8732



In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from tokenizers import Tokenizer, models, trainers, pre_tokenizers

import os

# === CUDA settings ===
torch.backends.cudnn.benchmark = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Data ===
df = pd.read_csv("/content/IMDB Dataset.csv")
df['label'] = df['sentiment'].map({'negative': 0, 'positive': 1})
train_df, val_df = train_test_split(df, test_size=0.05, random_state=42)

# === Tokenizer ===
SPECIAL_TOKENS = ["[PAD]", "[UNK]", "[CLS]"]
TOKENIZER_VOCAB_SIZE = 16000
# The cache file name encodes the requested vocabulary size. With a shared
# "my_tokenizer.json", a file left behind by a run with another vocab_size would
# be loaded as-is and the 16000-token tokenizer would never be trained.
TOKENIZER_PATH = f"my_tokenizer_{TOKENIZER_VOCAB_SIZE}.json"
if not os.path.exists(TOKENIZER_PATH):
    tokenizer = Tokenizer(models.BPE(unk_token="[UNK]"))
    tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
    # Named `bpe_trainer` so it does not overwrite the Hugging Face `trainer`
    # object created by the fine-tuning cells.
    bpe_trainer = trainers.BpeTrainer(vocab_size=TOKENIZER_VOCAB_SIZE, special_tokens=SPECIAL_TOKENS)
    # Train on the training split only.
    tokenizer.train_from_iterator(train_df["review"].tolist(), bpe_trainer)
    tokenizer.save(TOKENIZER_PATH)

# Always reload from disk so the cached and freshly trained paths behave the same.
tokenizer = Tokenizer.from_file(TOKENIZER_PATH)
vocab_size = tokenizer.get_vocab_size()

# === Hyperparameters ===
MAX_LEN = 512
BATCH_SIZE = 256
EPOCHS = 16
D_MODEL = 192
N_HEADS = 4
N_LAYERS = 2

# === Dataset ===
class IMDBDataset(Dataset):
    """Tokenize-on-access dataset producing fixed-length (``MAX_LEN``) examples.

    Depends on the module-level ``tokenizer`` and ``MAX_LEN``.
    """

    def __init__(self, texts, labels):
        """Keep references to the review texts and their integer labels."""
        self.texts = texts
        self.labels = labels

    def __len__(self):
        """Number of stored texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Build the padded id/mask/label tensors for the example at ``idx``."""
        review_ids = tokenizer.encode(self.texts[idx]).ids

        # Prefix [CLS], then keep at most MAX_LEN - 1 review tokens.
        sequence = [tokenizer.token_to_id("[CLS]")]
        sequence.extend(review_ids[:MAX_LEN - 1])
        mask = [1] * len(sequence)

        # Right-pad ids with [PAD] and the mask with zeros up to MAX_LEN.
        padding = MAX_LEN - len(sequence)
        sequence.extend([tokenizer.token_to_id("[PAD]")] * padding)
        mask.extend([0] * padding)

        item = {}
        item["input_ids"] = torch.tensor(sequence)
        item["attention_mask"] = torch.tensor(mask)
        item["label"] = torch.tensor(self.labels[idx])
        return item

# Build the tokenizing datasets from the review/label columns of each split.
train_texts, train_labels = train_df["review"].tolist(), train_df["label"].tolist()
val_texts, val_labels = val_df["review"].tolist(), val_df["label"].tolist()
train_dataset = IMDBDataset(train_texts, train_labels)
val_dataset = IMDBDataset(val_texts, val_labels)

# Training batches are shuffled; validation keeps the dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=2,
)

# === Positional Encoding ===
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a batch-first tensor.

    Even feature columns hold ``sin(position * freq)`` and odd columns hold
    ``cos(position * freq)``. The table is registered as the buffer ``pe`` with
    shape ``(1, max_len, d_model)``, so it follows the module across devices and
    is not a trainable parameter.

    Args:
        d_model: Feature dimension of the inputs (even or odd).
        max_len: Number of positions precomputed in the table.
    """

    def __init__(self, d_model, max_len=512):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-torch.log(torch.tensor(10000.0)) / d_model))
        angles = position.float() * div_term
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns, so
        # the cosine block is trimmed to fit; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1)]

# === Model ===
class MiniTransformer(nn.Module):
    """Compact Transformer-encoder classifier that pools the first sequence position.

    Tokens are embedded, position-encoded and passed through the encoder stack;
    the hidden state at index 0 is normalized, regularized with dropout and
    projected to ``num_classes`` logits.
    """

    def __init__(self, vocab_size, d_model, nhead, num_layers, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)

        # One batch-first GELU layer serves as the template for the whole stack.
        feedforward_width = 4 * d_model
        template_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=feedforward_width,
            dropout=0.3,
            activation='gelu',
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(template_layer, num_layers=num_layers)

        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(0.3)
        self.cls_fc = nn.Linear(d_model, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return class logits for a batch of token ids and its 1/0 attention mask."""
        embedded = self.embedding(input_ids)
        encoded_in = self.positional_encoding(embedded)
        # Invert the mask: the encoder treats True as "ignore this key position".
        ignore_positions = attention_mask.bool().logical_not()
        encoded_out = self.transformer(encoded_in, src_key_padding_mask=ignore_positions)
        first_position = encoded_out[:, 0]
        normalized = self.norm(first_position)
        return self.cls_fc(self.dropout(normalized))

# === Initialization ===
from itertools import islice

model = MiniTransformer(vocab_size, D_MODEL, N_HEADS, N_LAYERS, 2).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=0.01)
criterion = nn.CrossEntropyLoss()
# Mixed precision is only active on CUDA; on CPU both autocast and the scaler are
# disabled. `torch.amp.*` replaces the deprecated `torch.cuda.amp.*` entry points.
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

# NOTE(review): the original comment said "every 300 steps" while the code used
# 1000. The interval is kept at 1000, which exceeds the 186 steps per epoch seen
# in the logged run, so the quick check never fires there - confirm the intended value.
QUICK_VAL_EVERY = 1000   # run a quick validation every N training steps
QUICK_VAL_BATCHES = 10   # number of validation batches used by the quick check


def _evaluate_accuracy(loader, max_batches=None):
    """Return the accuracy of the global ``model`` on ``loader``.

    Args:
        loader: DataLoader yielding dicts with ``input_ids``, ``attention_mask``
            and ``label`` tensors.
        max_batches: If given, only the first ``max_batches`` batches are pulled
            from the loader; ``None`` evaluates every batch.

    The model is switched to eval mode and left there; callers that continue
    training must call ``model.train()`` afterwards.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        # islice stops pulling batches once the limit is reached instead of
        # materializing the whole loader first.
        for eval_batch in islice(loader, max_batches):
            ids = eval_batch["input_ids"].to(device)
            mask = eval_batch["attention_mask"].to(device)
            lbls = eval_batch["label"].to(device)
            with torch.amp.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(ids, mask)
            predicted.extend(torch.argmax(outputs, dim=1).cpu().numpy())
            expected.extend(lbls.cpu().numpy())
    return accuracy_score(expected, predicted)


# === Training ===
for epoch in range(EPOCHS):
    model.train()
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
    for step, batch in enumerate(loop):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()
        with torch.amp.autocast(device_type=device.type, enabled=use_amp):
            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)

        # Scale the loss before backward; the scaler unscales before the optimizer
        # step and adapts its scale factor afterwards.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # === Quick mini-validation every QUICK_VAL_EVERY steps ===
        if (step + 1) % QUICK_VAL_EVERY == 0:
            val_acc = _evaluate_accuracy(val_loader, max_batches=QUICK_VAL_BATCHES)
            loop.set_postfix(loss=loss.item(), val_acc=val_acc)
            model.train()

    # === Full validation after each epoch ===
    epoch_acc = _evaluate_accuracy(val_loader)
    print(f"\n✅ Epoch {epoch+1} Validation Accuracy: {epoch_acc:.4f}\n")

  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Epoch 1: 100%|██████████| 186/186 [00:46<00:00,  4.03it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 1 Validation Accuracy: 0.7432



  with torch.cuda.amp.autocast():
Epoch 2: 100%|██████████| 186/186 [00:48<00:00,  3.84it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 2 Validation Accuracy: 0.7972



  with torch.cuda.amp.autocast():
Epoch 3: 100%|██████████| 186/186 [00:48<00:00,  3.83it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 3 Validation Accuracy: 0.8416



  with torch.cuda.amp.autocast():
Epoch 4: 100%|██████████| 186/186 [00:47<00:00,  3.91it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 4 Validation Accuracy: 0.8420



  with torch.cuda.amp.autocast():
Epoch 5: 100%|██████████| 186/186 [00:47<00:00,  3.89it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 5 Validation Accuracy: 0.8672



  with torch.cuda.amp.autocast():
Epoch 6: 100%|██████████| 186/186 [00:48<00:00,  3.86it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 6 Validation Accuracy: 0.8692



  with torch.cuda.amp.autocast():
Epoch 7: 100%|██████████| 186/186 [00:48<00:00,  3.87it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 7 Validation Accuracy: 0.8616



  with torch.cuda.amp.autocast():
Epoch 8: 100%|██████████| 186/186 [00:48<00:00,  3.87it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 8 Validation Accuracy: 0.8752



  with torch.cuda.amp.autocast():
Epoch 9: 100%|██████████| 186/186 [00:47<00:00,  3.89it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 9 Validation Accuracy: 0.8768



  with torch.cuda.amp.autocast():
Epoch 10: 100%|██████████| 186/186 [00:48<00:00,  3.84it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 10 Validation Accuracy: 0.8748



  with torch.cuda.amp.autocast():
Epoch 11: 100%|██████████| 186/186 [00:48<00:00,  3.84it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 11 Validation Accuracy: 0.8800



  with torch.cuda.amp.autocast():
Epoch 12: 100%|██████████| 186/186 [00:47<00:00,  3.95it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 12 Validation Accuracy: 0.8832



  with torch.cuda.amp.autocast():
Epoch 13: 100%|██████████| 186/186 [00:47<00:00,  3.94it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 13 Validation Accuracy: 0.8780



  with torch.cuda.amp.autocast():
Epoch 14: 100%|██████████| 186/186 [00:47<00:00,  3.89it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 14 Validation Accuracy: 0.8808



  with torch.cuda.amp.autocast():
Epoch 15: 100%|██████████| 186/186 [00:47<00:00,  3.90it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 15 Validation Accuracy: 0.8776



  with torch.cuda.amp.autocast():
Epoch 16: 100%|██████████| 186/186 [00:48<00:00,  3.85it/s]
  with torch.cuda.amp.autocast():



✅ Epoch 16 Validation Accuracy: 0.8808

