In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import json
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch.nn.functional as F
from collections import Counter

# Choose the compute device once at import time: the first visible GPU when
# CUDA is usable, otherwise the CPU. Training and evaluation code below moves
# the model and every batch to this device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Dataset Class
class TACODataset(Dataset):
    """Map-style dataset pairing image file paths with class labels.

    Images are opened lazily in ``__getitem__`` and converted to RGB. If
    opening or transforming an image raises, the error is printed and a
    224x224 placeholder is returned with the sample's original label, so a
    single bad file does not abort iteration.
    """

    def __init__(self, image_paths, labels, transform=None):
        """Keep references to the parallel path/label sequences.

        Args:
            image_paths: Sequence of image file paths.
            labels: Sequence of labels, index-aligned with ``image_paths``.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.transform = transform
        self.labels = labels
        self.image_paths = image_paths

    def __len__(self):
        """Return the number of samples (one per entry of ``image_paths``)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at ``idx``.

        Without a transform the image is the RGB PIL image itself; with one
        it is whatever the transform returns.
        """
        img_path, label = self.image_paths[idx], self.labels[idx]
        try:
            picture = Image.open(img_path).convert("RGB")
            return (self.transform(picture) if self.transform else picture), label
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            # Best-effort fallback: a white image pushed through the same
            # transform, or an all-zero tensor when there is no transform.
            fallback = Image.new('RGB', (224, 224), color='white')
            fallback = self.transform(fallback) if self.transform else torch.zeros(3, 224, 224)
            return fallback, label

# Simplified Multi-Head Self Attention
class EnhancedMultiHeadSelfAttention(nn.Module):
    """Scaled dot-product multi-head self-attention over a token sequence.

    A single linear layer produces queries, keys and values for all heads;
    the per-head results are concatenated and passed through an output
    projection. Dropout is applied to the attention weights and to the
    projected output.
    """

    def __init__(self, embed_dim, num_heads, dropout=0.1):
        """Build the fused QKV projection and the output projection.

        Args:
            embed_dim: Token embedding width; must be divisible by ``num_heads``.
            num_heads: Number of attention heads.
            dropout: Dropout probability for both dropout layers.
        """
        super().__init__()
        per_head, leftover = divmod(embed_dim, num_heads)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = per_head

        assert leftover == 0, "embed_dim must be divisible by num_heads"

        self.qkv = nn.Linear(embed_dim, 3 * embed_dim)
        self.attn_dropout = nn.Dropout(dropout)
        self.proj = nn.Linear(embed_dim, embed_dim)
        self.proj_dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Attend over ``x`` of shape (B, N, C) and return the same shape."""
        batch, tokens, channels = x.shape

        # (B, N, 3*C) -> (3, B, heads, N, head_dim), then split into q/k/v.
        packed = self.qkv(x).reshape(batch, tokens, 3, self.num_heads, self.head_dim)
        q, k, v = packed.permute(2, 0, 3, 1, 4).unbind(0)

        scale = self.head_dim ** -0.5
        weights = torch.matmul(q, k.transpose(-2, -1)) * scale
        weights = self.attn_dropout(torch.softmax(weights, dim=-1))

        # Merge the heads back into the channel dimension.
        merged = torch.matmul(weights, v).transpose(1, 2).reshape(batch, tokens, channels)
        return self.proj_dropout(self.proj(merged))

# Simplified Spatial Attention
class EnhancedSpatialAttention(nn.Module):
    """Spatial gate built from channel-wise mean and max maps.

    The two single-channel maps are stacked, passed through a 7x7
    convolution and a sigmoid, and the resulting mask rescales every channel
    of the input at each spatial location.
    """

    def __init__(self, channels):
        """Create the gate; ``channels`` is accepted but not used by the layers."""
        super().__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by its (B, 1, H, W) attention mask."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True)[0]
        descriptor = torch.cat([channel_mean, channel_max], dim=1)
        mask = self.sigmoid(self.conv(descriptor))
        return x * mask

# Simplified Residual Block
class EnhancedResidualBlock(nn.Module):
    """Two 3x3 conv/batch-norm layers with a skip connection.

    The skip path is an empty ``nn.Sequential`` (a pass-through) when input
    and output shapes agree, and a strided 1x1 conv + batch norm otherwise.
    An optional spatial-attention gate is applied to the main path before the
    skip is added.
    """

    def __init__(self, in_channels, out_channels, stride=1, use_attention=True):
        """Build the block.

        Args:
            in_channels: Channels of the incoming feature map.
            out_channels: Channels produced by the block.
            stride: Stride of the first convolution (and of the skip projection).
            use_attention: Gate the main path with ``EnhancedSpatialAttention``
                when true; use ``nn.Identity`` otherwise.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # A projection is only needed when the skip tensor would not match
        # the main path in resolution or channel count.
        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

        if use_attention:
            self.attention = EnhancedSpatialAttention(out_channels)
        else:
            self.attention = nn.Identity()

    def forward(self, x):
        """Return ``relu(attention(main(x)) + shortcut(x))``."""
        out = self.conv1(x)
        out = F.relu(self.bn1(out))
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.attention(out)
        out += self.shortcut(x)
        return F.relu(out)

# Fixed Hybrid Model
class UltraDeepHybridCNNTransformer(nn.Module):
    """Hybrid classifier: residual CNN backbone followed by a Transformer encoder.

    The backbone (stem + three residual stages) reduces a square RGB image to
    a 512-channel feature map at 1/16 of the input resolution. That map is cut
    into non-overlapping patches by a strided convolution, a learnable CLS
    token and positional embedding are added, and ``depth`` pre-norm
    Transformer layers process the sequence. The logits are read from the CLS
    token.

    Args:
        num_classes: Number of output logits.
        img_size: Side length of the (square) input images. The positional
            embedding is sized for this resolution, so inputs passed to
            ``forward`` must match it.
        patch_size: Side length of each patch on the backbone feature map.
        embed_dim: Token width inside the Transformer.
        depth: Number of Transformer layers.
        num_heads: Attention heads per layer.

    Raises:
        ValueError: If ``patch_size`` is larger than the backbone feature map
            produced for ``img_size``.
    """

    def __init__(self, num_classes=5, img_size=224, patch_size=7, embed_dim=512, depth=4, num_heads=8):
        super().__init__()

        # Size the token grid from img_size instead of assuming a 14x14
        # feature map, so resolutions other than 224 get a matching
        # positional embedding.
        feature_size = self._backbone_output_size(img_size)
        patches_per_side = feature_size // patch_size
        if patches_per_side < 1:
            raise ValueError(
                f"patch_size={patch_size} is larger than the {feature_size}x{feature_size} "
                f"feature map produced for img_size={img_size}"
            )

        # CNN Backbone
        self.stem = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # 1/4 resolution (56x56 at 224)
        )

        self.block1 = nn.Sequential(
            EnhancedResidualBlock(64, 64),
            EnhancedResidualBlock(64, 128, stride=2)  # 1/8 resolution (28x28 at 224)
        )

        self.block2 = nn.Sequential(
            EnhancedResidualBlock(128, 128),
            EnhancedResidualBlock(128, 256, stride=2)  # 1/16 resolution (14x14 at 224)
        )

        self.block3 = nn.Sequential(
            EnhancedResidualBlock(256, 256),
            EnhancedResidualBlock(256, 512, stride=1)  # resolution unchanged
        )

        # Patch embedding
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = patches_per_side ** 2
        self.patch_embed = nn.Conv2d(512, embed_dim, kernel_size=patch_size, stride=patch_size)

        # Positional embedding and CLS token
        self.pos_embed = nn.Parameter(torch.zeros(1, self.num_patches + 1, embed_dim))
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))

        # Transformer layers
        self.transformer_layers = nn.ModuleList([
            nn.ModuleDict({
                'norm1': nn.LayerNorm(embed_dim),
                'attn': EnhancedMultiHeadSelfAttention(embed_dim, num_heads),
                'norm2': nn.LayerNorm(embed_dim),
                'mlp': nn.Sequential(
                    nn.Linear(embed_dim, embed_dim * 4),
                    nn.GELU(),
                    nn.Linear(embed_dim * 4, embed_dim),
                )
            }) for _ in range(depth)
        ])

        # Classification head
        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)

        # Initialize weights
        self._init_weights()

    @staticmethod
    def _backbone_output_size(img_size):
        """Return the side length of the backbone feature map for ``img_size``.

        The stem convolution, the max-pool and the two strided residual
        stages each map a side of length ``s`` to ``(s - 1) // 2 + 1``.
        """
        size = img_size
        for _ in range(4):
            size = (size - 1) // 2 + 1
        return size

    def _init_weights(self):
        """Re-initialise all layers, the positional embedding and the CLS token.

        Linear layers get truncated-normal weights (std 0.02) and zero bias,
        convolutions get Kaiming-normal weights, and normalisation layers are
        reset to weight 1 / bias 0.
        """
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.trunc_normal_(m.weight, std=0.02)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.LayerNorm)):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

        nn.init.trunc_normal_(self.pos_embed, std=0.02)
        nn.init.trunc_normal_(self.cls_token, std=0.02)

    def forward(self, x):
        """Return class logits of shape (B, num_classes) for an image batch.

        ``x`` is expected to be (B, 3, img_size, img_size); a different
        spatial size yields a token count that does not match ``pos_embed``.
        """
        # CNN feature extraction
        x = self.stem(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)

        # Patch embedding
        x = self.patch_embed(x)  # (B, embed_dim, num_patches_h, num_patches_w)
        x = x.flatten(2).transpose(1, 2)  # (B, num_patches, embed_dim)

        # Add CLS token and positional embedding
        cls_tokens = self.cls_token.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embed

        # Transformer encoding (pre-norm residual layers)
        for layer in self.transformer_layers:
            x = x + layer['attn'](layer['norm1'](x))
            x = x + layer['mlp'](layer['norm2'](x))

        # Classification
        x = self.norm(x[:, 0])  # CLS token
        return self.head(x)

# Data Loading and Balancing
def load_and_balance_taco_data(data_dir, anno_path):
    """Build a class-balanced list of (image path, label) samples from TACO.

    Every annotation whose category name contains one of the known keywords
    contributes one sample for its image (so an image may appear several
    times, possibly with different labels). Images missing on disk and
    annotations with no matching keyword are skipped. Each class present is
    then resampled with replacement to twice the size of the largest class.

    Args:
        data_dir: Directory that the ``file_name`` entries are relative to.
        anno_path: Path to the COCO-style annotations JSON file.

    Returns:
        ``(paths, labels)``: two index-aligned lists. Both are empty when no
        usable sample was found.

    Note:
        Because the output contains duplicates, splitting it into
        train/validation/test afterwards places copies of the same sample in
        several splits.
    """
    with open(anno_path, "r", encoding="utf-8") as f:
        annotations = json.load(f)

    # Keyword -> class id; the first keyword (in this order) found in the
    # lower-cased category name wins.
    category_mapping = {
        "plastic": 0, "metal": 1, "paper": 2, "glass": 3, "organic": 4,
        "bottle": 0, "can": 1, "cardboard": 2, "jar": 3, "food": 4
    }
    cat_id_to_name = {cat['id']: cat['name'] for cat in annotations['categories']}

    # Group annotations by image once instead of rescanning the full
    # annotation list for every image; per-image order is preserved.
    anns_by_image = {}
    for ann in annotations['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    image_paths = []
    labels = []
    for img in annotations['images']:
        img_path = os.path.join(data_dir, img['file_name'])
        if not os.path.exists(img_path):
            continue
        for ann in anns_by_image.get(img['id'], ()):
            category_name = cat_id_to_name[ann['category_id']].lower()
            label = next(
                (value for key, value in category_mapping.items() if key in category_name),
                None,
            )
            if label is not None:
                image_paths.append(img_path)
                labels.append(label)

    # Nothing to balance: let the caller decide how to report it rather than
    # failing on the max() of an empty count array below.
    if not image_paths:
        return [], []

    # Convert to numpy arrays for balancing
    image_paths_np = np.array(image_paths)
    labels_np = np.array(labels)

    # Balance classes
    unique_classes, class_counts = np.unique(labels_np, return_counts=True)
    max_samples = int(class_counts.max()) * 2  # Oversample

    balanced_paths = []
    balanced_labels = []

    for cls in unique_classes:
        cls_mask = (labels_np == cls)
        upsampled_paths, upsampled_labels = resample(
            image_paths_np[cls_mask], labels_np[cls_mask],
            n_samples=max_samples,
            random_state=42,
            replace=True
        )

        balanced_paths.extend(upsampled_paths.tolist())
        balanced_labels.extend(upsampled_labels.tolist())

    return balanced_paths, balanced_labels

# Training Function
def train_model(model, train_loader, val_loader, num_epochs=50):
    """Train ``model`` with early stopping and return the best validation accuracy.

    Uses class-weighted cross-entropy (weights are ``total / count`` per
    class, taken from ``train_loader.dataset.labels``), AdamW, gradient-norm
    clipping at 1.0 and a ReduceLROnPlateau scheduler driven by validation
    accuracy. Batches are moved to the module-level ``device``.

    Side effects: writes the best weights to ``best_model.pth`` (always at
    least once, after the first epoch), saves ``training_curves.png`` and
    shows the figure.

    Args:
        model: Network to optimise; expected to already live on ``device``.
        train_loader: Loader whose dataset exposes a ``labels`` sequence of
            integer class ids ``0..K-1``.
        val_loader: Loader used for model selection and early stopping.
        num_epochs: Maximum number of epochs.

    Returns:
        Best validation accuracy in percent.

    Raises:
        ValueError: If the training labels are empty or not contiguous from
            0, or if a loader yields no samples.
    """
    # Class weighting
    class_counts = Counter(train_loader.dataset.labels)
    total_samples = sum(class_counts.values())
    if total_samples == 0:
        raise ValueError("train_loader.dataset has no labels to train on")
    num_classes = len(class_counts)
    missing = [i for i in range(num_classes) if class_counts[i] == 0]
    if missing:
        # Weights are indexed by class id, so a gap would otherwise surface
        # as a ZeroDivisionError.
        raise ValueError(
            f"Training labels must be contiguous ids 0..{num_classes - 1}; "
            f"no samples found for class id(s) {missing}"
        )
    class_weights = torch.tensor(
        [total_samples / class_counts[i] for i in range(num_classes)]
    ).float().to(device)

    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
    # `verbose=` is deprecated in recent PyTorch releases; learning-rate
    # reductions are reported manually after each scheduler step instead.
    scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.5)

    train_acc = []
    train_loss = []
    val_acc = []
    best_val_acc = 0.0
    patience = 10
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        correct = 0
        total = 0
        running_loss = 0.0

        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            predicted = outputs.argmax(dim=1)
            batch_size = labels.size(0)
            total += batch_size
            correct += (predicted == labels).sum().item()
            running_loss += loss.item() * batch_size

            progress_bar.set_postfix({
                'Loss': f'{loss.item():.4f}',
                'Acc': f'{100 * correct / total:.2f}%'
            })

        if total == 0:
            raise ValueError("train_loader produced no samples")
        train_acc.append(100 * correct / total)
        train_loss.append(running_loss / total)

        # Validation
        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                predicted = model(images).argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        if val_total == 0:
            raise ValueError("val_loader produced no samples")
        val_acc.append(100 * val_correct / val_total)

        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_acc[-1])
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after < lr_before:
            print(f"Reducing learning rate from {lr_before:.2e} to {lr_after:.2e}")

        # Early stopping. The first epoch always writes a checkpoint so that
        # 'best_model.pth' exists even if accuracy never rises above 0.
        if epoch == 0 or val_acc[-1] > best_val_acc:
            best_val_acc = val_acc[-1]
            torch.save(model.state_dict(), 'best_model.pth')
            patience_counter = 0
        else:
            patience_counter += 1

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Acc: {train_acc[-1]:.2f}%, Val Acc: {val_acc[-1]:.2f}%")
        print(f"Best Val Acc: {best_val_acc:.2f}%")
        print("-" * 50)

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    # Plot training curves: accuracy on the left, training loss on the right.
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_acc, label='Train Acc')
    plt.plot(val_acc, label='Val Acc')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_loss, label='Train Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.savefig('training_curves.png')
    plt.show()

    return best_val_acc

def main():
    """Run the full pipeline: load data, train, reload the best weights, evaluate.

    Reads the TACO dataset from the hard-coded Kaggle paths, trains the
    hybrid model, then reports accuracy, weighted F1, a classification report
    and a confusion matrix on the held-out test split. Writes
    ``confusion_matrix.png`` (plus the files written by ``train_model``).

    Raises:
        ValueError: If no usable images were found.
    """
    # Data paths
    data_dir = "/kaggle/input/tacotrashdataset/data"
    anno_path = "/kaggle/input/tacotrashdataset/data/annotations.json"

    # Load and balance data
    print("Loading and balancing TACO dataset...")
    image_paths, labels = load_and_balance_taco_data(data_dir, anno_path)

    if not image_paths:
        raise ValueError("No valid images found in dataset")

    print(f"Loaded {len(image_paths)} images after balancing")

    # Get number of classes
    unique_labels = sorted(set(labels))
    num_classes = len(unique_labels)
    print(f"Number of classes found: {num_classes}")
    print(f"Classes: {unique_labels}")

    # Data transforms
    # NOTE(review): `transforms` is not imported in this cell; presumably
    # `from torchvision import transforms` ran earlier in the notebook — confirm.
    train_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(45),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.RandomErasing(p=0.3, scale=(0.02, 0.33), ratio=(0.3, 3.3))
    ])

    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Split data: 80% train, 10% validation, 10% test.
    # NOTE(review): the samples were oversampled with replacement before this
    # split, so copies of the same image end up in train, validation and
    # test. The reported validation/test metrics are therefore optimistic.
    # Splitting the raw samples first and oversampling only the training part
    # would remove the leak.
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        image_paths, labels, test_size=0.2, stratify=labels, random_state=42
    )
    val_paths, test_paths, val_labels, test_labels = train_test_split(
        test_paths, test_labels, test_size=0.5, stratify=test_labels, random_state=42
    )

    # Create datasets
    train_dataset = TACODataset(train_paths, train_labels, transform=train_transform)
    val_dataset = TACODataset(val_paths, val_labels, transform=val_transform)
    test_dataset = TACODataset(test_paths, test_labels, transform=val_transform)

    # Data loaders
    train_loader = DataLoader(
        train_dataset, batch_size=32, shuffle=True,
        num_workers=4, pin_memory=True
    )
    val_loader = DataLoader(
        val_dataset, batch_size=32, shuffle=False,
        num_workers=4, pin_memory=True
    )
    test_loader = DataLoader(
        test_dataset, batch_size=32, shuffle=False,
        num_workers=4, pin_memory=True
    )

    # Model
    print("Creating simplified hybrid model...")
    model = UltraDeepHybridCNNTransformer(
        num_classes=num_classes,
        embed_dim=512,
        depth=4,
        num_heads=8,
        patch_size=7
    ).to(device)

    # Model summary
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")

    # Train model
    print("\nStarting training...")
    best_val_acc = train_model(model, train_loader, val_loader, num_epochs=50)
    print(f"Best validation accuracy: {best_val_acc:.2f}%")

    # Load best model; map_location keeps the reload working when the
    # checkpoint was written on a different device than the current one.
    model.load_state_dict(torch.load('best_model.pth', map_location=device))

    # Evaluation
    print("\nEvaluating on test set...")
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        # Distinct loop names so the dataset-level `labels` list is not shadowed.
        for batch_images, batch_labels in tqdm(test_loader, desc="Testing"):
            batch_images = batch_images.to(device)
            predicted = model(batch_images).argmax(dim=1)

            predictions.extend(predicted.cpu().numpy())
            true_labels.extend(batch_labels.numpy())

    # Metrics
    acc = accuracy_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions, average='weighted')
    print(f"\nTest Accuracy: {acc:.4f}")
    print(f"Weighted F1 Score: {f1:.4f}")

    # Classification report (only for the class ids actually present)
    class_names = ['Plastic', 'Metal', 'Paper', 'Glass', 'Organic']
    actual_class_names = [class_names[i] for i in unique_labels]

    print("\nClassification Report:")
    print(classification_report(
        true_labels, predictions,
        labels=unique_labels,
        target_names=actual_class_names
    ))

    # Confusion Matrix
    cm = confusion_matrix(true_labels, predictions, labels=unique_labels)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=actual_class_names,
                yticklabels=actual_class_names)
    plt.title(f'Confusion Matrix\nTest Accuracy: {acc:.3f}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    plt.show()

# Run the full training/evaluation pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

Using device: cuda
Loading and balancing TACO dataset...
Loaded 19150 images after balancing
Number of classes found: 5
Classes: [0, 1, 2, 3, 4]
Creating simplified hybrid model...
Total parameters: 31,844,241
Trainable parameters: 31,844,241

Starting training...


Epoch 1/50: 100%|██████████| 479/479 [10:36<00:00,  1.33s/it, Loss=1.4871, Acc=25.93%]


Epoch 1/50
Train Acc: 25.93%, Val Acc: 30.86%
Best Val Acc: 30.86%
--------------------------------------------------


Epoch 2/50: 100%|██████████| 479/479 [10:34<00:00,  1.32s/it, Loss=1.4142, Acc=32.52%]


Epoch 2/50
Train Acc: 32.52%, Val Acc: 40.57%
Best Val Acc: 40.57%
--------------------------------------------------


Epoch 3/50: 100%|██████████| 479/479 [10:33<00:00,  1.32s/it, Loss=1.3537, Acc=36.93%]


Epoch 3/50
Train Acc: 36.93%, Val Acc: 44.96%
Best Val Acc: 44.96%
--------------------------------------------------


Epoch 4/50: 100%|██████████| 479/479 [10:37<00:00,  1.33s/it, Loss=1.3104, Acc=39.54%]


Epoch 4/50
Train Acc: 39.54%, Val Acc: 45.33%
Best Val Acc: 45.33%
--------------------------------------------------


Epoch 5/50: 100%|██████████| 479/479 [10:59<00:00,  1.38s/it, Loss=1.5866, Acc=42.23%]


Epoch 5/50
Train Acc: 42.23%, Val Acc: 49.87%
Best Val Acc: 49.87%
--------------------------------------------------


Epoch 6/50: 100%|██████████| 479/479 [10:54<00:00,  1.37s/it, Loss=1.5455, Acc=44.34%]


Epoch 6/50
Train Acc: 44.34%, Val Acc: 48.41%
Best Val Acc: 49.87%
--------------------------------------------------


Epoch 7/50: 100%|██████████| 479/479 [10:54<00:00,  1.37s/it, Loss=1.5101, Acc=46.37%]


Epoch 7/50
Train Acc: 46.37%, Val Acc: 53.32%
Best Val Acc: 53.32%
--------------------------------------------------


Epoch 8/50: 100%|██████████| 479/479 [10:57<00:00,  1.37s/it, Loss=1.1311, Acc=48.51%]


Epoch 8/50
Train Acc: 48.51%, Val Acc: 55.67%
Best Val Acc: 55.67%
--------------------------------------------------


Epoch 9/50: 100%|██████████| 479/479 [10:57<00:00,  1.37s/it, Loss=1.4632, Acc=50.60%]


Epoch 9/50
Train Acc: 50.60%, Val Acc: 57.60%
Best Val Acc: 57.60%
--------------------------------------------------


Epoch 10/50: 100%|██████████| 479/479 [11:03<00:00,  1.38s/it, Loss=1.0276, Acc=51.30%]


Epoch 10/50
Train Acc: 51.30%, Val Acc: 58.96%
Best Val Acc: 58.96%
--------------------------------------------------


Epoch 11/50: 100%|██████████| 479/479 [10:49<00:00,  1.36s/it, Loss=1.4062, Acc=52.50%]


Epoch 11/50
Train Acc: 52.50%, Val Acc: 58.80%
Best Val Acc: 58.96%
--------------------------------------------------


Epoch 12/50: 100%|██████████| 479/479 [10:59<00:00,  1.38s/it, Loss=0.9381, Acc=54.28%]


Epoch 12/50
Train Acc: 54.28%, Val Acc: 57.96%
Best Val Acc: 58.96%
--------------------------------------------------


Epoch 13/50: 100%|██████████| 479/479 [10:50<00:00,  1.36s/it, Loss=0.9685, Acc=55.08%]


Epoch 13/50
Train Acc: 55.08%, Val Acc: 61.78%
Best Val Acc: 61.78%
--------------------------------------------------


Epoch 14/50: 100%|██████████| 479/479 [10:49<00:00,  1.36s/it, Loss=1.1410, Acc=55.50%]


Epoch 14/50
Train Acc: 55.50%, Val Acc: 60.84%
Best Val Acc: 61.78%
--------------------------------------------------


Epoch 15/50: 100%|██████████| 479/479 [10:48<00:00,  1.35s/it, Loss=1.0414, Acc=56.76%]


Epoch 15/50
Train Acc: 56.76%, Val Acc: 64.44%
Best Val Acc: 64.44%
--------------------------------------------------


Epoch 16/50: 100%|██████████| 479/479 [10:59<00:00,  1.38s/it, Loss=0.8399, Acc=58.30%]


Epoch 16/50
Train Acc: 58.30%, Val Acc: 62.51%
Best Val Acc: 64.44%
--------------------------------------------------


Epoch 17/50: 100%|██████████| 479/479 [11:00<00:00,  1.38s/it, Loss=0.8600, Acc=58.49%]


Epoch 17/50
Train Acc: 58.49%, Val Acc: 65.48%
Best Val Acc: 65.48%
--------------------------------------------------


Epoch 18/50: 100%|██████████| 479/479 [10:58<00:00,  1.37s/it, Loss=0.8684, Acc=59.51%]


Epoch 18/50
Train Acc: 59.51%, Val Acc: 68.15%
Best Val Acc: 68.15%
--------------------------------------------------


Epoch 19/50: 100%|██████████| 479/479 [10:57<00:00,  1.37s/it, Loss=0.9139, Acc=59.92%]


Epoch 19/50
Train Acc: 59.92%, Val Acc: 66.58%
Best Val Acc: 68.15%
--------------------------------------------------


Epoch 20/50: 100%|██████████| 479/479 [10:55<00:00,  1.37s/it, Loss=0.5949, Acc=61.02%]


Epoch 20/50
Train Acc: 61.02%, Val Acc: 67.36%
Best Val Acc: 68.15%
--------------------------------------------------


Epoch 21/50: 100%|██████████| 479/479 [10:45<00:00,  1.35s/it, Loss=1.7531, Acc=61.93%]


Epoch 21/50
Train Acc: 61.93%, Val Acc: 67.57%
Best Val Acc: 68.15%
--------------------------------------------------


Epoch 22/50: 100%|██████████| 479/479 [10:58<00:00,  1.37s/it, Loss=0.9977, Acc=62.63%]


Epoch 22/50
Train Acc: 62.63%, Val Acc: 72.85%
Best Val Acc: 72.85%
--------------------------------------------------


Epoch 23/50: 100%|██████████| 479/479 [11:20<00:00,  1.42s/it, Loss=0.8094, Acc=63.89%]


Epoch 23/50
Train Acc: 63.89%, Val Acc: 71.49%
Best Val Acc: 72.85%
--------------------------------------------------


Epoch 24/50: 100%|██████████| 479/479 [11:20<00:00,  1.42s/it, Loss=1.0469, Acc=63.98%]


Epoch 24/50
Train Acc: 63.98%, Val Acc: 72.06%
Best Val Acc: 72.85%
--------------------------------------------------


Epoch 25/50: 100%|██████████| 479/479 [11:15<00:00,  1.41s/it, Loss=0.9727, Acc=64.65%]


Epoch 25/50
Train Acc: 64.65%, Val Acc: 73.32%
Best Val Acc: 73.32%
--------------------------------------------------


Epoch 26/50: 100%|██████████| 479/479 [11:16<00:00,  1.41s/it, Loss=0.6220, Acc=65.61%]


Epoch 26/50
Train Acc: 65.61%, Val Acc: 71.17%
Best Val Acc: 73.32%
--------------------------------------------------


Epoch 27/50: 100%|██████████| 479/479 [11:15<00:00,  1.41s/it, Loss=1.0185, Acc=65.91%]


Epoch 27/50
Train Acc: 65.91%, Val Acc: 70.50%
Best Val Acc: 73.32%
--------------------------------------------------


Epoch 28/50: 100%|██████████| 479/479 [11:00<00:00,  1.38s/it, Loss=0.8193, Acc=66.91%]


Epoch 28/50
Train Acc: 66.91%, Val Acc: 73.16%
Best Val Acc: 73.32%
--------------------------------------------------


Epoch 29/50: 100%|██████████| 479/479 [11:07<00:00,  1.39s/it, Loss=0.9119, Acc=67.44%]


Epoch 29/50
Train Acc: 67.44%, Val Acc: 73.84%
Best Val Acc: 73.84%
--------------------------------------------------


Epoch 30/50: 100%|██████████| 479/479 [11:02<00:00,  1.38s/it, Loss=1.3879, Acc=67.69%]


Epoch 30/50
Train Acc: 67.69%, Val Acc: 73.73%
Best Val Acc: 73.84%
--------------------------------------------------


Epoch 31/50: 100%|██████████| 479/479 [11:06<00:00,  1.39s/it, Loss=0.7174, Acc=68.58%]


Epoch 31/50
Train Acc: 68.58%, Val Acc: 71.75%
Best Val Acc: 73.84%
--------------------------------------------------


Epoch 32/50: 100%|██████████| 479/479 [11:18<00:00,  1.42s/it, Loss=0.5920, Acc=68.94%]


Epoch 32/50
Train Acc: 68.94%, Val Acc: 74.78%
Best Val Acc: 74.78%
--------------------------------------------------


Epoch 33/50: 100%|██████████| 479/479 [11:18<00:00,  1.42s/it, Loss=0.6462, Acc=68.75%]


Epoch 33/50
Train Acc: 68.75%, Val Acc: 75.35%
Best Val Acc: 75.35%
--------------------------------------------------


Epoch 34/50: 100%|██████████| 479/479 [11:12<00:00,  1.40s/it, Loss=0.9881, Acc=69.99%]


Epoch 34/50
Train Acc: 69.99%, Val Acc: 78.59%
Best Val Acc: 78.59%
--------------------------------------------------


Epoch 35/50: 100%|██████████| 479/479 [10:49<00:00,  1.36s/it, Loss=0.9932, Acc=69.98%]


Epoch 35/50
Train Acc: 69.98%, Val Acc: 78.17%
Best Val Acc: 78.59%
--------------------------------------------------


Epoch 36/50: 100%|██████████| 479/479 [10:41<00:00,  1.34s/it, Loss=0.7637, Acc=70.92%]


Epoch 36/50
Train Acc: 70.92%, Val Acc: 78.12%
Best Val Acc: 78.59%
--------------------------------------------------


Epoch 37/50: 100%|██████████| 479/479 [10:43<00:00,  1.34s/it, Loss=0.4127, Acc=70.82%]


Epoch 37/50
Train Acc: 70.82%, Val Acc: 78.59%
Best Val Acc: 78.59%
--------------------------------------------------


Epoch 38/50: 100%|██████████| 479/479 [10:58<00:00,  1.38s/it, Loss=0.6533, Acc=71.64%]


Epoch 38/50
Train Acc: 71.64%, Val Acc: 76.71%
Best Val Acc: 78.59%
--------------------------------------------------


Epoch 39/50: 100%|██████████| 479/479 [10:48<00:00,  1.35s/it, Loss=0.6408, Acc=74.03%]


Epoch 39/50
Train Acc: 74.03%, Val Acc: 79.32%
Best Val Acc: 79.32%
--------------------------------------------------


Epoch 40/50: 100%|██████████| 479/479 [10:47<00:00,  1.35s/it, Loss=0.5803, Acc=74.85%]


Epoch 40/50
Train Acc: 74.85%, Val Acc: 78.17%
Best Val Acc: 79.32%
--------------------------------------------------


Epoch 41/50:  61%|██████    | 292/479 [06:36<02:37,  1.19it/s, Loss=0.5367, Acc=74.91%]