In [13]:
import os
import glob
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T
import torchvision.models as models
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm
import timm

In [14]:
# ===== Config =====
DATA_ROOT = './osv5m/'  # <-- update this!
TRAIN_CSV = os.path.join(DATA_ROOT, 'train_mini.csv')
TEST_CSV = os.path.join(DATA_ROOT, 'test_mini.csv')
TRAIN_IMG_DIR = os.path.join(DATA_ROOT, 'train_images')
TEST_IMG_DIR = os.path.join(DATA_ROOT, 'test_images')
EPOCHS = 10
BATCH_SIZE = 32

# 1) Build a global mapping from cell → index using the training CSV
train_df    = pd.read_csv(TRAIN_CSV)
cells, classes  = pd.factorize(train_df['cell'])
class_to_idx    = {cell: idx for idx, cell in enumerate(classes)}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ===== Dataset =====
class GeoDataset(Dataset):
    def __init__(self, csv_path, images_root, class_to_idx, transforms=None):
        # load annotations
        self.df = pd.read_csv(csv_path)

        # map 'cell' to the consistent label index; drop any rows not seen in training
        self.df['label'] = self.df['cell'].map(class_to_idx)
        self.df = self.df[self.df['label'].notna()].reset_index(drop=True)
        self.df['label'] = self.df['label'].astype(int)

        # share the same classes list
        self.classes = classes

        # build a map from image‐ID → full path
        all_files = glob.glob(os.path.join(images_root, '*', '*.jpg'))
        self.id2path = {
            os.path.splitext(os.path.basename(p))[0]: p
            for p in all_files
        }
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row    = self.df.iloc[idx]
        img_id = str(row['id'])
        label  = int(row['label'])
        img    = Image.open(self.id2path[img_id]).convert('RGB')
        if self.transforms:
            img = self.transforms(img)
        return img, label

In [15]:
# --- Hybrid CNN + ViT Model ---
class CNN_ViT_Hybrid(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        # Conv feature extractor (ConvNeXt-T or ResNet50)
        self.cnn = timm.create_model("convnext_tiny", pretrained=True, features_only=True)
        cnn_out_channels = self.cnn.feature_info[-1]['num_chs']

        # ViT block (tiny patch-based attention)
        self.vit = timm.create_model("vit_small_patch16_224", pretrained=True)
        self.vit.head = nn.Identity()  # remove classifier

        # Fusion + Classifier
        self.pool = nn.AdaptiveAvgPool2d((14, 14))
        self.proj = nn.Linear(cnn_out_channels, self.vit.embed_dim)

        self.classifier = nn.Linear(self.vit.embed_dim, num_classes)

    def forward(self, x):
        # CNN backbone
        x = self.cnn(x)[-1]  # shape (B, C, H, W)

        # Pool to fixed size
        x = self.pool(x)  # shape (B, C, 14, 14)

        # Flatten and transpose to match ViT input
        x = x.flatten(2).transpose(1, 2)  # (B, C, H*W) -> (B, H*W, C)

        # Project to ViT embedding dim
        x = self.proj(x)  # shape (B, 196, D)

        # Feed through ViT encoder blocks
        x = self.vit.blocks(x)
        x = x.mean(dim=1)  # Global average pooling

        return self.classifier(x)


In [16]:
# --- Transforms ---
train_transforms = T.Compose([
    T.Resize((224, 224)),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
test_transforms = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# --- Load Data ---
train_ds = GeoDataset(TRAIN_CSV, TRAIN_IMG_DIR, class_to_idx, train_transforms)
test_ds  = GeoDataset(TEST_CSV,  TEST_IMG_DIR,  class_to_idx, test_transforms)

num_val = int(0.2 * len(train_ds))
num_train = len(train_ds) - num_val
train_subset, val_subset = random_split(train_ds, [num_train, num_val], generator=torch.Generator().manual_seed(42))

train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_subset,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_loader  = DataLoader(test_ds,      batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# --- Model ---
model = CNN_ViT_Hybrid(num_classes=len(train_ds.classes)).to(device)
criterion = nn.CrossEntropyLoss()
# # Freeze CNN
# for param in model.cnn.parameters():
#     param.requires_grad = False
# Create optimizer only for trainable layers (ViT + classifier)
optimizer = optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4
)


In [18]:
# --- Training & Eval ---
def train_one_epoch(model, loader):
    model.train()
    running_loss, correct, total = 0, 0, 0
    for images, labels in tqdm(loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        correct += outputs.argmax(1).eq(labels).sum().item()
        total += labels.size(0)
    return running_loss / total, correct / total

def evaluate(model, loader):
    model.eval()
    running_loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * images.size(0)
            correct += outputs.argmax(1).eq(labels).sum().item()
            total += labels.size(0)
    return running_loss / total, correct / total

def plot_confusion_matrix(model, loader, class_names):
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for images, targets in loader:
            images = images.to(device)
            outputs = model(images)
            preds.extend(outputs.argmax(1).cpu().numpy())
            labels.extend(targets.numpy())
    cm = confusion_matrix(labels, preds)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(xticks_rotation=45, cmap='Blues')
    plt.title("Confusion Matrix")
    plt.show()

model.load_state_dict(torch.load("no-freeze\hybrid_best_model_epoch4.pth"))
model.to(device)

best_val_acc = 0.0
best_val_loss = float('inf')
# --- Training Loop ---
for epoch in range(4, EPOCHS):
    print(f"\n🌍 Epoch {epoch+1}/{EPOCHS}")
    train_loss, train_acc = train_one_epoch(model, train_loader)
    val_loss, val_acc     = evaluate(model, val_loader)

    print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

    if val_loss < best_val_loss or val_acc > best_val_acc:
        best_val_loss = val_loss
        best_val_acc = val_acc
        torch.save(model.state_dict(), f"hybrid_best_model_epoch{epoch+1}.pth")
        print("✅ Saved best model.")

# --- Final Test ---
test_loss, test_acc = evaluate(model, test_loader)
print(f"\n✅ Final Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}")

  model.load_state_dict(torch.load("no-freeze\hybrid_best_model_epoch4.pth"))



🌍 Epoch 5/10


100%|██████████| 2976/2976 [33:33<00:00,  1.48it/s]


Train Loss: 0.2951, Acc: 0.9032
Val   Loss: 0.4936, Acc: 0.8584
✅ Saved best model.

🌍 Epoch 6/10


  6%|▌         | 178/2976 [01:42<26:43,  1.74it/s]


KeyboardInterrupt: 

First run

🌍 Epoch 1/10
100%|██████████| 2976/2976 [33:23<00:00,  1.49it/s]
Train Loss: 1.7363, Acc: 0.5185
Val   Loss: 0.9604, Acc: 0.7082
✅ Saved best model.

🌍 Epoch 2/10
100%|██████████| 2976/2976 [31:51<00:00,  1.56it/s]
Train Loss: 0.7320, Acc: 0.7714
Val   Loss: 0.7235, Acc: 0.7798
✅ Saved best model.

🌍 Epoch 3/10
100%|██████████| 2976/2976 [32:06<00:00,  1.54it/s]
Train Loss: 0.4849, Acc: 0.8439
Val   Loss: 0.6269, Acc: 0.8156
✅ Saved best model.

🌍 Epoch 4/10
100%|██████████| 2976/2976 [28:39<00:00,  1.73it/s]
Train Loss: 0.3557, Acc: 0.8834
Val   Loss: 0.6067, Acc: 0.8222
✅ Saved best model.

🌍 Epoch 5/10
100%|██████████| 2976/2976 [42:12<00:00,  1.18it/s]
Train Loss: 0.2772, Acc: 0.9089
Val   Loss: 0.4717, Acc: 0.8597
✅ Saved best model.

🌍 Epoch 6/10
100%|██████████| 2976/2976 [39:03<00:00,  1.27it/s]
Train Loss: 0.2219, Acc: 0.9272
Val   Loss: 0.5162, Acc: 0.8593

🌍 Epoch 7/10
  4%|▍         | 129/2976 [01:15<27:44,  1.71it/s]

In [None]:
# Fine Tune with frozen layers
# Load the best model from the previous training
model.load_state_dict(torch.load('no-freeze\hybrid_best_model_epoch5.pth'))
model.to(device)

for param in model.cnn.parameters():
    param.requires_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True


# Only params that require gradients
trainable_params = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(trainable_params, lr=1e-5) # reinitialize optimizer

# --- Fine-tuning with frozen layers ---
fine_tune_epochs = 3
start_epoch = 6
best_val_loss = 0.4936
best_val_acc = 0.8584

for epoch in range(start_epoch, start_epoch + fine_tune_epochs):
    print(f"\n🎯 Fine-tuning Epoch {epoch}/{start_epoch + fine_tune_epochs - 1}")
    train_loss, train_acc = train_one_epoch(model, train_loader)
    val_loss, val_acc     = evaluate(model, val_loader)

    print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

    if val_loss < best_val_loss or val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), f"hybrid_best_model_epoch{epoch+1}.pth")
        print("✅ Saved best model.")


  model.load_state_dict(torch.load('no-freeze\hybrid_best_model_epoch5.pth'))



🎯 Fine-tuning Epoch 6/8


 83%|████████▎ | 2482/2976 [18:44<04:20,  1.90it/s]

In [16]:
# --- Training & Eval ---
def train_one_epoch(model, loader):
    model.train()
    running_loss, correct, total = 0, 0, 0
    for images, labels in tqdm(loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        correct += outputs.argmax(1).eq(labels).sum().item()
        total += labels.size(0)
    return running_loss / total, correct / total

def evaluate(model, loader):
    model.eval()
    running_loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * images.size(0)
            correct += outputs.argmax(1).eq(labels).sum().item()
            total += labels.size(0)
    return running_loss / total, correct / total

# --- Training Loop ---
best_val_acc = 0.0
for epoch in range(EPOCHS):
    print(f"\n🌍 Epoch {epoch+1}/{EPOCHS}")
    
    # Unfreeze after 3 epochs
    if epoch == 3:
        print("🔓 Unfreezing CNN...")
        for param in model.cnn.parameters():
            param.requires_grad = True
        optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), lr=1e-5
        )

    train_loss, train_acc = train_one_epoch(model, train_loader)
    val_loss, val_acc     = evaluate(model, val_loader)

    print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), f"hybrid_best_model_epoch{epoch+1}.pth")
        print("✅ Saved best model.")

# --- Final Test ---
test_loss, test_acc = evaluate(model, test_loader)
print(f"\n✅ Final Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}")


🌍 Epoch 1/10


100%|██████████| 2976/2976 [19:02<00:00,  2.61it/s]


Train Loss: 2.0685, Acc: 0.4334
Val   Loss: 1.3869, Acc: 0.5857
✅ Saved best model.

🌍 Epoch 2/10


100%|██████████| 2976/2976 [18:50<00:00,  2.63it/s]


Train Loss: 1.0890, Acc: 0.6670
Val   Loss: 1.0401, Acc: 0.6834
✅ Saved best model.

🌍 Epoch 3/10


100%|██████████| 2976/2976 [18:37<00:00,  2.66it/s]


Train Loss: 0.7235, Acc: 0.7690
Val   Loss: 0.8748, Acc: 0.7348
✅ Saved best model.

🌍 Epoch 4/10
🔓 Unfreezing CNN...


100%|██████████| 2976/2976 [27:59<00:00,  1.77it/s]


Train Loss: 0.4096, Acc: 0.8636
Val   Loss: 0.5637, Acc: 0.8231
✅ Saved best model.

🌍 Epoch 5/10


100%|██████████| 2976/2976 [28:01<00:00,  1.77it/s]


Train Loss: 0.2461, Acc: 0.9162
Val   Loss: 0.5193, Acc: 0.8409
✅ Saved best model.

🌍 Epoch 6/10


100%|██████████| 2976/2976 [28:04<00:00,  1.77it/s]


Train Loss: 0.1607, Acc: 0.9453
Val   Loss: 0.5365, Acc: 0.8468
✅ Saved best model.

🌍 Epoch 7/10


100%|██████████| 2976/2976 [28:06<00:00,  1.77it/s]


Train Loss: 0.1052, Acc: 0.9653
Val   Loss: 0.5558, Acc: 0.8502
✅ Saved best model.

🌍 Epoch 8/10


100%|██████████| 2976/2976 [28:22<00:00,  1.75it/s]


Train Loss: 0.0784, Acc: 0.9742
Val   Loss: 0.5883, Acc: 0.8461

🌍 Epoch 9/10


  5%|▍         | 144/2976 [01:22<27:07,  1.74it/s]


KeyboardInterrupt: 

🌍 Epoch 1/10
100%|██████████| 2976/2976 [19:02<00:00,  2.61it/s]
Train Loss: 2.0685, Acc: 0.4334
Val   Loss: 1.3869, Acc: 0.5857
✅ Saved best model.

🌍 Epoch 2/10
100%|██████████| 2976/2976 [18:50<00:00,  2.63it/s]
Train Loss: 1.0890, Acc: 0.6670
Val   Loss: 1.0401, Acc: 0.6834
✅ Saved best model.

🌍 Epoch 3/10
100%|██████████| 2976/2976 [18:37<00:00,  2.66it/s]
Train Loss: 0.7235, Acc: 0.7690
Val   Loss: 0.8748, Acc: 0.7348
✅ Saved best model.

🌍 Epoch 4/10
🔓 Unfreezing CNN...
100%|██████████| 2976/2976 [27:59<00:00,  1.77it/s]
Train Loss: 0.4096, Acc: 0.8636
Val   Loss: 0.5637, Acc: 0.8231
✅ Saved best model.

🌍 Epoch 5/10
100%|██████████| 2976/2976 [28:01<00:00,  1.77it/s]
Train Loss: 0.2461, Acc: 0.9162
Val   Loss: 0.5193, Acc: 0.8409
✅ Saved best model.

🌍 Epoch 6/10
100%|██████████| 2976/2976 [28:04<00:00,  1.77it/s]
Train Loss: 0.1607, Acc: 0.9453
Val   Loss: 0.5365, Acc: 0.8468
✅ Saved best model.

🌍 Epoch 7/10
100%|██████████| 2976/2976 [28:06<00:00,  1.77it/s]
Train Loss: 0.1052, Acc: 0.9653
Val   Loss: 0.5558, Acc: 0.8502
✅ Saved best model.

🌍 Epoch 8/10
100%|██████████| 2976/2976 [28:22<00:00,  1.75it/s]
Train Loss: 0.0784, Acc: 0.9742
Val   Loss: 0.5883, Acc: 0.8461

🌍 Epoch 9/10
  5%|▍         | 144/2976 [01:22<27:07,  1.74it/s]

In [4]:
# --- Hybrid CNN + ViT Model ---
class CNN_ViT_Hybrid(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        # Conv feature extractor (ConvNeXt-T or ResNet50)
        self.cnn = timm.create_model("convnext_tiny", pretrained=True, features_only=True)
        cnn_out_channels = self.cnn.feature_info[-1]['num_chs']

        # ViT block (tiny patch-based attention)
        self.vit = timm.create_model("vit_small_patch16_224", pretrained=True)
        self.vit.head = nn.Identity()  # remove classifier

        # Fusion + Classifier
        self.pool = nn.AdaptiveAvgPool2d((14, 14))
        self.proj = nn.Linear(cnn_out_channels, self.vit.embed_dim)
        self.dropout = nn.Dropout(p=0.3)  # Try 0.3–0.5 if needed

        self.classifier = nn.Linear(self.vit.embed_dim, num_classes)

    def forward(self, x):
        # CNN backbone
        x = self.cnn(x)[-1]  # (B, C, H, W)

        # Pool to fixed size
        x = self.pool(x)  # (B, C, 14, 14)

        # Flatten and project
        x = x.flatten(2).transpose(1, 2)  # (B, 196, C)
        x = self.proj(x)  # (B, 196, D)

        # ViT blocks
        x = self.vit.blocks(x)  # (B, 196, D)
        x = x.mean(dim=1)       # Global avg pool over tokens → (B, D)

        x = self.dropout(x)
        return self.classifier(x)


# --- Transforms ---
train_transforms = T.Compose([
    T.Resize((224, 224)),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
test_transforms = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# --- Load Data ---
train_ds = GeoDataset(TRAIN_CSV, TRAIN_IMG_DIR, class_to_idx, train_transforms)
test_ds  = GeoDataset(TEST_CSV,  TEST_IMG_DIR,  class_to_idx, test_transforms)

num_val = int(0.2 * len(train_ds))
num_train = len(train_ds) - num_val
train_subset, val_subset = random_split(train_ds, [num_train, num_val], generator=torch.Generator().manual_seed(42))

train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_subset,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_loader  = DataLoader(test_ds,      batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# --- Model ---
model = CNN_ViT_Hybrid(num_classes=len(train_ds.classes)).to(device)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
# Freeze CNN
for param in model.cnn.parameters():
    param.requires_grad = False
# Create optimizer only for trainable layers (ViT + classifier)
optimizer = optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=1e-4
)


# --- Training & Eval ---
def train_one_epoch(model, loader):
    model.train()
    running_loss, correct, total = 0, 0, 0
    for images, labels in tqdm(loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        correct += outputs.argmax(1).eq(labels).sum().item()
        total += labels.size(0)
    return running_loss / total, correct / total

def evaluate(model, loader):
    model.eval()
    running_loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * images.size(0)
            correct += outputs.argmax(1).eq(labels).sum().item()
            total += labels.size(0)
    return running_loss / total, correct / total

# --- Training Loop ---
best_val_acc = 0.0
for epoch in range(EPOCHS):
    print(f"\n🌍 Epoch {epoch+1}/{EPOCHS}")
    
    # Unfreeze after 3 epochs
    if epoch == 3:
        print("🔓 Gradually unfreezing CNN (stage4 and norm layers)...")
        for name, param in model.cnn.named_parameters():
            if "stage4" in name or "norm" in name:
                '''Unfreezes only stage4 and norm layers of ConvNeXt at epoch 3.
                Keeps earlier stages (1–3) frozen — these usually contain low-level features 
                like edges and colors, which don’t need fine-tuning.
                More stable and less likely to overfit or forget pretrained features too early.'''
                param.requires_grad = True  # unfreeze last stage and normalization layers
            else:
                param.requires_grad = False  # keep the rest frozen

        # Rebuild optimizer to include newly trainable layers
        optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), lr=1e-5, weight_decay=1e-4
        )


    train_loss, train_acc = train_one_epoch(model, train_loader)
    val_loss, val_acc     = evaluate(model, val_loader)

    print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), f"hybrid_best_model_epoch{epoch+1}.pth")
        print("✅ Saved best model.")

# --- Final Test ---
test_loss, test_acc = evaluate(model, test_loader)
print(f"\n✅ Final Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}")


🌍 Epoch 1/10


100%|██████████| 2976/2976 [25:15<00:00,  1.96it/s]


Train Loss: 2.6570, Acc: 0.4155
Val   Loss: 2.0664, Acc: 0.5733
✅ Saved best model.

🌍 Epoch 2/10


100%|██████████| 2976/2976 [19:00<00:00,  2.61it/s]


Train Loss: 1.8256, Acc: 0.6514
Val   Loss: 1.7236, Acc: 0.6777
✅ Saved best model.

🌍 Epoch 3/10


100%|██████████| 2976/2976 [18:32<00:00,  2.67it/s]


Train Loss: 1.5103, Acc: 0.7548
Val   Loss: 1.5676, Acc: 0.7300
✅ Saved best model.

🌍 Epoch 4/10
🔓 Gradually unfreezing CNN (stage4 and norm layers)...


100%|██████████| 2976/2976 [29:35<00:00,  1.68it/s]


Train Loss: 1.1424, Acc: 0.8853
Val   Loss: 1.3601, Acc: 0.8050
✅ Saved best model.

🌍 Epoch 5/10


100%|██████████| 2976/2976 [21:22<00:00,  2.32it/s]


Train Loss: 1.0470, Acc: 0.9200
Val   Loss: 1.3516, Acc: 0.8085
✅ Saved best model.

🌍 Epoch 6/10


100%|██████████| 2976/2976 [21:02<00:00,  2.36it/s]


Train Loss: 0.9890, Acc: 0.9419
Val   Loss: 1.3551, Acc: 0.8114
✅ Saved best model.

🌍 Epoch 7/10


100%|██████████| 2976/2976 [22:29<00:00,  2.21it/s]


Train Loss: 0.9409, Acc: 0.9594
Val   Loss: 1.3651, Acc: 0.8124
✅ Saved best model.

🌍 Epoch 8/10


  6%|▌         | 175/2976 [01:17<20:33,  2.27it/s]


KeyboardInterrupt: 

🌍 Epoch 1/10
100%|██████████| 2976/2976 [25:15<00:00,  1.96it/s]
Train Loss: 2.6570, Acc: 0.4155
Val   Loss: 2.0664, Acc: 0.5733
✅ Saved best model.

🌍 Epoch 2/10
100%|██████████| 2976/2976 [19:00<00:00,  2.61it/s]
Train Loss: 1.8256, Acc: 0.6514
Val   Loss: 1.7236, Acc: 0.6777
✅ Saved best model.

🌍 Epoch 3/10
100%|██████████| 2976/2976 [18:32<00:00,  2.67it/s]
Train Loss: 1.5103, Acc: 0.7548
Val   Loss: 1.5676, Acc: 0.7300
✅ Saved best model.

🌍 Epoch 4/10
🔓 Gradually unfreezing CNN (stage4 and norm layers)...
100%|██████████| 2976/2976 [29:35<00:00,  1.68it/s]
Train Loss: 1.1424, Acc: 0.8853
Val   Loss: 1.3601, Acc: 0.8050
✅ Saved best model.

🌍 Epoch 5/10
100%|██████████| 2976/2976 [21:22<00:00,  2.32it/s]
Train Loss: 1.0470, Acc: 0.9200
Val   Loss: 1.3516, Acc: 0.8085
✅ Saved best model.

🌍 Epoch 6/10
100%|██████████| 2976/2976 [21:02<00:00,  2.36it/s]
Train Loss: 0.9890, Acc: 0.9419
Val   Loss: 1.3551, Acc: 0.8114
✅ Saved best model.

🌍 Epoch 7/10
100%|██████████| 2976/2976 [22:29<00:00,  2.21it/s]
Train Loss: 0.9409, Acc: 0.9594
Val   Loss: 1.3651, Acc: 0.8124
✅ Saved best model.

🌍 Epoch 8/10
  6%|▌         | 175/2976 [01:17<20:33,  2.27it/s]

In [22]:
# --- Load Best Model (replace X with best epoch) ---
model.load_state_dict(torch.load('hybrid_best_model_epoch7.pth'))
model.to(device)

# --- Final Test withiyt finetuning through freezing ---
test_loss, test_acc = evaluate(model, test_loader)
print(f"\n✅ Final Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}")

  model.load_state_dict(torch.load('hybrid_best_model_epoch7.pth'))



✅ Final Test Loss: 4.1722, Accuracy: 0.4233
