In [1]:
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))


CUDA available: True
GPU name: NVIDIA GeForce RTX 3070 Ti


In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from tqdm import tqdm
import timm
import warnings
warnings.filterwarnings('ignore')


# 🔑 Set random seed (updated to 1029)
torch.manual_seed(1029)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory/(1024**3):.1f}GB")


# 🔄 Update to local paths
train_dir = r"C:\Users\Aufb\Downloads\fish_dataset_categories"
test_dir  = r"C:\Users\Aufb\Desktop\Test"
output_file = r"C:\Users\Aufb\Desktop\FIT5210_Life_Prediction.csv"

if not os.path.exists(train_dir):
    print(f"❌ Data directory does not exist: {train_dir}")
    exit(1)


MODEL_NAME = 'convnext_large.fb_in22k_ft_in1k'
TARGET_SIZE = 224  

print(f"🎯 Specific model: {MODEL_NAME}")
print(f"📏 Input size: {TARGET_SIZE}×{TARGET_SIZE}")


train_transform = transforms.Compose([
    transforms.Resize(int(TARGET_SIZE * 1.2)),
    transforms.RandomResizedCrop(TARGET_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize(int(TARGET_SIZE * 1.1)),
    transforms.CenterCrop(TARGET_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


print("📂 Loading data...")
full_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
print(f"Found {len(full_dataset.classes)} classes, {len(full_dataset)} images")


train_size = int(0.85 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])


val_dataset.dataset.transform = test_transform


BATCH_SIZE = 16 if torch.cuda.is_available() else 8
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

print(f"Train set: {len(train_dataset)} samples")
print(f"Validation set: {len(val_dataset)} samples")


print(f"🔍 Loading model: {MODEL_NAME}")
try:
    model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=len(full_dataset.classes))
    model = model.to(device)
    print(f"✅ Model loaded successfully: {MODEL_NAME}")
    
    
    total_params = sum(p.numel() for p in model.parameters())
    print(f"📈 Number of parameters: {total_params:,}")
    
except Exception as e:
    print(f"❌ Failed to load model: {e}")
    exit(1)


optimizer = optim.AdamW([
    {'params': model.stem.parameters(), 'lr': 1e-5},
    {'params': model.stages.parameters(), 'lr': 2e-5},
    {'params': model.head.parameters(), 'lr': 5e-4}
], weight_decay=0.05)

scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
criterion = nn.CrossEntropyLoss()


scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None

def train_convnext():
    print(f"\n🚀 Start ConvNeXt-specific training...")
    print(f"Using model: {MODEL_NAME}")
    
    best_acc = 0.0
    best_epoch = 0
    
    
    for epoch in range(10):
        print(f'\nEpoch {epoch+1}/10')
        print('-' * 50)
        
        
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0
        
        pbar = tqdm(train_loader, desc='Training')
        for batch_idx, (data, target) in enumerate(pbar):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            
            
            if scaler:
                with torch.cuda.amp.autocast():
                    output = model(data)
                    loss = criterion(output, target)
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
            else:
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
            
            train_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            train_total += target.size(0)
            train_correct += (predicted == target).sum().item()
            
            
            current_lr = optimizer.param_groups[2]['lr']
            pbar.set_postfix({
                'Loss': f'{loss.item():.4f}', 
                'Acc': f'{100.*train_correct/train_total:.2f}%',
                'LR': f'{current_lr:.2e}'
            })
        
        
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for data, target in tqdm(val_loader, desc='Validation'):
                data, target = data.to(device), target.to(device)
                
                if scaler:
                    with torch.cuda.amp.autocast():
                        output = model(data)
                else:
                    output = model(data)
                
                val_loss += criterion(output, target).item()
                _, predicted = torch.max(output.data, 1)
                val_total += target.size(0)
                val_correct += (predicted == target).sum().item()
        
        train_acc = train_correct / train_total
        val_acc = val_correct / val_total
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        
        print(f'Train loss: {avg_train_loss:.4f}, Train accuracy: {train_acc:.4f}')
        print(f'Val loss: {avg_val_loss:.4f}, Val accuracy: {val_acc:.4f}')
        
        
        if val_acc > best_acc:
            best_acc = val_acc
            best_epoch = epoch + 1
            torch.save({
                'model_state_dict': model.state_dict(),
                'best_acc': best_acc,
                'class_names': full_dataset.classes,
                'model_name': MODEL_NAME,
                'target_size': TARGET_SIZE,
                'epoch': epoch + 1
            }, 'convnext_best_model.pth')
            print(f'🎉 New best accuracy: {best_acc:.4f} (Epoch {epoch+1})')
        
        scheduler.step()
        print(f'Learning rate: {scheduler.get_last_lr()[0]:.2e}')
    
    print(f'\n🏆 Training complete! Best validation accuracy: {best_acc:.4f} (Epoch {best_epoch})')
    return best_acc


final_acc = train_convnext()
print(f"\n🏆 Final validation accuracy: {final_acc:.4f}")


checkpoint = torch.load('convnext_best_model.pth')
model.load_state_dict(checkpoint['model_state_dict'])
class_names = checkpoint['class_names']
print(f"📂 Loaded best model, accuracy: {checkpoint['best_acc']:.4f}")
print(f"📅 Best epoch: {checkpoint['epoch']}")


def convnext_tta_predict():
    if not os.path.exists(test_dir):
        print(f"❌ Test directory does not exist: {test_dir}")
        return None, None
    
    print("🔮 Start ConvNeXt-specific TTA prediction...")
    
    
    tta_transforms = [
        transforms.Compose([
            transforms.Resize(int(TARGET_SIZE * 1.1)),
            transforms.CenterCrop(TARGET_SIZE),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.Resize(int(TARGET_SIZE * 1.1)),
            transforms.CenterCrop(TARGET_SIZE),
            transforms.RandomHorizontalFlip(p=1.0),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.Resize(int(TARGET_SIZE * 1.2)),
            transforms.CenterCrop(TARGET_SIZE),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.Resize(int(TARGET_SIZE * 1.15)),
            transforms.CenterCrop(TARGET_SIZE),
            transforms.ColorJitter(brightness=0.1, contrast=0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    ]
    
    predictions = []
    confidences = []
    
    model.eval()
    with torch.no_grad():
        from PIL import Image
        
        
        test_images = []
        for root, _, files in os.walk(test_dir):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    test_images.append(os.path.join(root, file))
        
        test_images.sort()  
        
        for img_path in tqdm(test_images, desc="TTA Predict"):
            img = Image.open(img_path).convert('RGB')
            
            
            tta_outputs = []
            for tta_transform in tta_transforms:
                img_tensor = tta_transform(img).unsqueeze(0).to(device)
                
                if scaler:
                    with torch.cuda.amp.autocast():
                        output = model(img_tensor)
                else:
                    output = model(img_tensor)
                
                tta_outputs.append(torch.softmax(output, dim=1))
            
            
            avg_output = torch.stack(tta_outputs).mean(dim=0)
            confidence, prediction = torch.max(avg_output, dim=1)
            
            predictions.append(prediction.item())
            confidences.append(confidence.item())
    
    return predictions, confidences


test_predictions, test_confidences = convnext_tta_predict()

if test_predictions:
    
    submission_df = pd.DataFrame({
        'ID': range(len(test_predictions)),
        'Label': [class_names[pred] for pred in test_predictions]
    })
    
    submission_df.to_csv(output_file, index=False)  # ⬅️ Save to the specified output path
    
    
    detailed_df = pd.DataFrame({
        'ID': range(len(test_predictions)),
        'Label': [class_names[pred] for pred in test_predictions],
        'Confidence': test_confidences
    })
    detailed_df.to_csv(output_file.replace(".csv", "_detailed.csv"), index=False)
    
    print(f"\n📊 Prediction stats:")
    print(f"Samples: {len(test_predictions)}")
    print(f"Mean confidence: {np.mean(test_confidences):.4f}")
    print(f"Min confidence: {np.min(test_confidences):.4f}")
    print(f"Max confidence: {np.max(test_confidences):.4f}")
    
    
    high_conf_count = sum(1 for c in test_confidences if c > 0.8)
    medium_conf_count = sum(1 for c in test_confidences if 0.5 < c <= 0.8)
    low_conf_count = sum(1 for c in test_confidences if c <= 0.5)
    
    print(f"High-confidence samples (>0.8): {high_conf_count}/{len(test_confidences)} ({100*high_conf_count/len(test_confidences):.1f}%)")
    print(f"Medium-confidence samples (0.5-0.8): {medium_conf_count}/{len(test_confidences)} ({100*medium_conf_count/len(test_confidences):.1f}%)")
    print(f"Low-confidence samples (≤0.5): {low_conf_count}/{len(test_confidences)} ({100*low_conf_count/len(test_confidences):.1f}%)")


Device: cuda
GPU: NVIDIA GeForce RTX 3070 Ti
VRAM: 8.0GB
🎯 Specific model: convnext_large.fb_in22k_ft_in1k
📏 Input size: 224×224
📂 Loading data...
Found 23 classes, 13711 images
Train set: 11654 samples
Validation set: 2057 samples
🔍 Loading model: convnext_large.fb_in22k_ft_in1k
✅ Model loaded successfully: convnext_large.fb_in22k_ft_in1k
📈 Number of parameters: 196,265,687

🚀 Start ConvNeXt-specific training...
Using model: convnext_large.fb_in22k_ft_in1k

Epoch 1/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:26<00:00,  4.98it/s, Loss=0.1409, Acc=91.81%, LR=5.00e-04]
Validation: 100%|██████████| 129/129 [00:16<00:00,  7.65it/s]


Train loss: 0.3347, Train accuracy: 0.9181
Val loss: 0.2297, Val accuracy: 0.9397
🎉 New best accuracy: 0.9397 (Epoch 1)
Learning rate: 9.76e-06

Epoch 2/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:23<00:00,  5.08it/s, Loss=0.0098, Acc=97.19%, LR=4.88e-04]
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.46it/s]


Train loss: 0.1097, Train accuracy: 0.9719
Val loss: 0.2190, Val accuracy: 0.9451
🎉 New best accuracy: 0.9451 (Epoch 2)
Learning rate: 9.05e-06

Epoch 3/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:24<00:00,  5.06it/s, Loss=0.0268, Acc=98.66%, LR=4.52e-04] 
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.38it/s]


Train loss: 0.0573, Train accuracy: 0.9866
Val loss: 0.2386, Val accuracy: 0.9470
🎉 New best accuracy: 0.9470 (Epoch 3)
Learning rate: 7.94e-06

Epoch 4/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:26<00:00,  4.97it/s, Loss=0.0013, Acc=99.06%, LR=3.97e-04] 
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.29it/s]


Train loss: 0.0366, Train accuracy: 0.9906
Val loss: 0.2545, Val accuracy: 0.9441
Learning rate: 6.55e-06

Epoch 5/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:25<00:00,  5.01it/s, Loss=0.0003, Acc=99.34%, LR=3.27e-04] 
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.39it/s]


Train loss: 0.0249, Train accuracy: 0.9934
Val loss: 0.2678, Val accuracy: 0.9451
Learning rate: 5.00e-06

Epoch 6/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:26<00:00,  4.97it/s, Loss=0.0010, Acc=99.41%, LR=2.50e-04] 
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.37it/s]


Train loss: 0.0207, Train accuracy: 0.9941
Val loss: 0.2580, Val accuracy: 0.9494
🎉 New best accuracy: 0.9494 (Epoch 6)
Learning rate: 3.45e-06

Epoch 7/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:26<00:00,  4.98it/s, Loss=0.0033, Acc=99.43%, LR=1.73e-04] 
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.26it/s]


Train loss: 0.0168, Train accuracy: 0.9943
Val loss: 0.2493, Val accuracy: 0.9499
🎉 New best accuracy: 0.9499 (Epoch 7)
Learning rate: 2.06e-06

Epoch 8/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:26<00:00,  4.96it/s, Loss=0.0022, Acc=99.45%, LR=1.03e-04] 
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.24it/s]


Train loss: 0.0118, Train accuracy: 0.9945
Val loss: 0.2518, Val accuracy: 0.9504
🎉 New best accuracy: 0.9504 (Epoch 8)
Learning rate: 9.55e-07

Epoch 9/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:26<00:00,  4.99it/s, Loss=0.0006, Acc=99.51%, LR=4.77e-05] 
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.35it/s]


Train loss: 0.0091, Train accuracy: 0.9951
Val loss: 0.2593, Val accuracy: 0.9494
Learning rate: 2.45e-07

Epoch 10/10
--------------------------------------------------


Training: 100%|██████████| 729/729 [02:26<00:00,  4.96it/s, Loss=0.0004, Acc=99.55%, LR=1.22e-05] 
Validation: 100%|██████████| 129/129 [00:13<00:00,  9.29it/s]


Train loss: 0.0073, Train accuracy: 0.9955
Val loss: 0.2567, Val accuracy: 0.9490
Learning rate: 0.00e+00

🏆 Training complete! Best validation accuracy: 0.9504 (Epoch 8)

🏆 Final validation accuracy: 0.9504
📂 Loaded best model, accuracy: 0.9504
📅 Best epoch: 8
🔮 Start ConvNeXt-specific TTA prediction...


TTA Predict: 100%|██████████| 5/5 [00:00<00:00,  8.83it/s]


📊 Prediction stats:
Samples: 5
Mean confidence: 0.9988
Min confidence: 0.9956
Max confidence: 1.0000
High-confidence samples (>0.8): 5/5 (100.0%)
Medium-confidence samples (0.5-0.8): 0/5 (0.0%)
Low-confidence samples (≤0.5): 0/5 (0.0%)



