In [1]:
print("start")
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import albumentations as A
from albumentations.pytorch import ToTensorV2
import os
import cv2
import numpy as np
from tqdm import tqdm
import time
import warnings
print("import end")


warnings.filterwarnings('ignore')
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'
torch.backends.cudnn.benchmark = True

def preprocess_image(image):
    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
    cl = clahe.apply(l)
    enhanced = cv2.merge((cl,a,b))
    return cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)

class DigitDataset:
    def __init__(self, image_dir, label_dir, transforms=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transforms = transforms
        self.images = [f for f in sorted(os.listdir(image_dir)) if f.endswith(('.jpg', '.jpeg', '.png'))]
        
    def __getitem__(self, idx):
        try:
            image_path = os.path.join(self.image_dir, self.images[idx])
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError(f"Failed to load image: {image_path}")
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = preprocess_image(image)
            
            label_path = os.path.join(self.label_dir, self.images[idx].replace('.jpg', '.txt'))
            boxes = []
            labels = []
            
            with open(label_path, 'r') as f:
                for line in f.readlines():
                    class_id, x, y, w, h = map(float, line.strip().split())
                    adjusted_class_id = int(class_id) + 1
                    
                    
                    x1 = max(0, (x - w/2) * image.shape[1])
                    y1 = max(0, (y - h/2) * image.shape[0])
                    x2 = min(image.shape[1], (x + w/2) * image.shape[1])
                    y2 = min(image.shape[0], (y + h/2) * image.shape[0])
                    
                    if x2 <= x1 or y2 <= y1 or (x2 - x1) < 6 or (y2 - y1) < 6:
                        continue
                    
                    boxes.append([x1, y1, x2, y2])
                    labels.append(adjusted_class_id)
            
            if not boxes:
                return self.__getitem__((idx + 1) % len(self))
            
            boxes = np.array(boxes, dtype=np.float32)
            labels = np.array(labels, dtype=np.int64)
            
            if self.transforms:
                transformed = self.transforms(
                    image=image,
                    bboxes=boxes,
                    class_labels=labels
                )
                image = transformed['image']
                boxes = transformed['bboxes']
                labels = transformed['class_labels']
            
            if len(boxes) == 0:
                return self.__getitem__((idx + 1) % len(self))
                
            target = {
                'boxes': torch.as_tensor(boxes, dtype=torch.float32),
                'labels': torch.as_tensor(labels, dtype=torch.int64)
            }
            
            return image, target
        except Exception as e:
            print(f"Error processing image {self.images[idx]}: {str(e)}")
            return self.__getitem__((idx + 1) % len(self))

    def __len__(self):
        return len(self.images)

def get_transform(train):
    if train:
        return A.Compose([
            A.Resize(800, 800, always_apply=True),
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.3, rotate_limit=20, p=0.7),
            A.OneOf([
                A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
                A.RandomGamma(gamma_limit=(70, 130), p=0.5),
                A.CLAHE(clip_limit=4.0, p=0.5),
            ], p=0.7),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ], bbox_params=A.BboxParams(
            format='pascal_voc',
            label_fields=['class_labels'],
            min_visibility=0.6
        ))
    else:
        return A.Compose([
            A.Resize(800, 800, always_apply=True),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ], bbox_params=A.BboxParams(
            format='pascal_voc',
            label_fields=['class_labels']
        ))

def train_one_epoch(model, optimizer, data_loader, device, scheduler, scaler):
    model.train()
    total_loss = 0
    cls_loss = 0
    box_loss = 0
    
    for batch_idx, (images, targets) in enumerate(tqdm(data_loader)):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        optimizer.zero_grad(set_to_none=True)
        
        with torch.autocast(device_type='cuda', dtype=torch.float16):
            loss_dict = model(images, targets)
            loss_dict['loss_box_reg'] *= 1.5
            losses = sum(loss for loss in loss_dict.values())
            cls_loss += loss_dict['loss_classifier'].item()
            box_loss += loss_dict['loss_box_reg'].item()
        
        scaler.scale(losses).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        
        total_loss += losses.item()
        
        if (batch_idx + 1) % 50 == 0:
            print(f'\nBatch [{batch_idx + 1}/{len(data_loader)}]:')
            print(f'Classification Loss: {cls_loss/(batch_idx+1):.4f}')
            print(f'Box Regression Loss: {box_loss/(batch_idx+1):.4f}')
            print(f'Current LR: {scheduler.get_last_lr()[0]:.6f}')
    
    return {
        'total_loss': total_loss / len(data_loader),
        'cls_loss': cls_loss / len(data_loader),
        'box_loss': box_loss / len(data_loader)
    }

def main():
    start_time = time.time()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    num_classes = 11
    num_epochs = 20
    batch_size = 16
    learning_rate = 5e-4
    
    train_dataset = DigitDataset(
        image_dir='data/train/images',
        label_dir='data/train/labels',
        transforms=get_transform(train=True)
    )
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
        collate_fn=lambda x: tuple(zip(*x)),
        pin_memory=True,
        persistent_workers=True
    )
    
    model = fasterrcnn_resnet50_fpn_v2(
        weights='DEFAULT',
        box_score_thresh=0.01,
        box_nms_thresh=0.45,
        box_detections_per_img=3,
        rpn_pre_nms_top_n_train=2000,
        rpn_post_nms_top_n_train=1000,
        rpn_pre_nms_top_n_test=1000,
        rpn_post_nms_top_n_test=500,
        min_size=1024,
        max_size=1600
    )
    
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    model = model.to(device)
    
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(params, lr=learning_rate, weight_decay=0.02, amsgrad=True)
    
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=learning_rate,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader),
        pct_start=0.3,
        anneal_strategy='cos',
        div_factor=10.0
    )
    
    scaler = torch.amp.GradScaler('cuda')
    best_loss = float('inf')
    
    print("\nStarting Training...")
    print(f"Total epochs: {num_epochs}")
    print(f"Batch size: {batch_size}")
    print(f"Initial learning rate: {learning_rate}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Steps per epoch: {len(train_loader)}\n")
    
    for epoch in range(num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}]")
        print("-" * 20)
        
        losses = train_one_epoch(model, optimizer, train_loader, device, scheduler, scaler)
        
        print(f"\nEpoch Summary:")
        print(f"Total Loss: {losses['total_loss']:.4f}")
        print(f"Classification Loss: {losses['cls_loss']:.4f}")
        print(f"Box Regression Loss: {losses['box_loss']:.4f}")
        print(f"Learning Rate: {scheduler.get_last_lr()[0]:.6f}")
        
        if losses['total_loss'] < best_loss:
            best_loss = losses['total_loss']
            print(f"\nSaving best model with loss: {best_loss:.4f}")
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': best_loss,
            }, 'best_model.pth')
    
    training_time = (time.time() - start_time) / 60
    print("\n" + "="*50)
    print("Training Completed!")
    print("="*50)
    print(f"Total training time: {training_time:.2f} minutes")
    print(f"Best model saved with loss: {best_loss:.4f}")
    print(f"Final learning rate: {scheduler.get_last_lr()[0]:.6f}")
    print("\nTraining Statistics:")
    print(f"- Total epochs: {num_epochs}")
    print(f"- Total iterations: {num_epochs * len(train_loader)}")
    print(f"- Images processed: {num_epochs * len(train_dataset)}")
    print(f"- Final classification loss: {losses['cls_loss']:.4f}")
    print(f"- Final box regression loss: {losses['box_loss']:.4f}")
    print("="*50)

if __name__ == '__main__':
    main()

start


  check_for_updates()


import end
Using device: cuda

Starting Training...
Total epochs: 20
Batch size: 16
Initial learning rate: 0.0005
Training samples: 3048
Steps per epoch: 191


Epoch [1/20]
--------------------


  0%|          | 0/191 [00:20<?, ?it/s]


KeyboardInterrupt: 