In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.multiprocessing as mp
from FPN import Features, FPNetwork , classificationhead , bboxhead
from Loss import Lossfunction
from datasets import load_dataset
import torch.optim as optim
from torch.cuda.amp import  GradScaler
from torch.amp import autocast
import gc
from dataset_convert import AnchorGenerator, FaceDetectionDataset
# device = torch.device("mps")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
mp.set_start_method('spawn', force=True)

In [3]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model = model.features.to(device)

Using cache found in /home/.cache/torch/hub/pytorch_vision_v0.10.0


In [4]:
device

device(type='cuda')

In [5]:
dataset = load_dataset("CUHK-CSE/wider_face")
train_dataset = dataset['train'].with_format("torch")
val_dataset = dataset['validation'].with_format("torch")

In [6]:
train_dataset[0]

{'image': tensor([[[237, 237, 237,  ..., 248, 255, 243],
          [ 59,  59,  59,  ..., 168, 250, 255],
          [ 37,  37,  37,  ..., 162, 255, 255],
          ...,
          [ 67,  66,  64,  ..., 156, 255, 254],
          [ 74,  72,  68,  ..., 156, 255, 254],
          [ 72,  70,  67,  ..., 156, 255, 255]],
 
         [[251, 251, 251,  ..., 250, 255, 243],
          [ 72,  72,  72,  ..., 169, 249, 255],
          [ 50,  50,  50,  ..., 162, 254, 255],
          ...,
          [ 73,  72,  70,  ..., 156, 255, 253],
          [ 80,  78,  74,  ..., 156, 255, 253],
          [ 78,  76,  73,  ..., 156, 255, 254]],
 
         [[255, 255, 255,  ..., 249, 250, 233],
          [106, 106, 106,  ..., 173, 245, 247],
          [ 94,  94,  94,  ..., 172, 255, 253],
          ...,
          [ 35,  34,  32,  ..., 146, 247, 248],
          [ 42,  40,  36,  ..., 146, 247, 248],
          [ 40,  38,  35,  ..., 146, 248, 249]]], dtype=torch.uint8),
 'faces': {'bbox': tensor([[449., 330., 122., 149.]]),

In [7]:
extractor = Features(model,['3','6', '13','18'])
topdown = FPNetwork(out_channels=256)
classifier = classificationhead(channels=256, num_anchors= 12, num_of_classes= 1)
bboxregression = bboxhead(channels= 256 , num_anchors= 12)
loss =Lossfunction(lambd=10)
anchors = AnchorGenerator()
data = FaceDetectionDataset(train_dataset,anchors)
vali_data = FaceDetectionDataset(val_dataset,anchors)

In [8]:
torch.cuda.empty_cache()
print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
print(f"Cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")

Allocated: 0.04 GB
Cached: 0.05 GB


In [9]:
print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
print(f"Cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")

Allocated: 0.04 GB
Cached: 0.05 GB


In [10]:
# batch_size = 16
# epochs = 20
# learning_rate = 1e-3
def forward(p):
    features = extractor.extract(p)
    newfeatures = topdown(features)
    output = {}
    for key in list(newfeatures.keys()):
        temp = {}
        temp["bbox"] = bboxregression(newfeatures[key])
        temp["cls"] = classifier(newfeatures[key])
        output[key] = temp
    return output

def train(epochs:int,training_data , validation_data=None):
    extractor.train()
    topdown.train()
    classifier.train()
    bboxregression.train()
    optimizer = optim.Adam(list(extractor.parameters())+
                           list(topdown.parameters())+
                           list(classifier.parameters())+
                           list(bboxregression.parameters()), lr=learning_rate)
    loSS = {}
    for i in range (0,epochs):
        train_loss= 0
        for key, (image,bbox) in enumerate(training_data):
            epoch_loss = {}
            model_pred = forward(image.cuda())
            optimizer.zero_grad()
            ll = loss(model_pred, bbox)
            del image , model_pred , bbox

            train_loss+=ll
            if (key % 1 == 0):
                epoch_loss[key] = train_loss/10
                print(f"The avg loss for {key}th data is {train_loss/10}")
                train_loss = 0   
            if key == 4:
                    break
            ll.backward()
            optimizer.step()
            del ll
        loSS[i] = epoch_loss
        del epoch_loss
    return loSS

# train(epochs=epochs,training_data=training_data)


In [11]:
total_params=sum(p.numel() for p in extractor.parameters())
total_params+= sum(p.numel() for p in topdown.parameters())
total_params+= sum(p.numel() for p in classifier.parameters())
total_params+= sum(p.numel() for p in bboxregression.parameters())

print(total_params)

9818964


In [12]:
scaler = GradScaler()

  scaler = GradScaler()


In [18]:
learning_rate = 5e-4

def train_with_accumulation(epochs: int, training_data, validation_data=None, accumulation_steps=4):
    # Set models to training mode
    extractor.train()
    topdown.train()
    classifier.train()
    bboxregression.train()
    
    # Enable gradient checkpointing
    if hasattr(extractor, 'gradient_checkpointing_enable'):
        extractor.gradient_checkpointing_enable()
    if hasattr(topdown, 'gradient_checkpointing_enable'):
        topdown.gradient_checkpointing_enable()
    
    optimizer = optim.Adam(
        list(extractor.parameters()) +
        list(topdown.parameters()) +
        list(classifier.parameters()) +
        list(bboxregression.parameters()), 
        lr=learning_rate
    )
    
    loss_history = {}
    
    for epoch in range(epochs):
        epoch_loss = 0.0
        batch_count = 0
        running_loss = 0.0
        
        print(f"\nEpoch {epoch + 1}/{epochs}")
        print("-" * 30)
        try:
            for batch_idx, (image, bbox) in enumerate(training_data):
                # Mixed precision forward pass
                optimizer.zero_grad()
                
                with autocast('cuda'):
                    model_pred = forward(image.cuda())
                    ll = loss(model_pred, bbox) / accumulation_steps  # Scale loss
                # del image , model_pred , bbox
                # Backward pass
                scaler.scale(ll).backward()
                
                # Step optimizer every accumulation_steps
                if (batch_idx + 1) % accumulation_steps == 0:
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()
                
                # Accumulate loss
                batch_loss = ll.item() * accumulation_steps  # Unscale for logging
                epoch_loss += batch_loss
                running_loss += batch_loss
                batch_count += 1
                
                # Print progress
                if (batch_idx + 1) % 10 == 0:
                    avg_running_loss = running_loss / 10
                    # print("-" * 30)
                    print(f"Batch {batch_idx + 1}: Avg Loss = {avg_running_loss:.6f}")
                    # print("-" * 30)
                    running_loss = 0.0

                # Memory 
                del image , model_pred , bbox

                del ll
                torch.cuda.empty_cache()
                gc.collect()
        except Exception as e :
            print (e)
            print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
            print(f"Cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
            torch.cuda.empty_cache()
            print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
            print(f"Cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
            break

        except KeyboardInterrupt:
            print("INTERRUPTED!!")
            return loss_history   

                
        if batch_count % accumulation_steps != 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
        
        avg_epoch_loss = epoch_loss / batch_count if batch_count > 0 else 0
        loss_history[epoch] = avg_epoch_loss
        print("-" * 30)
        print(f"Epoch {epoch + 1} Average Loss: {avg_epoch_loss:.6f}")
        print("-" * 30)
        
        
        # Cleanup at epoch end
        gc.collect()
        torch.cuda.empty_cache()
    
    return loss_history


In [19]:
torch.cuda.empty_cache()
batch_size = 8
epochs = 5
training_data = DataLoader(
    data,
    batch_size=batch_size, 
    num_workers=4,     # Reduce if GPU memory is full
    pin_memory=True,         # Faster CPU->GPU transfer
    persistent_workers=True, # Reuse workers across epochs
    prefetch_factor=4,       # Prefetch more batches
    drop_last=True,
    shuffle=True
)
validation_data = DataLoader(vali_data,
    batch_size=batch_size, 
    num_workers=4,     # Reduce if GPU memory is full
    pin_memory=True,         # Faster CPU->GPU transfer
    persistent_workers=True, # Reuse workers across epochs
    prefetch_factor=4,       # Prefetch more batches
    drop_last=True,
    shuffle=True
)

In [15]:
ss = list(classifier.parameters())

In [20]:
lossdata = train_with_accumulation(epochs = 1, training_data = training_data)


Epoch 1/1
------------------------------


Batch 10: Avg Loss = 460.772297
Batch 20: Avg Loss = 140.993696
Batch 30: Avg Loss = 200.815022
Batch 40: Avg Loss = 158.538258
Batch 50: Avg Loss = 149.542924
Batch 60: Avg Loss = 202.118232
Batch 70: Avg Loss = 131.681020
Batch 80: Avg Loss = 3440.989156
Batch 90: Avg Loss = 277.677647
Batch 100: Avg Loss = 165.439429
Batch 110: Avg Loss = 233.053395
Batch 120: Avg Loss = 194.951407
Batch 130: Avg Loss = 158.863708
Batch 140: Avg Loss = 205.299765
Batch 150: Avg Loss = 223.091113
Batch 160: Avg Loss = 161.764712
Batch 170: Avg Loss = 121.819528
Batch 180: Avg Loss = 413.141696
Batch 190: Avg Loss = 244.212051
Batch 200: Avg Loss = 169.700817
Batch 210: Avg Loss = 286.633378
Batch 220: Avg Loss = 170.158089
Batch 230: Avg Loss = 155.058964
Batch 240: Avg Loss = 131.824800
Batch 250: Avg Loss = 433.727970
Batch 260: Avg Loss = 162.855054
Batch 270: Avg Loss = 263.327998
Batch 280: Avg Loss = 3278.883255
Batch 290: Avg Loss = 3203.168296
Batch 300: Avg Loss = 139.934084
Batch 310: Avg L

In [None]:
loss = Lossfunction()
extractor.eval()
topdown.eval()
classifier.eval()
bboxregression.eval()
accumulation_steps = 4
for batch_idx, (image, bbox) in enumerate(validation_data):

    with autocast('cuda'):
        model_pred = forward(image.cuda())
        ll = loss(model_pred, bbox) / accumulation_steps  # Scale loss
    del image , model_pred , bbox
    print(ll)

In [19]:
torch.cuda.empty_cache()
print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
print(f"Cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")

Allocated: 2.11 GB
Cached: 2.21 GB


In [20]:
data[1000][0].unsqueeze(0).shape

torch.Size([1, 3, 640, 640])

In [24]:
s=forward(data[200][0].unsqueeze(0).cuda())
t= data[200][1]
ls = loss(s,t)


In [25]:
ls

tensor(106.7692, device='cuda:0', grad_fn=<AddBackward0>)

In [19]:
len(t[1]['cls_targets'].shape)

2

In [33]:
(s['3']['bbox'].cpu()-t[3]['bbox_targets']).sum()


tensor(-9428.2197, grad_fn=<SumBackward0>)

In [34]:
(s['3']['cls'].cpu()>0.9).sum()

tensor(0)

In [37]:
torch.sigmoid((s['3']['cls']))

tensor([[[0.2211],
         [0.2228],
         [0.2230],
         ...,
         [0.2234],
         [0.2228],
         [0.2218]]], device='cuda:0', grad_fn=<SigmoidBackward0>)

In [39]:
t[-1]['cls_targets']

tensor([[[0],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]]])

In [37]:
ls = loss(s,t)

In [38]:
ls

tensor(3.9703, device='cuda:0', grad_fn=<AddBackward0>)

In [26]:
s['18']['bbox'].squeeze(0).cpu()


tensor([[0.4933, 0.5469, 0.4821, 0.4534],
        [0.4507, 0.4666, 0.4542, 0.4803],
        [0.4810, 0.4796, 0.4658, 0.4742],
        ...,
        [0.4389, 0.0000, 0.4425, 0.0434],
        [0.4311, 0.0000, 0.4414, 0.0056],
        [0.4147, 0.0000, 0.3418, 0.0000]], grad_fn=<ToCopyBackward0>)

In [None]:

(data[1000][1][0]['bbox_targets'] - s['18']['bbox'].squeeze(0).cpu())


tensor(-0.1751, grad_fn=<MeanBackward0>)

In [80]:
model3 = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model3 = model3.features.to(device)

Using cache found in /home/.cache/torch/hub/pytorch_vision_v0.10.0


In [None]:
torch.all(list(extractor.parameters()) == list(model3.parameters()))

RuntimeError: Boolean value of Tensor with more than one value is ambiguous

In [21]:
torch.save(extractor.state_dict(),'/home/faces2.0/models/extractor.pt')
torch.save(topdown.state_dict(),'/home/faces2.0/models/topdown.pt')
torch.save(classifier.state_dict(),'/home/faces2.0/models/classifier.pt')
torch.save(bboxregression.state_dict(),'/home/faces2.0/models/bboxregression.pt')

