# Faster RCNN implementation for radar

## Imports

In [None]:
import os
import cv2
import glob
import time
import json
import torch
import pickle
import random
import torchvision
import numpy as np
from tqdm import tqdm
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from torchvision import models, transforms
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import functional as F
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

## Import config info 

In [94]:
def load_config(config_path):
    with open(config_path, 'r') as config_file:
        content = config_file.read()
        if not content.strip():
            raise ValueError("Configuration file is empty")
        config = json.loads(content)
    return config


config_path = "/home/hawk/Desktop/objectDetection/Object_detection/configs/config.json"
config = load_config(config_path)

## Load RADDet data

In [95]:
# Define folder paths
RAD_folder_path = config["RAD_folder_path"]
stereo_folder_path = config["stereo_folder_path"]
GT_folder_path = config["GT_folder_path"]

# Numpy files
all_RAD_files = os.listdir(RAD_folder_path)
RAD_files = [file for file in all_RAD_files if file.endswith('.npy')]
RAD_files_paths = [os.path.join(RAD_folder_path, file) for file in RAD_files]

# Ground truth files
all_GT_files = os.listdir(GT_folder_path)
GT_files = [file for file in all_GT_files if file.endswith('.pickle')]
GT_files_paths = [os.path.join(GT_folder_path, file) for file in all_GT_files]

# Stereo image files
all_stereo_files = os.listdir(stereo_folder_path)
stereo_files = [file for file in all_stereo_files if file.endswith('.jpg')]
stereo_files_paths = [os.path.join(stereo_folder_path, file) for file in all_stereo_files]

all_loaded_RAD_files = []
for file in RAD_files_paths:
    loaded_file = np.load(file)
    all_loaded_RAD_files.append(loaded_file)
print(' ---- RAD ----')
print(f'number of RAD files loaded: {len(all_loaded_RAD_files)}')
print(f'each RAD file is of size: {all_loaded_RAD_files[0].shape} which stands for: (Range, Azimuth, Doppler)')
print('\n')

all_loaded_GT_files = []
for file in GT_files_paths:
    with open(file, 'rb') as f:  
        ground_truth = pickle.load(f) 
    all_loaded_GT_files.append(ground_truth)
print(' ---- GT ----')
print(f'number of GT files loaded: {len(all_loaded_GT_files)}')
print(f'each GT file is a dict with length 3: classes, boxes and cart_boxes. an example:')
print(all_loaded_GT_files[0])
print('\n')

all_loaded_stereo_files = []
for jpg_file in stereo_files_paths:
    img = Image.open(jpg_file)
    img = img.convert('RGB')
    all_loaded_stereo_files.append(img)
print(' ---- Stereo ----')
print(f"Number of loaded images: {len(all_loaded_stereo_files)}")

 ---- RAD ----
number of RAD files loaded: 126
each RAD file is of size: (256, 256, 64) which stands for: (Range, Azimuth, Doppler)


 ---- GT ----
number of GT files loaded: 126
each GT file is a dict with length 3: classes, boxes and cart_boxes. an example:
{'classes': ['truck', 'car', 'car', 'car'], 'boxes': array([[177.5, 173. ,  30. ,  28. ,  51. ,  25. ],
       [202. , 217.5,  32. ,   9. ,  34. ,  17. ],
       [ 66. ,  95. ,  40.5,   5. ,   7. ,   4. ],
       [ 15.5,  89.5,  38. ,   4. ,   8. ,   5. ]]), 'cart_boxes': array([[183.5, 283.5,  30. ,  30. ],
       [218. , 290.5,  19. ,  10. ],
       [ 72.5, 206. ,   6. ,  11. ],
       [ 26.5, 183. ,   8. ,  13. ]])}


 ---- Stereo ----
Number of loaded images: 126


## Create a custom RADDet dataset 

In [96]:
def readAndEncodeGtRD(gt_instance, rd_shape):
    x_shape, y_shape = rd_shape[1], rd_shape[0]
    boxes = gt_instance["boxes"]
    classes = gt_instance["classes"]
    new_boxes = []
    new_classes = []
    for (box, class_) in zip(boxes, classes):
        yc, xc, h, w = box[0], box[2], box[3], box[5]
        y1, y2, x1, x2 = int(yc - h / 2), int(yc + h / 2), int(xc - w / 2), int(xc + w / 2)
        if x1 < 0:
            x1 += x_shape
            box1 = [y1 / y_shape, x1 / x_shape, y2 / y_shape, x_shape / x_shape]
            box2 = [y1 / y_shape, 0 / x_shape, y2 / y_shape, x2 / x_shape]
            new_boxes.append(box1)
            new_classes.append(class_)
            new_boxes.append(box2)
            new_classes.append(class_)
        elif x2 >= x_shape:
            x2 -= x_shape
            box1 = [y1 / y_shape, x1 / x_shape, y2 / y_shape, x_shape / x_shape]
            box2 = [y1 / y_shape, 0 / x_shape, y2 / y_shape, x2 / x_shape]
            new_boxes.append(box1)
            new_classes.append(class_)
            new_boxes.append(box2)
            new_classes.append(class_)
        else:
            new_boxes.append([y1 / y_shape, x1 / x_shape, y2 / y_shape, x2 / x_shape])
            new_classes.append(class_)
    return new_boxes, new_classes

In [97]:
def complexTo2channels(target_array):
    assert target_array.dtype == np.complex64
    output_array = getMagnitude(target_array)
    output_array = getLog(output_array)
    return output_array

def getMagnitude(target_array, power_order=2):
    target_array = np.abs(target_array)
    target_array = pow(target_array, power_order)
    return target_array

def getLog(target_array, scalar=1., log_10=True):
    if log_10:
        return scalar * np.log10(target_array + 1.)
    else:
        return target_array

def getSumDim(target_array, target_axis):
    output = np.sum(target_array, axis=target_axis)
    return output

In [98]:
class RaddetDataset(Dataset):
    """Custom Dataset for RADDet dataset."""
    
    def __init__(self, RAD_files, GT_files, stereo_files, transform=None):
        self.transform = transform
        self.classes_list = ["person", "bicycle", "car", "motorcycle", "bus", "truck"]
        self.RAD_maps = RAD_files
        self.GT_data = GT_files
        self.stereo_data = stereo_files
        self.global_mean_log = 3.2438383
        self.global_variance_log = 6.8367246
        self.global_max_log = 10.0805629
        self.global_min_log = 0.0
    
    def __len__(self):
        return len(self.RAD_maps)
    
    def __getitem__(self, idx):
        RAD_complex = self.RAD_maps[idx]
        
        RAD_data = complexTo2channels(RAD_complex)
        RAD_data = (RAD_data - self.global_mean_log) / self.global_variance_log
        
        gt_instances = self.GT_data[idx]

        RD_data = getSumDim(RAD_data, target_axis=1)

        bboxes, classes = readAndEncodeGtRD(gt_instances, RD_data.shape)
        #seq_id = int(RAD_filename.split('/')[-2].split('_')[-1])
        
        objects = []
        for box, class_ in zip(bboxes, classes):
            ymin, xmin, ymax, xmax = box
            area = (xmax - xmin) * (ymax - ymin)
            objects.append({
                'bbox': [ymin, xmin, ymax, xmax],
                'label': self.classes_list.index(class_),
                'area': area,
            })
        
        image_filename = self.stereo_data[idx]
        image = plt.imread(image_filename)


        sample = {
            'spectrum': torch.tensor(RD_data, dtype=torch.float32),
            'image': image,
            'image_filename': image_filename,
            'objects': objects,
        }
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample

In [99]:
train_dataset = RaddetDataset(all_loaded_RAD_files, all_loaded_GT_files, stereo_files_paths)#all_loaded_stereo_files)
sample = train_dataset[34]

print("RADDet Dataset Sample Details:")
print("\n")
print(f'Sample spectrum shape: {sample["spectrum"].shape}')
print(f'Image filename: {sample["image_filename"]}')
print(f'Number of objects: {len(sample["objects"])}')
for obj in sample["objects"]:
    print(f'Object bbox: {obj["bbox"]}, label: {obj["label"]}, area: {obj["area"]}')


RADDet Dataset Sample Details:


Sample spectrum shape: torch.Size([256, 64])
Image filename: /home/hawk/Desktop/data/Raddet/train/stereo_image/part_9/004115.jpg
Number of objects: 3
Object bbox: [0.27734375, 0.859375, 0.35546875, 0.90625], label: 2, area: 0.003662109375
Object bbox: [0.33203125, 0.0, 0.36328125, 0.03125], label: 2, area: 0.0009765625
Object bbox: [0.0703125, 0.0, 0.08203125, 0.109375], label: 2, area: 0.00128173828125


In [101]:
def get_transform():
    def transform(sample):
        spectrum = sample['spectrum']
        image = sample['image']
        objects = sample['objects']
        
        image = F.to_tensor(image)
        return {'spectrum': spectrum, 'image': image, 'objects': objects}
    
    return transform

# Function to validate bounding boxes
def is_valid_bbox(bbox):
    ymin, xmin, ymax, xmax = bbox
    return (xmax - xmin) > 0 and (ymax - ymin) > 0

# Load the dataset with transformations
train_dataset = RaddetDataset(all_loaded_RAD_files, all_loaded_GT_files, stereo_files_paths, transform=get_transform())

# Define the data loader
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: x)

# Define the model
def get_model(num_classes):
    # Load a pre-trained model
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    
    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    
    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model

# Number of classes (including background)
num_classes = len(train_dataset.classes_list) + 1

# Get the model
model = get_model(num_classes)

# Move model to the right device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    i = 0
    for samples in train_loader:
        images = list(sample['image'].to(device) for sample in samples)
        targets = []
        for sample in samples:
            d = {}
            valid_objects = [obj for obj in sample['objects'] if is_valid_bbox(obj['bbox'])]
            if len(valid_objects) == 0:
                continue
            d['boxes'] = torch.tensor([obj['bbox'] for obj in valid_objects], dtype=torch.float32).to(device)
            d['labels'] = torch.tensor([obj['label'] for obj in valid_objects], dtype=torch.int64).to(device)
            targets.append(d)
        
        if len(targets) == 0:
            continue

        # Forward pass
        loss_dict = model(images, targets)

        # Calculate the total loss
        losses = sum(loss for loss in loss_dict.values())

        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        print(f'Epoch {epoch+1}, Iteration {i+1}, Loss: {losses.item()}')
        i += 1
    
    # Update the learning rate
    lr_scheduler.step()

print("Training complete!")


Epoch 1, Iteration 1, Loss: 4.70062780380249
Epoch 1, Iteration 2, Loss: 2.171436309814453
Epoch 1, Iteration 3, Loss: 1.4171062707901
Epoch 1, Iteration 4, Loss: 1.3254084587097168
Epoch 1, Iteration 5, Loss: 0.9500620365142822
Epoch 1, Iteration 6, Loss: 0.6933348178863525
Epoch 1, Iteration 7, Loss: 0.9877203702926636
Epoch 1, Iteration 8, Loss: 0.537901759147644
Epoch 1, Iteration 9, Loss: 0.404548704624176
Epoch 1, Iteration 10, Loss: 0.5297384858131409
Epoch 1, Iteration 11, Loss: 0.5876920223236084
Epoch 1, Iteration 12, Loss: 0.6600764393806458
Epoch 1, Iteration 13, Loss: 0.567037045955658
Epoch 1, Iteration 14, Loss: 0.33590707182884216
Epoch 1, Iteration 15, Loss: 0.8233029842376709
Epoch 1, Iteration 16, Loss: 0.6091045141220093
Epoch 1, Iteration 17, Loss: 0.4139969050884247
Epoch 1, Iteration 18, Loss: 0.6452269554138184
Epoch 1, Iteration 19, Loss: 0.9290531277656555
Epoch 1, Iteration 20, Loss: 0.5474666357040405
Epoch 1, Iteration 21, Loss: 0.5110000967979431
Epoch 1, 

## Faster RCNN

In [None]:
def collate(batch):
    inputs = [item['spectrum'].unsqueeze(1) for item in batch]  # Add channel dimension
    labels = [torch.tensor([obj['label'] for obj in item['objects']], dtype=torch.float32) for item in batch]
    
    # Assuming `pad_sequence` is from `torch.nn.utils.rnn`
    inputs_padded = pad_sequence(inputs, batch_first=True, padding_value=0.0)
    labels_stacked = pad_sequence(labels, batch_first=True, padding_value=-1)  # Assuming -1 for padding labels
    
    return inputs_padded, labels_stacked


In [None]:
model = models.resnet18(pretrained=False)
num_classes = 6  
model.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10  
dataset = RaddetDataset(all_loaded_RAD_files, all_loaded_GT_files, stereo_files_paths)
dataloader = DataLoader(dataset, batch_size=4, collate_fn=collate)
test_dataset = RaddetDataset(all_loaded_RAD_files, all_loaded_GT_files, stereo_files_paths)
test_dataloader = DataLoader(test_dataset, batch_size=4, collate_fn=collate)  # Adjust batch_size as needed


for epoch in range(num_epochs):
    model.train()  
    running_loss = 0.0
    total_correct = 0
    total_labels = 0

    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        print(inputs)
        print(labels)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        preds = torch.sigmoid(outputs) > 0.5  
        total_correct += (preds == labels).sum().item()
        total_labels += labels.numel()

    epoch_loss = running_loss / len(dataloader)
    epoch_acc = total_correct / total_labels
    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train-   Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")
    model.eval()  
    running_loss = 0.0
    total_correct = 0
    total_labels = 0
    with torch.no_grad():  
        for inputs, labels in test_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            test_preds = torch.sigmoid(outputs) > 0.5
            total_correct += (test_preds == labels).sum().item()
            total_labels += labels.numel()

    epoch_loss = running_loss / len(test_dataloader)
    epoch_acc = total_correct / total_labels
    print(f'Test-   Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')



tensor([[[[-15.8332, -27.5944, -22.8197,  ..., -29.1857, -24.1959, -27.7856]],

         [[-33.2669, -22.0209, -21.0198,  ..., -12.4791, -19.2950, -37.5013]],

         [[-36.8685, -10.0651, -16.8682,  ..., -23.9756, -25.5641, -26.2080]],

         ...,

         [[-13.6355, -22.4010, -15.2205,  ..., -20.9831,  -8.2578, -20.8184]],

         [[-14.4204, -24.5969,  -7.7205,  ..., -26.4825, -24.9407, -28.4674]],

         [[-30.8561, -28.0231, -18.4204,  ..., -21.4272, -30.5318, -48.2270]]],


        [[[-10.1008, -19.9613, -15.3482,  ..., -19.0032, -30.9790, -12.6801]],

         [[ -7.1256, -15.8463, -18.0300,  ..., -14.2009, -11.8860,  -7.1965]],

         [[-14.4611, -15.5691, -26.6829,  ..., -15.8769, -10.5550, -26.4153]],

         ...,

         [[ -7.4395, -20.7321,  -9.3639,  ...,  -5.1072,  -3.2042, -21.3909]],

         [[ -3.8068, -21.2065, -11.6436,  ...,  -7.7607,  -9.2954, -23.5890]],

         [[-21.4882, -35.1383, -25.5433,  ..., -21.3057, -33.3178, -21.6531]]],


      

RuntimeError: Given groups=1, weight of size [64, 4, 7, 7], expected input[4, 256, 1, 64] to have 4 channels, but got 256 channels instead

In [None]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),  # Resize images to 224x224 for ResNet
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize for pre-trained ResNet
])

dataset = RaddetDataset(all_loaded_RAD_files, all_loaded_GT_files, stereo_files_paths)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=custom_collate_fn)
num_classes = len(dataset.classes_list)
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 25
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in dataloader:
        inputs = batch['images'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(dataloader)}')


RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[16, 480, 1280, 3] to have 3 channels, but got 480 channels instead