In [1]:
import cv2
import torch

In [2]:
def gstreamer_pipeline(
    sensor_id=0,
    capture_width=1280,
    capture_height=720,
    display_width=1280,
    display_height=720,
    framerate=60,
    flip_method=0,
):
    return (
        "nvarguscamerasrc sensor-id=%d ! "
        "video/x-raw(memory:NVMM), width=(int)%d, height=(int)%d, framerate=(fraction)%d/1 ! "
        "nvvidconv flip-method=%d ! "
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
        "videoconvert ! "
        "video/x-raw, format=(string)BGR ! appsink"
        % (
            sensor_id,
            capture_width,
            capture_height,
            framerate,
            flip_method,
            display_width,
            display_height,
        )
    )

In [3]:
def detect(model):
    COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255)]  # 다양한 색상
    window_title = "YOLOv5 detection"
    inference_times_mean = []

    # To flip the image, modify the flip_method parameter (0 and 2 are the most common)
    print(gstreamer_pipeline(flip_method=0))
    video_capture = cv2.VideoCapture(gstreamer_pipeline(flip_method=0), cv2.CAP_GSTREAMER)
    if video_capture.isOpened():
        try:
            window_handle = cv2.namedWindow(window_title, cv2.WINDOW_AUTOSIZE)
            while True:
                ret_val, frame = video_capture.read()
                if not ret_val:
                    break
                start_event = torch.cuda.Event(enable_timing=True)
                end_event = torch.cuda.Event(enable_timing=True)
                
                with torch.no_grad():
                    start_event.record()
                    results = model(frame, size=1280, augment=True)
                    end_event.record()
                    
                torch.cuda.synchronize()
                elapsed_time_ms = start_event.elapsed_time(end_event)
                
                for *xyxy, conf, cls in results.xyxy[0].tolist():
                    x1, y1, x2, y2 = map(int, xyxy)
                    label = f"{results.names[int(cls)]} {conf:.2f}"
                    color = COLORS[int(cls) % len(COLORS)]
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                    label_x1, label_y1 = x1, y1 - label_size[1] - 10
                    label_x2, label_y2 = x1 + label_size[0] + 10, y1
                    cv2.rectangle(frame, (label_x1, label_y1), (label_x2, label_y2), color, -1)
                    cv2.putText(frame, label, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
                cv2.imshow(window_title, frame)
                print('inference time : {}ms'.format(elapsed_time_ms))
                inference_times_mean.append(elapsed_time_ms)
                keyCode = cv2.waitKey(10) & 0xFF
                # Stop the program on the ESC key or 'q'
                if keyCode == 27 or keyCode == ord('q'):
                    break
        finally:
            video_capture.release()
            cv2.destroyAllWindows()
            print("\nMean inference time : {}ms".format(sum(inference_times_mean[5:]) / len(inference_times_mean[5:])))
    else:
        print("Error: Unable to open camera")

In [4]:
import os

def prune(model, amount=0.3):
    import torch.nn.utils.prune as prune
    print('Pruning model... ', end='')
    for name, m in model.named_modules():
        if isinstance(m, torch.nn.Conv2d):
            prune.l1_unstructured(m, name='weight', amount=amount)  # prune
            prune.remove(m, 'weight')  # make permanent
    print(' %.3g global sparsity' % sparsity(model))
    return model
            
def sparsity(model):
    # Return global model sparsity
    a, b = 0, 0
    for p in model.parameters():
        a += p.numel()
        b += (p == 0).sum()
    return b / a

def quantize(model):
    model = model.half()  # 모델의 가중치를 FP16으로 변환
    for layer in model.modules():
        if isinstance(layer, torch.nn.BatchNorm2d):
            layer.float()  # BatchNorm 레이어는 FP32로 유지
    return model

def print_model_size(mdl):
    torch.save(mdl.state_dict(), "tmp.pt")
    print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6))
    os.remove('tmp.pt')

In [35]:
def real_prune(model, pruning_ratio):
    import torch_pruning as tp
    #print(model.model)
    for p in model.parameters():
        p.requires_grad_(True)

    example_inputs = torch.randn(1, 3, 640, 640).to(device)
    imp = tp.importance.MagnitudeImportance(p=2) # L2 norm pruning

    ignored_layers = []
    from models.yolo import Detect
    from models.common import Conv
    for m in model.model.modules():
        if isinstance(m, Detect):
            ignored_layers.append(m)
    #print(ignored_layers)

    iterative_steps = 1 # progressive pruning
    pruner = tp.pruner.MetaPruner(
        model.model,
        example_inputs,
        importance=imp,
        global_pruning=True,
        iterative_steps=iterative_steps,
        pruning_ratio=pruning_ratio, # remove 50% channels, ResNet18 = {64, 128, 256, 512} => ResNet18_Half = {32, 64, 128, 256}
        ignored_layers=ignored_layers,
        round_to=4,
    )

    
    base_macs, base_nparams = tp.utils.count_ops_and_params(model, example_inputs)
    pruner.step()

    pruned_macs, pruned_nparams = tp.utils.count_ops_and_params(model, example_inputs)
    #print(model)
    print("Before Pruning: MACs=%f G, #Params=%f G"%(base_macs/1e9, base_nparams))
    print("After Pruning: MACs=%f G, #Params=%f G"%(pruned_macs/1e9, pruned_nparams))
    return model

In [6]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda:0


In [7]:
%cd ~/Documents/Glasses_Detection_with_YOLOv5/

/home/jetson/Documents/Glasses_Detection_with_YOLOv5


In [36]:
model = torch.hub.load('yolov5', 'custom', 'finetuned_weights/yolov5n_finetuned.pt', source='local', force_reload=True, device=device)
#model = torch.hub.load('ultralytics/yolov5', "custom", "models/best.pt", force_reload=True)


YOLOv5 🚀 8c2ab6f torch 1.8.0 CUDA:0 (NVIDIA Tegra X1, 3963MiB)

Fusing layers... 
Model Summary: 213 layers, 1761871 parameters, 0 gradients, 4.1 GFLOPs
Adding AutoShape... 


In [22]:
model = prune(model, amount=0.5)

Pruning model...  0.499 global sparsity


In [37]:
model = real_prune(model, pruning_ratio=0.1)
#model = quantize(model)

Before Pruning: MACs=2.079101 G, #Params=1761871.000000 G
After Pruning: MACs=1.491328 G, #Params=1459187.000000 G


In [42]:
detect(model)

nvarguscamerasrc sensor-id=0 ! video/x-raw(memory:NVMM), width=(int)1280, height=(int)720, framerate=(fraction)60/1 ! nvvidconv flip-method=0 ! video/x-raw, width=(int)1280, height=(int)720, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink
inference time : 6156.8359375ms
inference time : 5100.8525390625ms
inference time : 504.8294677734375ms
inference time : 373.4302673339844ms
inference time : 355.9434509277344ms
inference time : 525.6057739257812ms
inference time : 358.2197265625ms
inference time : 346.3018798828125ms
inference time : 351.9606628417969ms
inference time : 351.1159362792969ms
inference time : 352.2029724121094ms
inference time : 352.6986389160156ms
inference time : 348.7313537597656ms
inference time : 345.0692138671875ms
inference time : 351.6027526855469ms
inference time : 347.80078125ms
inference time : 351.8130798339844ms
inference time : 348.33270263671875ms
inference time : 346.4955749511719ms
inference time : 350.446044921875ms
infer

In [34]:
import os
from pathlib import Path
from PIL import Image

COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255)]  # 다양한 색상
inference_times_mean = []
image_folder = './test'
output_folder = './test/result'
os.makedirs(output_folder, exist_ok=True)

image_files = list(Path(image_folder).rglob('*.jpg'))
model.eval()
model.conf=0.01

for image_file in image_files:
    image_file = Path('./test/test_1.jpg')
    img = Image.open(image_file)

    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)

    with torch.no_grad():
        start_event.record()
        results = model(img, size=1280, augment=True)
        end_event.record()

    torch.cuda.synchronize()
    elapsed_time_ms = start_event.elapsed_time(end_event)

    results_img_path = os.path.join(output_folder, image_file.name)
    results.save(save_dir=output_folder)

    print('inference time : {}ms'.format(elapsed_time_ms))
    inference_times_mean.append(elapsed_time_ms)
    break

print("\nMean inference time : {}ms".format(sum(inference_times_mean) / len(inference_times_mean)))

Saved 1 image to [1mtest/result[0m


inference time : 3722.6640625ms

Mean inference time : 3722.6640625ms
