In [1]:
import threading
import numpy as np
import cv2
from pykinect2 import PyKinectRuntime
from pykinect2.PyKinectV2 import FrameSourceTypes_Color
from PointCloud import Cloud


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
import sys
# Make the "CV Model" directory importable. The path is relative, so it is
# resolved against the current working directory of the kernel.
sys.path.append('..\\..\\CV Model\\')


In [3]:
# from finetune import create_model

In [4]:
import torch
from torchvision.transforms import functional as F
from torchvision.models.detection import fasterrcnn_resnet50_fpn
import torchvision
from torchvision.models.detection import maskrcnn_resnet50_fpn

# Report whether CUDA is usable; only query the device name when it is,
# because get_device_name(0) raises on a machine without a CUDA device.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))


True
NVIDIA GeForce RTX 4070


In [5]:
# COCO class labels for torchvision models.
# display_predictions() indexes this list with the integer label predicted by
# the model, so the position of each name matters. Index 0 is the background
# entry; 'N/A' entries are placeholders that keep the remaining names at their
# expected indices (presumably unused COCO category ids -- confirm upstream).
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [6]:
def create_model(num_classes):
    """Build a Mask R-CNN (ResNet-50 FPN) with heads sized for `num_classes`.

    The network is created with `pretrained=True`, then its box predictor and
    mask predictor are replaced by fresh predictors that output `num_classes`
    classes. The input widths of the new heads are read from the heads they
    replace.

    Args:
        num_classes: Number of output classes for both replacement heads.

    Returns:
        The modified model.
    """
    detection = torchvision.models.detection
    net = maskrcnn_resnet50_fpn(pretrained=True)
    heads = net.roi_heads

    # Read the input sizes of the existing heads before swapping them out.
    box_in_features = heads.box_predictor.cls_score.in_features
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels

    # Box head first, then mask head (256 hidden channels), as before.
    heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(box_in_features, num_classes)
    heads.mask_predictor = detection.mask_rcnn.MaskRCNNPredictor(mask_in_channels, 256, num_classes)
    return net


In [7]:
def load_model():
    """Create the 2-class model, load its saved weights and prepare it for inference.

    The device is CUDA when available, otherwise CPU. The state dict is read
    from a path relative to the current working directory.

    Returns:
        A `(model, device)` tuple: the model moved to `device` and switched to
        eval mode, and the `torch.device` it lives on.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using {device}")

    state_dict_path = "..\\..\\CV Model\\final_model.pth"

    model = create_model(2)
    # map_location remaps the stored tensors onto the selected device, so a
    # checkpoint saved from a GPU can still be loaded on the CPU fallback.
    model.load_state_dict(torch.load(state_dict_path, map_location=device))
    model.to(device)
    model.eval()
    return model, device


def process_frame(frame, model, device):
    """Run `model` on a single frame and return its prediction for that frame.

    Args:
        frame: Image accepted by `torchvision.transforms.functional.to_tensor`.
        model: Callable that takes a list of image tensors and returns one
            result per image.
        device: Device the image tensor is moved to before inference.

    Returns:
        The first (and only) element of the model output.
    """
    # Convert to a tensor, force float32 and move it to the target device.
    image = F.to_tensor(frame).to(torch.float32).to(device)

    # The model is called with a list of images, so wrap the single image
    # directly; no batch dimension has to be added and removed again.
    with torch.no_grad():
        predictions = model([image])[0]

    return predictions


def display_predictions(frame, predictions, score_threshold=0.1):
    """Draw a box and caption on `frame` for each sufficiently confident prediction.

    Args:
        frame: Image that is drawn on in place with OpenCV.
        predictions: Mapping with 'labels', 'boxes' and 'scores' sequences of
            equal length (one entry per detection).
        score_threshold: Detections are drawn only when their score is strictly
            greater than this value. Defaults to 0.1.

    Returns:
        The same `frame` object, with the annotations drawn on it.

    NOTE(review): captions are looked up in COCO_INSTANCE_CATEGORY_NAMES, while
    load_model() builds a 2-class model; the COCO names may not match the
    classes that model was trained on -- confirm.
    """
    labels = predictions['labels']
    boxes = predictions['boxes']
    scores = predictions['scores']

    for box, label, score in zip(boxes, labels, scores):
        # Filter on confidence first so rejected detections cost nothing.
        if not score > score_threshold:
            continue

        x1, y1, x2, y2 = map(int, box)

        # Fall back to a generic caption rather than raising IndexError when
        # the predicted label has no entry in the name table.
        class_index = label.item()
        if 0 <= class_index < len(COCO_INSTANCE_CATEGORY_NAMES):
            label_name = COCO_INSTANCE_CATEGORY_NAMES[class_index]
        else:
            label_name = f'class {class_index}'

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Draw rectangle

        # Keep the caption inside the image when the box touches the top edge.
        text_y = max(y1 - 10, 12)
        cv2.putText(frame, f'{label_name}: {score:.2f}', (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)  # Display label and score

    return frame

In [8]:
def rgb_capture(kinect, model, device):
    """Show the Kinect colour stream with model predictions until 'q' is pressed.

    Each new colour frame is reshaped to (Height, Width, 4), reduced to three
    channels, passed through `process_frame`, annotated by
    `display_predictions` and shown in an OpenCV window.

    Args:
        kinect: Runtime object providing `has_new_color_frame()`,
            `get_last_color_frame()` and `color_frame_desc`.
        model: Detection model forwarded to `process_frame`.
        device: Device forwarded to `process_frame`.
    """
    import time  # local import: only needed for the idle back-off below

    print("Starting color frame capture. Press 'q' to quit.")

    while True:
        if not kinect.has_new_color_frame():
            # Back off briefly instead of spinning at full speed while the
            # sensor has nothing new to deliver.
            time.sleep(0.001)
            continue

        color_frame = kinect.get_last_color_frame()
        color_image = color_frame.reshape((kinect.color_frame_desc.Height,
                                           kinect.color_frame_desc.Width, 4)).astype(np.uint8)

        # Drop the fourth channel; the order of the other three is unchanged.
        # NOTE(review): if the Kinect delivers BGRA, this image is BGR, which
        # suits cv2.imshow but means the model is fed BGR input -- confirm.
        rgb_image = cv2.cvtColor(color_image, cv2.COLOR_RGBA2RGB)

        # Process frame through model
        predictions = process_frame(rgb_image, model, device)
        rgb_image_with_preds = display_predictions(rgb_image, predictions)

        cv2.imshow('Kinect RGB Stream with CNN Predictions', rgb_image_with_preds)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        

In [9]:
def point_cloud_visualization():
    """Placeholder for the point-cloud viewer; currently does nothing."""
    # Disabled Cloud-based visualisation, kept for reference:
    # pcl = Cloud(dynamic=True, simultaneously=True, depth=True, color=True, body=False, skeleton=False, color_overlay=False)
    # pcl.visualize()
    return None
    

In [10]:
def main():
    """Open the Kinect, run the RGB capture thread and always clean up afterwards.

    The capture loop runs in a worker thread while `point_cloud_visualization`
    runs in the calling thread. The Kinect handle and any OpenCV windows are
    released even if model loading or a later step raises.
    """
    # Initialize the Kinect with color frame support
    kinect = PyKinectRuntime.PyKinectRuntime(FrameSourceTypes_Color)
    try:
        model, device = load_model()

        # Start the RGB capture thread
        rgb_thread = threading.Thread(target=rgb_capture, args=(kinect, model, device))
        rgb_thread.start()

        try:
            # Start the point cloud visualization in the main thread
            point_cloud_visualization()
        finally:
            # Wait for the RGB thread to finish before the sensor is closed,
            # since that thread is still reading from it.
            rgb_thread.join()
    finally:
        # Cleanup
        kinect.close()
        cv2.destroyAllWindows()

# Run the capture pipeline only when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()
    

Using cuda




Starting color frame capture. Press 'q' to quit.
