In [1]:
# coding: utf-8
import numpy as np
import os
import cv2
import sys
from pylibfreenect2 import Freenect2, SyncMultiFrameListener
from pylibfreenect2 import FrameType, Registration, Frame
from pylibfreenect2 import createConsoleLogger, setGlobalLogger
from pylibfreenect2 import LoggerLevel

In [4]:
# Pick a packet pipeline, trying OpenGL first, then OpenCL, then CPU.
# `except Exception` (instead of a bare `except`) keeps the fallback chain
# but lets KeyboardInterrupt / SystemExit propagate.
try:
    from pylibfreenect2 import OpenGLPacketPipeline
    pipeline = OpenGLPacketPipeline()
except Exception:
    try:
        from pylibfreenect2 import OpenCLPacketPipeline
        pipeline = OpenCLPacketPipeline()
    except Exception:
        from pylibfreenect2 import CpuPacketPipeline
        pipeline = CpuPacketPipeline()
print("Packet pipeline:", type(pipeline).__name__)

# Create and set logger
logger = createConsoleLogger(LoggerLevel.Debug)
setGlobalLogger(logger)

# Stop here when no device is attached.
fn = Freenect2()
num_devices = fn.enumerateDevices()
if num_devices == 0:
    print("No device connected!")
    sys.exit(1)

# Open the first enumerated device with the pipeline selected above.
serial = fn.getDeviceSerialNumber(0)
device = fn.openDevice(serial, pipeline=pipeline)

# One listener receives color, IR and depth frames.
listener = SyncMultiFrameListener(FrameType.Color | FrameType.Ir | FrameType.Depth)

# Register listeners
device.setColorFrameListener(listener)
device.setIrAndDepthFrameListener(listener)

device.start()

Packet pipeline: OpenGLPacketPipeline


In [5]:
# Build the registration object from the IR and color camera parameters.
# NOTE: must be called after device.start()
registration = Registration(
    device.getIrCameraParams(),
    device.getColorCameraParams(),
)

# Optional parameters for registration:
# set a flag to True if you need the corresponding buffer.
need_bigdepth = False
need_color_depth_map = False

# bigdepth allocation is disabled in this cell:
#bigdepth = Frame(1920, 1082, 3) if need_bigdepth else None
if need_color_depth_map:
    # Flat int32 buffer with 424 * 512 entries.
    color_depth_map = np.zeros((424, 512),  np.int32).ravel()
else:
    color_depth_map = None

In [3]:
# For Model
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2

import random
import math
import skimage.io
import matplotlib

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Also make the parent directory importable.
sys.path.append("..")

# Root directory of the project (the kernel's current working directory)
ROOT_DIR = os.getcwd()

# Import Mask R-CNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
# Import SUN config (the `sun` module is looked up in samples/sun/)
sys.path.append(os.path.join(ROOT_DIR, "samples/sun/"))  # To find local version
import sun

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
SUN_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_sun.h5")

# Directory of images to run detection on
IMAGE_DIR = os.path.join(ROOT_DIR, "images")

Using TensorFlow backend.


In [5]:
class InferenceConfig(sun.SunConfig):
    """SUN configuration specialised for single-image inference.

    Batch size = GPU_COUNT * IMAGES_PER_GPU, so both values are pinned to 1
    to run detection on one image at a time.
    """
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1


config = InferenceConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.9
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                26
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTA

# Image segmentation

In [6]:
# Instantiate Mask R-CNN in inference mode with the config defined above.
model = modellib.MaskRCNN(
    config=config,
    model_dir=MODEL_DIR,
    mode="inference",
)

# Load the trained weights from SUN_MODEL_PATH, matching layers by name.
model.load_weights(SUN_MODEL_PATH, by_name=True)

In [7]:
# Class labels; display_instances looks labels up in this list by class id.
# Entry 0 is 'BG'.
class_names = [
    'BG',
    'bed',
    'books',
    'ceiling',
    'chair',
    'floor',
    'furniture',
    'objects',
    'picture',
    'sofa',
    'table',
    'tv',
    'wall',
    'window',
]

In [8]:
def random_colors(N):
    """Return N reproducible pseudo-random colors.

    The colors are drawn from a private generator seeded with 1, so every
    call yields the same sequence without reseeding NumPy's global random
    state (which would silently affect unrelated code using np.random).

    Args:
        N: number of colors to generate.

    Returns:
        A list of N 3-tuples of floats, each component in [0, 255).
    """
    rng = np.random.RandomState(1)
    return [tuple((255 * rng.rand(3)).tolist()) for _ in range(N)]

def apply_mask(image, mask, color, alpha=0.5):
    """Blend ``color`` into ``image`` wherever ``mask`` equals 1.

    The image is modified in place, one channel per component of ``color``,
    and the same array is returned. Pixels outside the mask are left as is.
    """
    selected = (mask == 1)
    keep = 1 - alpha
    for channel, component in enumerate(color):
        plane = image[:, :, channel]
        blended = plane * keep + alpha * component
        image[:, :, channel] = np.where(selected, blended, plane)
    return image

def display_instances(image, boxes, masks, ids, names, scores):
    """Draw every detected instance (mask, box and caption) onto ``image``.

    Args:
        image: image array to draw on; it is modified by apply_mask and the
            cv2 drawing calls.
        boxes: array with one row per instance, unpacked as (y1, x1, y2, x2).
        masks: array whose last axis indexes instances (masks[:, :, i]).
        ids: per-instance indices into ``names``.
        names: sequence of class labels.
        scores: per-instance scores, or None to caption with the label only.

    Returns:
        The annotated image.
    """
    n_instances = boxes.shape[0]
    if not n_instances:
        print('NO INSTANCES TO DISPLAY')
        return image
    assert boxes.shape[0] == masks.shape[-1] == ids.shape[0]

    colors = random_colors(n_instances)
    for i, color in enumerate(colors):
        # An all-zero box carries no detection; skip it.
        if not np.any(boxes[i]):
            continue

        # Plain Python ints: cv2 drawing functions expect integer point
        # coordinates, and NumPy integer scalars are not accepted by every
        # OpenCV build.
        y1, x1, y2, x2 = (int(v) for v in boxes[i])
        label = names[ids[i]]
        score = scores[i] if scores is not None else None
        # Test against None so a legitimate score of 0.0 is still shown.
        if score is None:
            caption = label
        else:
            caption = '{} {:.2f}'.format(label, score)

        image = apply_mask(image, masks[:, :, i], color)
        image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        image = cv2.putText(image, caption, (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.7, color, 2)

    return image

# Loading device

In [None]:
# NOTE(review): this cell repeats the device setup performed in an earlier
# cell of this notebook; presumably only one of the two should be run per
# kernel session -- confirm and drop the redundant one.

# Pick a packet pipeline, trying OpenGL first, then OpenCL, then CPU.
# `except Exception` (instead of a bare `except`) keeps the fallback chain
# but lets KeyboardInterrupt / SystemExit propagate.
try:
    from pylibfreenect2 import OpenGLPacketPipeline
    pipeline = OpenGLPacketPipeline()
except Exception:
    try:
        from pylibfreenect2 import OpenCLPacketPipeline
        pipeline = OpenCLPacketPipeline()
    except Exception:
        from pylibfreenect2 import CpuPacketPipeline
        pipeline = CpuPacketPipeline()
print("Packet pipeline:", type(pipeline).__name__)

# Create and set logger
logger = createConsoleLogger(LoggerLevel.Debug)
setGlobalLogger(logger)

# Stop here when no device is attached.
fn = Freenect2()
num_devices = fn.enumerateDevices()
if num_devices == 0:
    print("No device connected!")
    sys.exit(1)

# Open the first enumerated device with the pipeline selected above.
serial = fn.getDeviceSerialNumber(0)
device = fn.openDevice(serial, pipeline=pipeline)

# One listener receives color, IR and depth frames.
listener = SyncMultiFrameListener(
    FrameType.Color | FrameType.Ir | FrameType.Depth)

# Register listeners
device.setColorFrameListener(listener)
device.setIrAndDepthFrameListener(listener)

device.start()

In [None]:
# Build the registration object from the IR and color camera parameters.
# NOTE: must be called after device.start()
registration = Registration(
    device.getIrCameraParams(),
    device.getColorCameraParams(),
)

# Frame buffers for registration results
# (presumably width, height, bytes per pixel -- confirm against pylibfreenect2).
registered = Frame(512, 424, 4)
undistorted = Frame(512, 424, 4)

# Optional parameters for registration:
# set a flag to True if you need the corresponding buffer.
need_bigdepth = False
need_color_depth_map = False

if need_bigdepth:
    bigdepth = Frame(1920, 1082, 4)
else:
    bigdepth = None

if need_color_depth_map:
    # Flat int32 buffer with 424 * 512 entries.
    color_depth_map = np.zeros((424, 512),  np.int32).ravel()
else:
    color_depth_map = None

# Working with a video stream from Kinect

In [7]:
# Run Mask R-CNN on each Kinect color frame and show the annotated result
# until 'q' is pressed. The try/finally guarantees the device is stopped and
# closed even when the cell is interrupted (e.g. KeyboardInterrupt).
try:
    while True:
        frames = listener.waitForNewFrame()
        # Single BGRA -> RGB conversion (the model is fed RGB). cvtColor
        # returns a new array, so the frame map can be released right away.
        frame = cv2.cvtColor(frames["color"].asarray(np.uint8), cv2.COLOR_BGRA2RGB)
        # Hand the frames back to the listener; without this every iteration
        # keeps its frame buffers alive.
        listener.release(frames)

        results = model.detect([frame], verbose=1)
        # Visualize results
        r = results[0]
        frame = display_instances(frame, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'])

        # cv2.imshow interprets 3-channel images as BGR, so convert back
        # before displaying; otherwise red and blue appear swapped.
        preview = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow('image segmentation', cv2.resize(preview, (800,600)))

        # Mask to the low byte so the comparison with ord('q') holds on
        # platforms where waitKey returns extra high bits.
        key = cv2.waitKey(delay=1) & 0xFF
        if key == ord('q'):
            break
finally:
    device.stop()
    device.close()
    cv2.destroyAllWindows()

# Raises SystemExit, which halts execution at this cell.
sys.exit(0)

get image
1
2
get image
2
3
get image
3
4
get image
4
5
get image
5
6
get image
6
7
get image
7
8
get image
8
9
get image
9
10
get image
10
11
get image
11
12
get image
12
13
get image
13
14
get image
14
15
get image
15
16
get image
16
17
get image
17
18
get image
18
19
get image
19
20
get image
20
21
get image
21
22
get image
22
23
get image
23
24
get image
24
25
get image
25
26
get image
26
27
get image
27
28
get image
28
29
get image
29
30
get image
30
31
get image
31
32
get image
32
33
get image
33
34
get image
34
35
get image
35
36
get image
36
37
get image
37
38
get image
38
39
get image
39
40
get image
40
41
get image
41
42
get image
42
43
get image
43
44
get image
44
45
get image
45
46
get image
46
47
get image
47
48
get image
48
49
get image
49
50
get image
50
51
get image
51
52
get image
52
53
get image
53
54
get image
54
55
get image
55
56
get image
56
57
get image
57
58
get image
58
59
get image
59
60
get image
60
61
get image
61
62
get image
62
63
get image
63
64
get image

get image
469
470
get image
470
471
get image
471
472
get image
472
473
get image
473
474
get image
474
475
get image
475
476
get image
476
477
get image
477
478
get image
478
479
get image
479
480
get image
480
481
get image
481
482
get image
482
483
get image
483
484
get image
484
485
get image
485
486
get image
486
487
get image
487
488
get image
488
489
get image
489
490
get image
490
491
get image
491
492
get image
492
493
get image
493
494
get image
494
495
get image
495
496
get image
496
497
get image
497
498
get image
498
499
get image
499
500
get image
500
501
get image
501
502
get image
502
503
get image
503
504
get image
504
505
get image
505
506
get image
506
507
get image
507
508
get image
508
509
get image
509
510
get image
510
511
get image
511
512
get image
512
513
get image
513
514
get image
514
515
get image
515
516
get image
516
517
get image
517
518
get image
518
519
get image
519
520
get image
520
521
get image
521
522
get image
522
523
get image
523
524
get image


KeyboardInterrupt: 

# Writing all color frames to disk

In [14]:
import os

# Directory that receives the captured frames.
dirname = 'videos'
# exist_ok=True keeps this cell re-runnable: os.mkdir raised FileExistsError
# whenever the directory was already there from a previous run.
os.makedirs(dirname, exist_ok=True)

In [None]:
# Save each received color frame as <dirname>/frame<N>.png (resized to
# 1280x720) and preview it, until 'q' is pressed. The try/finally guarantees
# the device is stopped and closed even when the cell is interrupted.
a = 1  # running index used in the output file name
try:
    while True:
        frames = listener.waitForNewFrame()
        # cvtColor returns a new array, so the frame map can be released
        # right after the conversion.
        color = cv2.cvtColor(frames["color"].asarray(np.uint8), cv2.COLOR_BGRA2BGR)
        # Hand the frames back to the listener; without this every iteration
        # keeps its frame buffers alive.
        listener.release(frames)

        frame_name = "frame" + str(a) + ".png"
        frame_path = os.path.join(dirname, frame_name)
        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(frame_path, cv2.resize(color, (1280,720))):
            print("Failed to write", frame_path)
        cv2.imshow('object detection', color)
        a += 1

        # NOTE(review): waitKey(0) blocks until a key is pressed, so one
        # frame is saved per key press. Use a small positive delay if
        # continuous capture is intended -- confirm.
        key = cv2.waitKey(0) & 0xFF
        if key == ord('q'):
            break
finally:
    device.stop()
    device.close()
    cv2.destroyAllWindows()

# Raises SystemExit, which halts execution at this cell.
sys.exit(0)