In [1]:
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2

In [2]:
from keras.models import load_model

Using TensorFlow backend.


In [3]:
def binarize_inverse(img):
    """Return an inverted binary version of ``img``.

    Applies a fixed threshold of 127 with ``cv2.THRESH_BINARY_INV``:
    pixels above the threshold become 0, all others become 255.

    Args:
        img: Image array accepted by ``cv2.threshold``.

    Returns:
        The thresholded image (second element of ``cv2.threshold``'s result).
    """
    threshold_value, max_value = 127, 255
    return cv2.threshold(img, threshold_value, max_value, cv2.THRESH_BINARY_INV)[1]

In [4]:
# Class labels, indexed by the class id that ml_obj_det reads from the
# detector output.
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
    "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
    "tvmonitor",
]
# One random 3-component colour (values in [0, 255)) per class label.
COLORS = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))

def ml_obj_det(frame, conf_threshold=0.7):
    """Run the SSD detector on ``frame`` and draw labelled boxes on it.

    The frame is resized to 300x300 and converted to a blob with scale
    0.007843 and mean 127.5 (i.e. ``(pixel - 127.5) * 0.007843``) before
    being fed to the module-level ``net``.

    Args:
        frame: Image array; it is drawn on **in place**.
        conf_threshold: Minimum detection confidence (strictly greater than)
            for a box to be drawn. Defaults to 0.7, the previously
            hard-coded value.

    Returns:
        The same ``frame`` object, annotated with boxes and labels.
    """
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),
        0.007843, (300, 300), 127.5)

    net.setInput(blob)
    detections = net.forward()

    # Box coordinates are multiplied by the frame size to obtain pixels;
    # the scale vector does not change per detection, so build it once.
    scale = np.array([w, h, w, h])

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence <= conf_threshold:
            continue

        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * scale
        (startX, startY, endX, endY) = box.astype("int")

        # draw the prediction on the frame
        label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
        cv2.rectangle(frame, (startX, startY), (endX, endY),
            COLORS[idx], 2)
        # Put the label above the box unless that would run off the top.
        y = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(frame, label, (startX, y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
    return frame

In [5]:
def line_detection(frame):
    """Detect long, mostly horizontal line segments and draw them in green.

    Edges are found with Canny (thresholds 50/150, aperture 3) and segments
    with the probabilistic Hough transform, using an angular resolution of
    pi/2.

    Args:
        frame: BGR image array; it is drawn on **in place**.

    Returns:
        The same ``frame`` object with the selected segments drawn.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # minLineLength / maxLineGap must be passed by keyword: the fifth
    # positional parameter of HoughLinesP is the optional `lines` output
    # array, so positional passing shifts both values by one slot.
    lines = cv2.HoughLinesP(
        edges,
        rho=1,
        theta=np.pi / 2,
        threshold=100,
        minLineLength=300,
        maxLineGap=10,
    )

    # HoughLinesP returns None (not an empty array) when nothing is found.
    if lines is None:
        return frame

    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        # Compare absolute extents so the test does not depend on the
        # order in which the two endpoints are reported.
        if abs(x2 - x1) > abs(y2 - y1):
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                     (0, 255, 0), 2)
    return frame

In [6]:
def remove_lines(frame, vertical_size = 500):
    """Keep vertically extended strokes of ``frame`` and smooth their edges.

    The image is converted to grayscale, inverse-binarized, then opened
    (erode followed by dilate) with a 1-pixel-wide vertical kernel, so
    foreground structures shorter than the kernel height are dropped. The
    result is inverted back and its edges are softened with a small blur.

    Args:
        frame: BGR image, or an already single-channel image.
        vertical_size: Divisor applied to the image height to obtain the
            vertical kernel height (``rows / vertical_size``), clamped to
            at least 1 pixel.

    Returns:
        A new single-channel image; ``frame`` itself is not modified.
    """
    # Accept images that are already grayscale (cvtColor would reject them).
    if frame.ndim == 2:
        gray = frame
    else:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    vertical = binarize_inverse(gray)

    # Kernel height; clamp so small images never yield a zero-sized kernel.
    kernel_height = max(1, int(vertical.shape[0] / vertical_size))
    # Create structure element for extracting vertical lines through morphology operations
    verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_height))
    # Apply morphology operations
    vertical = cv2.erode(vertical, verticalStructure)
    vertical = cv2.dilate(vertical, verticalStructure)
    # Inverse vertical image
    vertical = cv2.bitwise_not(vertical)

    # Step 1: extract edges
    edges = cv2.adaptiveThreshold(vertical, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                  cv2.THRESH_BINARY, 3, -2)
    # Step 2: thicken the edge mask
    kernel = np.ones((2, 2), np.uint8)
    edges = cv2.dilate(edges, kernel)
    # Steps 3-4: blurred copy of the result
    smooth = cv2.blur(np.copy(vertical), (2, 2))
    # Step 5: replace only the edge pixels with their blurred values
    edge_rows, edge_cols = np.where(edges != 0)
    vertical[edge_rows, edge_cols] = smooth[edge_rows, edge_cols]
    return vertical

In [7]:
# Keras model used by detect_sheet; the path is relative to the notebook.
SHEET_MODEL_PATH = '../SheetClassification/mobilenetv2.h5'
model = load_model(SHEET_MODEL_PATH)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [20]:
def detect_sheet(frame):
    """Classify ``frame`` with ``model`` and overlay a status message.

    The frame is resized to 224x224 and passed to the module-level ``model``
    as a batch of one. If the first output score exceeds the second, the
    user is asked to point the camera at sheet music; otherwise a
    "processing" message is shown.

    Args:
        frame: Image array; the message is drawn onto it.

    Returns:
        The frame returned by ``cv2.putText``.
    """
    batch = np.array([cv2.resize(frame, (224, 224))])
    scores = model.predict(batch).flatten()
    message = (
        "Please point the camera to a sheet music."
        if scores[0] > scores[1]
        else "Thank You. Processing..."
    )
    return cv2.putText(frame, message, (10, 10), cv2.FONT_HERSHEY_SIMPLEX,
                       0.5, (255, 0, 255), 2)

In [24]:
# Caffe network definition and weights read by ml_obj_det through `net`.
PROTOTXT_PATH = './Caffe/deploy.prototxt'
CAFFE_WEIGHTS_PATH = './Caffe/mobilenet_iter_73000.caffemodel'

print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(PROTOTXT_PATH, CAFFE_WEIGHTS_PATH)

[INFO] loading model...


In [25]:
CAMERA_INDEX = 0       # video source passed to VideoStream
WARMUP_SECONDS = 2.0   # pause after starting the stream, before timing begins

print("[INFO] starting video stream...")
vs = VideoStream(src=CAMERA_INDEX).start()
time.sleep(WARMUP_SECONDS)
fps = FPS().start()

[INFO] starting video stream...


In [26]:
# Main capture loop: grab a frame, annotate it, display it, and stop when
# the user presses "q". Cleanup runs in `finally` so the camera and the
# display window are released even if a processing step raises.
try:
    while True:
        frame = vs.read()
        # No frame was delivered (e.g. camera unavailable); stop instead of
        # failing inside imutils.resize.
        if frame is None:
            break
        frame = imutils.resize(frame, width=400)

        frame = ml_obj_det(frame)
        # Optional stages, currently disabled:
        #     frame = line_detection(frame)
        #     frame = detect_sheet(frame)

        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            break

        fps.update()
finally:
    fps.stop()
    cv2.destroyAllWindows()
    vs.stop()

print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

[INFO] elapsed time: 834.87
[INFO] approx. FPS: 9.33


In [74]:
def show(img):
    """Display ``img`` in a window and block until a key is pressed.

    All OpenCV windows are destroyed once the key press is received.

    Args:
        img: Image array accepted by ``cv2.imshow``.
    """
    window_name = 'image'
    cv2.imshow(window_name, img)
    cv2.waitKey(0)  # 0 = wait indefinitely for a key press
    cv2.destroyAllWindows()

In [128]:
# Load the sample scan. cv2.imread returns None instead of raising when the
# file is missing or unreadable, so fail here with a clear message.
IMAGE_PATH = '../ScanLineDetection/images/img1.jpg'
img = cv2.imread(IMAGE_PATH)
if img is None:
    raise FileNotFoundError("Could not read image: {}".format(IMAGE_PATH))

In [129]:
# Preview the loaded image; blocks until a key is pressed in the window.
show(img)

In [133]:
# Run remove_lines on the loaded image and preview the result.
lines_removed = remove_lines(img)
show(lines_removed)

In [61]:
# Inspect the image dimensions.
# NOTE(review): the saved output below comes from an earlier execution
# (In [61], before the In [128] load) and may not match the current `img`;
# re-run to confirm.
img.shape

(1651, 1275)