# Model

In [1]:
# %tensorflow_version 1.x  (notebook magic, left disabled)
import tensorflow as tf
# Show which TensorFlow version the kernel is actually running.
print(tf.__version__)

2.5.0


In [2]:
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
import collections
# from google.colab.patches import cv2_imshow

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")

# Import utilities
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

In [3]:
def load_image_into_numpy_array(image):
  """Turn an image object into a (height, width, 3) uint8 NumPy array.

  The object must expose ``size`` as a ``(width, height)`` pair and a
  ``getdata()`` method yielding one 3-value entry per pixel, row by row.
  """
  width, height = image.size
  flat_pixels = np.array(image.getdata())
  shaped = flat_pixels.reshape((height, width, 3))
  return shaped.astype(np.uint8)

In [4]:
# Grab path to current working directory
# (CWD_PATH is not referenced again in the visible cells; the paths below are
# plain relative strings.)
CWD_PATH = os.getcwd()

# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = 'shoeDetection/Python inference/frozen_inference_graph.pb'

# Path to label map file
PATH_TO_LABELS = 'shoeDetection/Python inference/label_map.pbtxt'

# Path to the folder that holds the model files and images
# (not referenced again in the visible cells)
PATH_TO_IMAGE_FOLDER = 'shoeDetection/Python inference/'

# Number of classes the object detector can identify
# (informational only: not passed to any call in the visible cells)
NUM_CLASSES = 1

# Load the label map.
# Label maps map class indices to category names, so that a numeric class id
# predicted by the network can be turned into a readable label.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

# Load the Tensorflow model into memory.
# The frozen graph is parsed from disk and imported into its own tf.Graph;
# the session created below is bound to that graph and is reused for every
# frame by the capture loop.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()
    with tf.compat.v1.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a prefix, so the tensors can be
        # looked up below by their original names.
        tf.import_graph_def(od_graph_def, name='')

    # NOTE(review): this session is never closed in the visible cells.
    sess = tf.compat.v1.Session(graph=detection_graph)

# Define input and output tensors (i.e. data) for the object detection classifier

# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

# Each score represents level of confidence for each of the objects.
# The capture loop only acts on the highest-scoring detection.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')


# Audio

In [5]:
import cv2
import numpy as np
import time
import mido

In [6]:
# Index of the capture device handed to cv2.VideoCapture.
camera_ID =0

## effect description
# One entry per on-screen effect area. Fields:
#   x, y, w, h        - rectangle (top-left corner, width, height) in pixels of
#                       the resized frame; drawn with cv2.rectangle and tested
#                       by checkInBoundaries
#   isExpressive      - when True, processInput calls changeExpressive while the
#                       tracked point stays inside the rectangle
#   effectName        - name matched by processEffect / changeExpressive
#   color             - colour tuple passed to cv2.rectangle
#   state             - counter in 0..3, advanced by processInput each time the
#                       point enters or leaves the rectangle
#   control           - MIDI control number used by processEffect
#   control_x/_y      - MIDI control numbers used by changeExpressive for the
#                       horizontal / vertical position inside the rectangle
# NOTE: the spelling 'disortion' is a runtime key/value compared against in
# processEffect; renaming it requires changing both places.
effects = {
    'disortion':{
        'x':10,
        'y':50,
        'w':280,
        'h':180,
        'isExpressive':False,
        'effectName':"disortion",
        'color':(255,255,0),
        'state':0,
        'control' : 70
        
    },
    'delay':{
        'x':320,
        'y':50,
        'w':280,
        'h':200,
        'isExpressive':True,
        'effectName':"delay",
        'control_x' : 71,
        'control_y' : 72,
        'control': 73,
        'color':(255,0,255),
        'state':0
    }
}


## init for previous points
# Position of the tracked point in the previous frame; read by processInput
# and overwritten by the capture loop after each successful detection.

prev_x , prev_y = (0,0)



# Every captured frame is resized to this size before detection and drawing.
screen_w = 640
screen_h = 480

# MIDI output port used by processEffect and changeExpressive.
# NOTE(review): the port is never closed in the visible cells.
output = mido.open_output('MIDI Bus 1')

In [7]:
# Last normalized positions for which changeExpressive sent a control change:
# 'delay_prev' tracks the horizontal value, 'feedback_prev' the vertical one.
# They are used to skip updates that moved by 0.01 or less.
prev_effects = {
    'feedback_prev':0,
    'delay_prev':0
}

In [8]:
# List the MIDI output port names mido can see on this machine.
mido.get_output_names()

['MIDI Bus 1', 'MIDI Bus 1']

In [9]:
def processBoard(img,input_x, input_y):
    """Feed the current tracked point to every configured effect.

    ``img`` is accepted for interface compatibility but is not used here.
    """
    for name in effects:
        processInput(name, effects[name], input_x, input_y)

In [10]:
def checkInBoundaries(x,y,effect):
    """Return True when the point (x, y) lies strictly inside the effect's rectangle.

    The rectangle is described by ``effect['x']``, ``effect['y']`` (top-left
    corner) and ``effect['w']``, ``effect['h']`` (size). Points exactly on an
    edge count as outside.
    """
    left, top = effect['x'], effect['y']
    inside_x = left < x < left + effect['w']
    inside_y = top < y < top + effect['h']
    # bool() keeps the return type a plain Python bool even for NumPy scalars.
    return bool(inside_x and inside_y)

In [11]:
def processInput(effectName, effect,x,y):
    """Update one effect from the tracked point's movement since the last frame.

    The current point (x, y) and the previous point (module-level ``prev_x``,
    ``prev_y``) are both tested against the effect's rectangle:

    * crossing the border in either direction advances ``effect['state']``
      (modulo 4), calls ``processEffect`` and prints the new state;
    * staying inside an expressive effect forwards the position to
      ``changeExpressive``;
    * anything else leaves the effect untouched.
    """
    inside_now = checkInBoundaries(x,y,effect)
    inside_before = checkInBoundaries(prev_x,prev_y,effect)

    if inside_now != inside_before:
        # The point entered or left the rectangle: step the state machine.
        effect['state'] = (effect['state'] + 1) % 4
        processEffect(effect)
        print(f'effect {effectName} is in state {effect["state"]} ')
        return

    if inside_now and effect['isExpressive']:
        # Still inside: only the continuous (expressive) values can change.
        changeExpressive(effect,x,y)

In [12]:
def changeExpressive(effect,input_x,input_y):
    """Send position-dependent MIDI control changes for an expressive effect.

    Only the ``'delay'`` effect is handled. The point is normalized against the
    effect's rectangle; the horizontal position drives ``effect['control_x']``
    and the inverted vertical position drives ``effect['control_y']``. An axis
    is only sent when it moved by more than 0.01 (normalized) since the last
    value recorded in ``prev_effects``.

    Every pending message is sent. Previously a single variable held the
    message, so when both axes changed in the same frame the horizontal
    update was overwritten and never reached the MIDI port.
    """
    if not effect['isExpressive'] or effect['effectName'] != 'delay':
        return

    x_adjusted = (input_x -effect['x'])/effect['w']
    y_adjusted = (input_y -effect['y'])/effect['h']

    messages = []
    if abs(prev_effects['delay_prev']-x_adjusted)>0.01:
        # Clamp so a point outside the rectangle cannot produce an invalid MIDI value.
        x_value = max(0, min(127, int(x_adjusted * 127)))
        messages.append(mido.Message('control_change',control=effect['control_x'],value=x_value))
        prev_effects['delay_prev']=x_adjusted
    if abs(prev_effects['feedback_prev']-y_adjusted)>0.01:
        # Vertical axis is inverted: the top of the rectangle maps to 127.
        y_value = max(0, min(127, int(127 - y_adjusted * 127)))
        messages.append(mido.Message('control_change',control=effect['control_y'],value=y_value))
        prev_effects['feedback_prev']=y_adjusted

    for message in messages:
        output.send(message)


In [13]:
def processEffect(effect,input_x=False,input_y=False):
    """Send the on/off control change that matches the effect's current state.

    For both known effects, states 0 and 3 send value 127 on
    ``effect['control']``. ``'disortion'`` sends 0 in every other state;
    ``'delay'`` sends 0 only in state 1 and nothing in state 2. Unknown effect
    names send nothing. ``input_x`` / ``input_y`` are accepted but unused.
    """
    name = effect['effectName']
    value = None

    if name == 'disortion':
        print('processing disortion')
        value = 127 if effect['state'] in (0, 3) else 0
    elif name == 'delay':
        print('processing delay')
        if effect['state'] in (0, 3):
            value = 127
        elif effect['state'] == 1:
            value = 0

    if value is not None:
        output.send(mido.Message('control_change',control=effect['control'],value=value))


In [14]:




# Open the capture device selected by camera_ID; the capture loop reads from it.
cap = cv2.VideoCapture(camera_ID)

# Frame counter, incremented once per frame by the capture loop.
count = 0
# NOTE(review): `background` is not referenced in the visible cells.
background=0


In [15]:
def return_coordinates(boxes,width,height):
    """Scale boxes by the frame size and truncate them to integers.

    Each input box is ``(ymin, xmin, ymax, xmax)``; the y values are multiplied
    by ``height`` and the x values by ``width``. Note the output order differs
    from the input: each result is ``[ymin, ymax, xmin, xmax]``.
    """
    return [
        [int(top * height), int(bottom * height), int(left * width), int(right * width)]
        for top, left, bottom, right in boxes
    ]

In [None]:
# Main loop: grab a frame, detect the object, map its position onto the effect
# board and send MIDI. Press ESC (key code 27) in the "ARBoard" window to stop.
# The camera and the window are released when the loop ends for any reason
# (ESC, end of stream, error or kernel interrupt); re-run the cell that creates
# `cap` before running this cell again.
try:
    while(cap.isOpened()):
        ret, img = cap.read()
        if not ret:
            break

    #     img = cv2.flip(img, 0)

        count+=1

        img=cv2.resize(img,(screen_w,screen_h))

        # Run the detector BEFORE drawing anything on the frame: np.expand_dims
        # returns a view that shares memory with img, so overlays drawn first
        # would become part of the model input.
        # NOTE(review): OpenCV delivers BGR frames; confirm the model does not
        # expect RGB input (that would need cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).
        image_expanded = np.expand_dims(img, axis=0)
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_expanded})
        label_id_offset = 1  # not used below; kept from the original cell

        # Draw every effect area on the frame that will be displayed.
        for effectName, effect in effects.items():
            cv2.rectangle(img,(effect['x'],effect['y']),
                          (effect['x']+effect['w'],effect['y']+effect['h']),effect['color'], 3)

        # Until a detection says otherwise, show the last known point.
        input_x,input_y = (prev_x , prev_y)
        area = 0  # not used below; kept from the original cell
        cv2.circle(img,(input_x,input_y), 5, (255,0,0), -1)

        coordinates = return_coordinates(np.squeeze(boxes),screen_w,screen_h)

        frame_scores = np.squeeze(scores)
        highest_score_index = np.argmax(frame_scores)

        # Only the single best detection is used, and only above 0.45 confidence.
        if frame_scores[highest_score_index] > 0.45:

            y_min, y_max, x_min, x_max = coordinates[highest_score_index]

            x= x_min
            y= y_min
            h = abs(y_max -y_min)
            w = abs(x_max - x_min)
            # The control point is the middle of the box's top edge.
            input_x = int((x_max + x_min) /2)
            input_y = y
            cv2.rectangle(img,(x,y),(x+w,y+h),(0,0,255), 2)

            ## Draw the input circle now
            cv2.circle(img,(input_x,input_y), 10, (255,128,128), -1)
            processBoard(img,input_x,input_y)

            # keep current x,y pos for next frame
            prev_x , prev_y = (input_x,input_y)

        cv2.imshow("ARBoard",img)

        k = cv2.waitKey(10)
        if k == 27:
            break
finally:
    # Free the camera and close the preview window even when interrupted.
    cap.release()
    cv2.destroyAllWindows()

processing disortion
effect disortion is in state 1 
processing disortion
effect disortion is in state 2 
processing disortion
effect disortion is in state 3 
processing disortion
effect disortion is in state 0 
processing disortion
effect disortion is in state 1 
processing disortion
effect disortion is in state 2 
processing delay
effect delay is in state 1 
processing delay
effect delay is in state 2 
processing delay
effect delay is in state 3 
processing delay
effect delay is in state 0 
processing delay
effect delay is in state 1 
processing delay
effect delay is in state 2 
processing delay
effect delay is in state 3 
processing delay
effect delay is in state 0 
processing disortion
effect disortion is in state 3 
processing disortion
effect disortion is in state 0 
processing disortion
effect disortion is in state 1 
processing disortion
effect disortion is in state 2 
processing delay
effect delay is in state 1 
processing delay
effect delay is in state 2 
processing disortion