#### ESP32 Stereo Camera Code with navigation

In [1]:
import copy
import math
import time
import requests

import numpy as np
import cv2
import matplotlib.pyplot as plt
import scipy
import scipy.optimize
import torch
import torchvision
import torchvision.transforms.functional as tvtf
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights,MaskRCNN_ResNet50_FPN_V2_Weights

# stereo_image_utils.py holds the auxiliary functions used below. It must be in the
# same directory as this notebook.
import stereo_image_utils
from stereo_image_utils import get_detections, get_cost, draw_detections, annotate_class2 
from stereo_image_utils import get_horiz_dist_corner_tl, get_horiz_dist_corner_br, get_dist_to_centre_tl, get_dist_to_centre_br, get_dist_to_centre_cntr

In [2]:
# Base URLs of the two camera boards (left/right eye) and of the car controller.
URL_left = "http://192.168.1.181"
URL_right = "http://192.168.1.129"
URL_car = "http://192.168.1.182"
# Auto-white-balance state tracked for the cameras; toggled with the 'a' key in the main loop.
AWB = True
# Gate for the detection branch of the main loop, which only runs while cnt == 0.
cnt = 0
# Checked as `not moved` before the car turns towards a detected bottle.
moved = False
# Running sum of the turn angles sent to the car; the main loop stops once it reaches 720.
total_angle = 0
# Set to True by the main loop to request termination.
brk = False
#focal length. Pre-calibrated in stereo_image_v6 notebook
fl = 2.043636363636363
# Enters the distance estimate of the main loop as 1/tantheta.
# NOTE(review): presumably the tangent of the camera's half field of view with a
# hand-tuned -0.1 correction -- confirm against the calibration notebook.
tantheta = 0.7648732789907391-0.1
# Wall-clock timestamp taken when this cell runs; reassigned inside the main loop.
starttime = time.time()


In [3]:
# Pretrained weight set for Mask R-CNN; weights.meta["categories"] is also used
# further down to turn predicted label ids into class names.
weights=MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT

# matplotlib's default colour cycle, converted from '#rrggbb' hex strings into
# (r, g, b) tuples of integers (each pair of hex digits parsed base 16).
COLOURS = [
    tuple(int(colour_hex.strip('#')[i:i+2], 16) for i in (0, 2, 4))
    for colour_hex in plt.rcParams['axes.prop_cycle'].by_key()['color']
]

In [4]:
# Build the Mask R-CNN (ResNet-50 FPN v2) detector from the `weights` object.
model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights=weights)
# The notebook only runs inference, so switch the model to evaluation mode.
# eval() returns the module itself; binding it to `_` keeps the cell output empty.
_ = model.eval()

In [5]:
#capture the images
# cap_left = cv2.VideoCapture(URL_left + ":81/stream")
# cap_right = cv2.VideoCapture(URL_right + ":81/stream")

In [6]:
#functions for the command handler

def set_resolution(url: str, index: int=1, verbose: bool=False, timeout: float=10.0):
    """Ask the camera at `url` to switch to the frame size numbered `index`.

    Sends ``GET <url>/control?var=framesize&val=<index>``.

    Args:
        url: Base URL of the camera board, without a trailing slash.
        index: Frame-size index; must be one of 10, 9, 8, 7, 6, 5, 4, 3, 0.
        verbose: When True, print the table of available resolutions first.
        timeout: Seconds to wait for the board before giving up.

    Returns:
        None. An invalid index or a failed request is reported on stdout.
    """
    if verbose:
        resolutions = "10: UXGA(1600x1200)\n9: SXGA(1280x1024)\n8: XGA(1024x768)\n7: SVGA(800x600)\n6: VGA(640x480)\n5: CIF(400x296)\n4: QVGA(320x240)\n3: HQVGA(240x176)\n0: QQVGA(160x120)"
        print("available resolutions\n{}".format(resolutions))

    # Validate before touching the network so a bad index never sends a request.
    if index not in (10, 9, 8, 7, 6, 5, 4, 3, 0):
        print("Wrong index")
        return

    try:
        # Without a timeout an unreachable board would block the notebook forever.
        requests.get(url + "/control?var=framesize&val={}".format(index), timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP failures are treated as best-effort; programming
        # errors and KeyboardInterrupt are allowed to propagate.
        print("SET_RESOLUTION: something went wrong")

def set_quality(url: str, value: int=1, verbose: bool=False, timeout: float=10.0):
    """Ask the camera at `url` to use the quality setting `value`.

    Sends ``GET <url>/control?var=quality&val=<value>`` when `value` lies in
    the accepted range 10..63 (inclusive).

    Args:
        url: Base URL of the camera board, without a trailing slash.
        value: Quality value, 10 to 63 inclusive.
        verbose: Accepted for signature compatibility; currently unused.
        timeout: Seconds to wait for the board before giving up.

    Returns:
        None. An out-of-range value or a failed request is reported on stdout.
    """
    if not (10 <= value <= 63):
        # Report the rejection instead of silently doing nothing.
        print("Wrong value")
        return

    try:
        # Without a timeout an unreachable board would block the notebook forever.
        requests.get(url + "/control?var=quality&val={}".format(value), timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP failures are treated as best-effort.
        print("SET_QUALITY: something went wrong")

def set_awb(url: str, awb: int=1, timeout: float=10.0):
    """Toggle the auto-white-balance setting of the camera at `url`.

    `awb` is the state the caller currently assumes for the camera. The
    opposite state is sent as ``GET <url>/control?var=awb&val=<1|0>``.

    Args:
        url: Base URL of the camera board, without a trailing slash.
        awb: Currently assumed state (truthy = on).
        timeout: Seconds to wait for the board before giving up.

    Returns:
        bool: the new state (``not awb``) when the request was sent, or the
        unchanged state when it failed, so the caller's bookkeeping does not
        drift away from what the camera was actually told.
    """
    new_awb = not awb
    try:
        # Without a timeout an unreachable board would block the notebook forever.
        requests.get(url + "/control?var=awb&val={}".format(1 if new_awb else 0), timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP failures are treated as best-effort.
        print("SET_AWB: something went wrong")
        return bool(awb)
    return new_awb

def set_angle(url: str, angle: int, timeout: float=10.0):
    """Send a turn command to the car at `url`.

    Issues ``GET <url>/action?angle=<angle>``.

    Args:
        url: Base URL of the car controller, without a trailing slash.
        angle: Turn angle to send. How the sign is interpreted is decided by
            the car firmware.
        timeout: Seconds to wait for the controller before giving up.

    Returns:
        None. A failed request is reported on stdout.
    """
    try:
        # Without a timeout an unreachable car would block the notebook forever.
        requests.get(url + "/action?angle={}".format(angle), timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP failures are treated as best-effort; programming
        # errors and KeyboardInterrupt are allowed to propagate.
        print("SET_ANGLE: something went wrong")


def set_distance(url: str, dist: int, timeout: float=10.0):
    """Send a drive command to the car at `url`.

    Issues ``GET <url>/action?distance=<dist>``. The main loop passes a
    positive value to drive towards a target and the negated value to come
    back.

    Args:
        url: Base URL of the car controller, without a trailing slash.
        dist: Distance to send.
        timeout: Seconds to wait for the controller before giving up.

    Returns:
        None. A failed request is reported on stdout.
    """
    try:
        # Without a timeout an unreachable car would block the notebook forever.
        requests.get(url + "/action?distance={}".format(dist), timeout=timeout)
    except requests.RequestException:
        # Label fixed: this message used to claim the failure came from SET_ANGLE.
        print("SET_DISTANCE: something went wrong")
#26 37 38

def set_speed(url: str, speed: int, timeout: float=10.0):
    """Send a speed value to the car at `url`.

    Issues ``GET <url>/slider?value=<speed>``.

    Args:
        url: Base URL of the car controller, without a trailing slash.
        speed: Speed value to send (this notebook uses 230 and 255).
        timeout: Seconds to wait for the controller before giving up.

    Returns:
        None. A failed request is reported on stdout.
    """
    try:
        # Without a timeout an unreachable car would block the notebook forever.
        requests.get(url + "/slider?value={}".format(speed), timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP failures are treated as best-effort; programming
        # errors and KeyboardInterrupt are allowed to propagate.
        print("SET_SPEED: something went wrong")

def object_upright(coords):
    """Tell whether a box is taller than it is wide.

    `coords` is indexed as (coords[0], coords[1], coords[2], coords[3]); the
    horizontal extent is taken between items 0 and 2, the vertical extent
    between items 1 and 3. A square box counts as not upright.
    """
    horizontal_extent = abs(coords[0] - coords[2])
    vertical_extent = abs(coords[1] - coords[3])
    return horizontal_extent < vertical_extent


In [7]:
# Send an initial speed value of 255 to the car (the main loop later sends 230).
set_speed(URL_car, 255)

In [8]:
# set_angle(URL_car, int(-8.8998))

In [None]:
if __name__ == '__main__':
    # Search loop: grab one frame from each camera, detect and match objects
    # between the two views, estimate their distance from the disparity, drive
    # to an upright bottle and back, then turn a little and look again.

    # Search-behaviour tunables (the values this loop has always used).
    SCAN_STEP_DEG = 7       # turn between two looks at the scene
    MAX_SCAN_DEG = 720      # stop after two full rounds
    MAX_APPROACH_CM = 100   # don't move more than 100cm at this stage of testing
    APPROACH_MARGIN_CM = 3  # extra travel added to the estimated distance

    # Start every run from a clean state so re-running the cell after a finished
    # search does not exit immediately because of leftover values.
    brk = False
    total_angle = 0

    set_resolution(URL_left, index=8)
    set_resolution(URL_right, index=8)
    set_speed(URL_car, 230)

    time.sleep(5)

    # Class names indexed by label id; loop-invariant, so built once.
    categories = np.array(weights.meta["categories"])

    cap_left = cap_right = None
    try:
        while True:
            mov_angle = []
            mov_dists = []

            # A stream that fails to open must not leave these undefined (first
            # pass) or carrying the previous iteration's frame (later passes).
            ret_l = ret_r = False
            frame_l = frame_r = None

            ### capture the images
            # The stream is reopened for each frame and released right after the
            # read to stop a queue of old frames building up.
            cap_left = cv2.VideoCapture(URL_left + ":81/stream")
            cap_right = cv2.VideoCapture(URL_right + ":81/stream")
            if cap_left.isOpened():
                ret_l, frame_l = cap_left.read()
            cap_left.release()
            if cap_right.isOpened():
                ret_r, frame_r = cap_right.read()
            cap_right.release()

            if ret_l:
                cv2.imshow("left_eye", frame_l)
            if ret_r:
                cv2.imshow("right_eye", frame_r)

            if ret_r and ret_l and cnt == 0:
                #do stereo matching
                imgs = [cv2.cvtColor(frame_l, cv2.COLOR_BGR2RGB),cv2.cvtColor(frame_r, cv2.COLOR_BGR2RGB)]

                #do the inference
                det, lbls, scores, masks = get_detections(model,imgs,score_threshold=0.5)
                found_in_both = len(det[0]) > 0 and len(det[1]) > 0

                if found_in_both:
                    sz1 = frame_r.shape[1]
                    centre = sz1/2

                    #print out inference results
                    print(categories[lbls[0]])
                    print(categories[lbls[1]])
                    cost = get_cost(det, lbls = lbls,sz1 = centre)

                    #do the tracking from left eye to right eye.
                    tracks = scipy.optimize.linear_sum_assignment(cost)

                    dists_tl =  get_horiz_dist_corner_tl(det)
                    dists_br =  get_horiz_dist_corner_br(det)

                    dctl = get_dist_to_centre_tl(det[0],cntr = centre)
                    dcbr = get_dist_to_centre_br(det[0], cntr = centre)

                    #measure distance of object from the centre so I can see how far I need to turn.
                    d0centre = get_dist_to_centre_cntr(det[0], cntr = centre)
                    d1centre = get_dist_to_centre_cntr(det[1], cntr = centre)

                    final_dists = []
                    target_tracks = []  # positions (within the matched pairs) of upright bottles
                    for k, (i, j) in enumerate(zip(*tracks)):
                        name_left = categories[lbls[0]][i]
                        if name_left == 'bottle':
                            print('is bottle')
                            #check if bottle is still upright
                            if object_upright(det[0][i]):
                                print('object upright')
                                # Horizontal offset of the pair relative to the image
                                # width, scaled by 9 to get the turn angle (tunable).
                                angle = (d0centre[i]+d1centre[j])/sz1*9
                                mov_angle.append(int(angle))
                                target_tracks.append(k)
                                print(angle)
                            else:
                                print('object flat')
                        # Use the corner for which the left-eye distance to the
                        # centre (dctl vs dcbr) is smaller. Pairs are kept even when
                        # their labels differ.
                        if dctl[i] < dcbr[i]:
                            final_dists.append((dists_tl[i][j], name_left))
                        else:
                            final_dists.append((dists_br[i][j], name_left))

                    #final distances as list
                    fd = [disparity for (disparity, _name) in final_dists]
                    #find distance away
                    # NOTE(review): 7.05/2 is presumably half the camera baseline in cm -- confirm.
                    dists_away = (7.05/2)*sz1*(1/tantheta)/np.array((fd))+fl

                    # Distances of exactly the objects whose angles are in mov_angle, so
                    # that mov_angle[n] and mov_dists[n] always describe the same bottle.
                    mov_dists = [dists_away[k] for k in target_tracks]

                    cat_dist = []
                    for k in range(len(dists_away)):
                        name_k = categories[lbls[0]][(tracks[0][k])]
                        cat_dist.append(f'{name_k} {dists_away[k]:.1f}cm')
                        print(f'{name_k} is {dists_away[k]:.1f}cm away')

                    t1 = [list(tracks[1]), list(tracks[0])]
                    frames_ret = []
                    for i, imgi in enumerate(imgs):
                        img = imgi.copy()
                        deti = det[i].astype(np.int32)
                        draw_detections(img,deti[list(tracks[i])], obj_order=list(t1[1]))
                        annotate_class2(img,deti[list(tracks[i])],lbls[i][list(tracks[i])],cat_dist)
                        frames_ret.append(img)
                    cv2.imshow("left_eye", cv2.cvtColor(frames_ret[0],cv2.COLOR_RGB2BGR))
                    cv2.imshow("right_eye", cv2.cvtColor(frames_ret[1],cv2.COLOR_RGB2BGR))

                    if (not moved and mov_angle):
                        target_dist = mov_dists[0]
                        # A mismatched pair can give a negative or infinite estimate;
                        # never send such a value (or one beyond the test limit) to the car.
                        if np.isfinite(target_dist) and 0 < target_dist <= MAX_APPROACH_CM:
                            set_angle(URL_car, mov_angle[0])
                            print('move angle', mov_angle[0])
                            time.sleep(2)
                            if mov_angle[0] > 0:
                                total_angle += mov_angle[0]

                            set_distance(URL_car, target_dist+APPROACH_MARGIN_CM)
                            starttime = time.time()
                            time.sleep(3) ##wait three seconds, then reverse.
                            set_distance(URL_car, -target_dist-APPROACH_MARGIN_CM)
                            starttime = time.time()
                            time.sleep(3)
                        else:
                            print('target skipped, distance', target_dist)

                # Scan step: turn a little and look again. This runs after every
                # processed frame pair (also when nothing was detected or the target
                # was skipped), so the search can neither stall on one view nor spin
                # past the limit.
                if (total_angle < MAX_SCAN_DEG): #two rounds
                    set_angle(URL_car, SCAN_STEP_DEG)
                    total_angle += SCAN_STEP_DEG
                    if found_in_both:
                        time.sleep(2)
                else:
                    brk = True

            # Always reached, so the windows refresh and keys are read on every pass.
            key = cv2.waitKey(1)
            if brk:
                break

            if key == ord('r'):
                idx = int(input("Select resolution index: "))
                set_resolution(URL_left, index=idx, verbose=True)
                set_resolution(URL_right, index=idx, verbose=True)

            elif key == ord('q'):
                val = int(input("Set quality (10 - 63): "))
                set_quality(URL_left, value=val)
                set_quality(URL_right, value=val)

            elif key == ord('a'):
                # Both cameras are toggled from the same starting state. Feeding the
                # first call's result into the second would toggle twice and leave
                # the two cameras with opposite settings.
                new_awb = set_awb(URL_left, AWB)
                set_awb(URL_right, AWB)
                AWB = new_awb

            elif key == ord('p'): #3d
                cnt = 0

            elif key == 27:
                break
    finally:
        # Runs on normal exit, on errors and on a kernel interrupt.
        cv2.destroyAllWindows()
        if cap_left is not None:
            cap_left.release()
        if cap_right is not None:
            cap_right.release()

['bottle']
['bottle']
is bottle
object upright
1.4403852939605712
bottle is 35.4cm away
move angle 1
['refrigerator']
['refrigerator']
refrigerator is 209.3cm away
['cup' 'bottle']
['cup']
cup is 55.9cm away
['cup' 'toilet' 'refrigerator']
['cup']
cup is 56.3cm away
['cup' 'bowl' 'refrigerator']
['cup' 'dining table']
cup is 59.3cm away
refrigerator is -4.8cm away
['cup' 'car']
['car' 'cup']
cup is 54.3cm away
car is 50.8cm away
['cup' 'car' 'refrigerator' 'refrigerator']
['cup' 'car']
cup is 56.4cm away
car is 51.8cm away
['bottle' 'car' 'hair drier']
['bottle' 'car']
is bottle
object upright
2.756794166564941
bottle is 44.7cm away
car is 53.2cm away
move angle 2
['bottle']
['bottle']
is bottle
object upright
5.15407190322876
bottle is 44.1cm away
move angle 5
['refrigerator']
['backpack' 'refrigerator']
refrigerator is 71.9cm away
['suitcase']
['suitcase']
suitcase is 155.8cm away
