In [1]:
from IPython.display import display
import ipywidgets
import traitlets
from jetbot import Camera, bgr8_to_jpeg

camera = Camera()

image_widget = ipywidgets.Image()

traitlets.dlink((camera, 'value'), (image_widget, 'value'), transform=bgr8_to_jpeg)

display(image_widget)

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x0…

In [2]:
import cv2

import numpy as np
import torch
from torch import nn

# use_cuda = torch.cuda.is_available()
# device = torch.device("cuda" if use_cuda else "cpu")
use_cuda = False
device = "cpu"

print(device)

cpu


In [3]:
from PIL import Image

project_root = '.'

image_shape = (28, 28) # for pure CNN
# image_shape = (224, 224) # For general model like ResNet and MobileNet

In [4]:
model_path = project_root + '/tracking_cnn.pth'

class CNN(nn.Module):
    def __init__(self, input_shape, n_actions=2):
        super(CNN, self).__init__()

        self.input_shape = input_shape

        self.conv = nn.Sequential(
            nn.Conv2d(input_shape[0], 16, kernel_size=8, stride=2),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Conv2d(32, 32, kernel_size=3, stride=1),
            nn.ReLU()
        )

        conv_out_size = self._get_conv_out(input_shape)
        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, 256),
            nn.ReLU(),
            nn.Linear(256, n_actions)
        )

    def _get_conv_out(self, shape):
        o = self.conv(torch.zeros(1, *shape))
        return int(np.prod(o.size()))

    def forward(self, x):
        conv_out = self.conv(x).view(x.size()[0], -1)
        return self.fc(conv_out)

In [None]:
from torchvision.models import resnet18

model_path = project_root + '/tracking_resnet.pth'

class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        self.resnet = resnet18(pretrained=True)
        self.fc = nn.Linear(1000, 2)

    def forward(self, x):
        x = self.resnet(x)
        output = self.fc(x)
        return output

In [5]:
import os
# from torchsummary import summary

model = CNN(input_shape=(3, image_shape[0], image_shape[1]))
# model = MobileNet()
# model = ResNet()
# print(model)
# summary(model, (3, image_shape[0], image_shape[1]), device='cpu')
if os.path.exists(model_path):
    model.load_state_dict(torch.load(model_path, map_location=device))

In [6]:
def inference(img_original, model):
    if use_cuda:
        model = model.cuda()
    with torch.no_grad():
        model.eval()
        img = torch.tensor(np.array([np.asarray(img_original.resize(image_shape)).transpose(2, 0, 1) / 255])).float().to(device)
        pred = model(img).squeeze().cpu().numpy()
        x = pred[0] * 5 + 46
        y = pred[1] * 14 + 39
        return x, y

In [7]:
from jetbot import Robot

robot = Robot()

In [19]:
speed_gain_slider = ipywidgets.FloatSlider(min=0.0, max=1.0, step=0.01, description='speed gain')
steering_gain_slider = ipywidgets.FloatSlider(min=0.0, max=1.0, step=0.01, value=0.2, description='steering gain')
steering_dgain_slider = ipywidgets.FloatSlider(min=0.0, max=0.5, step=0.001, value=0.0, description='steering kd')
steering_bias_slider = ipywidgets.FloatSlider(min=-0.3, max=0.3, step=0.01, value=0.0, description='steering bias')

display(speed_gain_slider, steering_gain_slider, steering_dgain_slider, steering_bias_slider)
speed_gain_slider.value = 0.02
steering_gain_slider.value = 0.06
steering_dgain_slider.value = 0.01

FloatSlider(value=0.0, description='speed gain', max=1.0, step=0.01)

FloatSlider(value=0.2, description='steering gain', max=1.0, step=0.01)

FloatSlider(value=0.0, description='steering kd', max=0.5, step=0.001)

FloatSlider(value=0.0, description='steering bias', max=0.3, min=-0.3, step=0.01)

-0.102491075099 -0.0271585702896
L= 0.0
R= 0.04224332762421784
-0.102136664391 -0.0248453164101
L= 0.0
R= 0.0422731961078763
-0.102417494655 -0.0264611244202
L= 0.03774088201597633
R= 0.08225911798402366
-0.102710989416 -0.0273341584206
L= 0.037708105036391895
R= 0.0822918949636081
-0.102761496902 -0.0285581016541
L= 0.07775539653874682
R= 0.1222446034612532
-0.102275622189 -0.0247564792633
L= 0.07769055496620786
R= 0.12230944503379215
-0.102475160658 -0.0267172384262
L= 0.07774108222481332
R= 0.12225891777518669
-0.102383518517 -0.0242919993401
L= 0.07764693126029676
R= 0.12235306873970325
-0.102719143331 -0.0265491104126
L= 0.07768378028285573
R= 0.12231621971714428
-0.108016239405 -0.054017996788
L= 0.077693810420296
R= 0.12230618957970402
-0.116245155334 -0.109014599323
L= 0.07819084973658576
R= 0.12180915026341425
-0.0862076196074 -0.217335120812
L= 0.0559408787210633
R= 0.08405912127893672
-0.0903883109987 -0.210838494003
L= 0.0349263343129254
R= 0.06507366568707461
-0.1085949677

-0.102568529844 -0.457569489479
L= 0.08805775166529767
R= 0.11194224833470234
-0.140872405767 -0.377684817314
L= 0.08036710730603164
R= 0.11963289269396837
-0.131052111387 -0.251575138569
L= 0.07961755607177404
R= 0.12038244392822597
-0.0999247539043 -0.403639316559
L= 0.08811951692848055
R= 0.11188048307151946
-0.115104310513 -0.519010148048
L= 0.08662367788628758
R= 0.11337632211371243
-0.11592247963 -0.374757196903
L= 0.0840860760282974
R= 0.11591392397170261
-0.147870724201 -0.154704717994
L= 0.07289114549775386
R= 0.12710885450224615
-0.102149071097 -0.512961211205
L= 0.09031180775704001
R= 0.10968819224296
-0.103354521394 -0.146409288645
L= 0.08032488008605936
R= 0.11967511991394066
-0.126727478504 -0.248833637238
L= 0.0802496208447817
R= 0.11975037915521831
-0.111610745192 -0.184209290147
L= 0.0811884519380926
R= 0.11881154806190741
-0.122463421822 -0.154909266233
L= 0.0781019048503778
R= 0.12189809514962222
-0.133618371487 -0.174205368757
L= 0.07716251635516073
R= 0.12283748364

In [20]:
x_slider = ipywidgets.FloatSlider(min=-1.0, max=1.0, description='x')
y_slider = ipywidgets.FloatSlider(min=0, max=1.0, orientation='vertical', description='y')
steering_slider = ipywidgets.FloatSlider(min=-1.0, max=1.0, description='steering')
speed_slider = ipywidgets.FloatSlider(min=0, max=1.0, orientation='vertical', description='speed')

display(ipywidgets.HBox([y_slider, speed_slider]))
display(x_slider, steering_slider)

HBox(children=(FloatSlider(value=0.0, description='y', max=1.0, orientation='vertical'), FloatSlider(value=0.0…

FloatSlider(value=0.0, description='x', max=1.0, min=-1.0)

FloatSlider(value=0.0, description='steering', max=1.0, min=-1.0)

In [21]:
from PIL import Image

angle = 0.0
angle_last = 0.0

def execute(change):
    global angle, angle_last
    image = Image.fromarray(change['new'])
    x, y = inference(image, model)
    x = (x - 50.0) / 50.0
    y = (y - 50.0) / 50.0
    print(x, y)
    y = (0.5 - y) / 2.0
    
    x_slider.value = x
    y_slider.value = y
    
    speed_slider.value = speed_gain_slider.value
    
    angle = np.arctan2(x, y)
    pid = angle * steering_gain_slider.value + (angle - angle_last) * steering_dgain_slider.value
    angle_last = angle
    
    steering_slider.value = pid + steering_bias_slider.value
    
    print('L=',max(min(speed_slider.value + steering_slider.value, 1.0), 0.0))
    print('R=',max(min(speed_slider.value - steering_slider.value, 1.0), 0.0))
    
    robot.left_motor.value = max(min(speed_slider.value + steering_slider.value, 1.0), 0.0)
    robot.right_motor.value = max(min(speed_slider.value - steering_slider.value, 1.0), 0.0)
    
execute({'new': camera.value})

-0.140543847084 -0.004506316185
L= 0.0
R= 0.05558233708496335


In [18]:
robot.stop()

In [22]:
camera.observe(execute,names='value')

-0.130580263138 -0.176886070967
L= 0.0
R= 0.040692795272952456
-0.132262055874 -0.201060631275
L= 0.0
R= 0.041573925049786305
-0.133484106064 -0.222054774165
L= 0.0
R= 0.04118218626741852
-0.130466352701 -0.22294715941
L= 0.0
R= 0.04070499262914731
-0.126317696571 -0.187012892365
L= 0.0
R= 0.04120295906255156
-0.128980671167 -0.219126628041
L= 0.0
R= 0.040585394069439784
-0.129515998363 -0.226231638789
L= 0.0
R= 0.04053873736820794
-0.131660132408 -0.251767962575
L= 0.0
R= 0.04015781888610584
-0.129287804961 -0.242648394704
L= 0.0
R= 0.04008456531769403
-0.129174201488 -0.234282432199
L= 0.0
R= 0.0403312570208576
-0.130612318516 -0.250430135727
L= 0.0
R= 0.04006556171039901
-0.128586273193 -0.208052400947
L= 0.0
R= 0.04103786905088854
-0.13264441967 -0.227980226874
L= 0.0
R= 0.04097830642675728
-0.132180272341 -0.220224884152
L= 0.0
R= 0.04113034865094293
-0.133233450651 -0.248008000851
L= 0.0
R= 0.040437470013481525
-0.133135240078 -0.240035359263
L= 0.0
R= 0.04075478957327057
-0.1335

-0.144699482918 -0.0898225128651
L= 0.0
R= 0.04749154420885661
-0.149088697433 -0.202457796931
L= 0.0
R= 0.04353851723108657
-0.136975662708 -0.203695020676
L= 0.0
R= 0.041973352193457124
-0.143918089867 -0.184220484495
L= 0.0
R= 0.044161746132605956
-0.138207929134 -0.314998636246
L= 0.001092991974178427
R= 0.038907008025821574
-0.131610678434 -0.405283279419
L= 0.003462696291945379
R= 0.03653730370805462
0.00765999078751 -0.0530462121964
L= 0.024768175720272824
R= 0.015231824279727175
-0.0157512116432 -0.20402277559
L= 0.01659290603929709
R= 0.02340709396070291
-0.0951199311018 -0.296636475325
L= 0.004038207543895368
R= 0.03596179245610463
-0.1023495543 -0.279265441895
L= 0.004362626712554073
R= 0.03563737328744593
-0.105187450647 -0.304142613411
L= 0.004657243803934584
R= 0.03534275619606542
-0.0970777320862 -0.29379543066
L= 0.005767073692808022
R= 0.03423292630719198
-0.0984668332338 -0.277829477787
L= 0.005040725109962707
R= 0.03495927489003729
-0.0902849930525 -0.280963402987
L=

-0.0246382880211 -0.121561006308
L= 0.01741733941641159
R= 0.02258266058358841
0.0226808857918 -0.0265595078468
L= 0.026806604234432214
R= 0.013193395765567788
0.0150422763824 0.00480928421021
L= 0.023388168904127073
R= 0.016611831095872928
-0.0902691181004 -0.0396083545685
L= 0.0
R= 0.04320738011661819
-0.0867382626235 -0.0208210086823
L= 0.0007219012226850861
R= 0.03927809877731492
-0.105080838799 -0.0540854215622
L= 0.0
R= 0.04216189007422881
-0.0652017223835 -0.0283362841606
L= 0.0066865260847877115
R= 0.03331347391521229
-0.0827375161648 -0.240294370949
L= 0.007026054519377706
R= 0.032973945480622296
-0.0657183381915 -0.401914360523
L= 0.012069265153493797
R= 0.027930734846506204
-0.0655564892292 -0.305483314991
L= 0.01015189689626323
R= 0.02984810310373677
-0.0313396441936 -0.22171569556
L= 0.01554948588021924
R= 0.02445051411978076
-0.0385406720638 -0.163340221047
L= 0.012768492326204283
R= 0.027231507673795716
-0.0577028754354 -0.125015124083
L= 0.008375653623442355
R= 0.031624

-0.102948604822 -0.028742518425
L= 0.0
R= 0.0422880730636738
-0.103101131618 -0.0299567651749
L= 0.0
R= 0.042261344917852925
-0.102910048068 -0.0294504117966
L= 0.0
R= 0.04224283151477724
-0.10327696681 -0.0300764608383
L= 0.0
R= 0.04230211758085144
-0.103280856013 -0.0296966791153
L= 0.0
R= 0.04231195152550665
-0.103453645408 -0.0320840167999
L= 0.0
R= 0.042242526217462925
-0.103268434405 -0.03071205616
L= 0.0
R= 0.04227076226331836
-0.103562812805 -0.0312577295303
L= 0.0
R= 0.04231115479968131
-0.1034690696 -0.031264872551
L= 0.0
R= 0.04228323815761276
-0.102949457169 -0.0293409967422
L= 0.0
R= 0.04225306247187653
-0.103202632368 -0.0297964477539
L= 0.0
R= 0.04229558856852879
-0.103229159415 -0.0302852773666
L= 0.0
R= 0.042274445134731914
-0.10310743928 -0.0288177704811
L= 0.0
R= 0.04231437921170306
-0.103098523915 -0.0299923968315
L= 0.0
R= 0.042254422145731906
-0.103474241793 -0.0319069433212
L= 0.0
R= 0.042262969184801394
-0.103005028069 -0.0270524597168
L= 0.0
R= 0.04237241486751

In [None]:
camera.stop()

In [17]:
import time

camera.unobserve(execute, names='value')

time.sleep(0.1)  # add a small sleep to make sure frames have finished processing

robot.stop()

ValueError: list.remove(x): x not in list