In [1]:
import sys
sys.path.insert(0,'..')
from src.server.db_api import connect

In [2]:
import nest_asyncio
nest_asyncio.apply()
connection = connect(host = '0.0.0.0', port = 5432) # hostname, port of the server where EVADB is running

In [3]:
cursor = connection.cursor()

### Upload the survellience video to analyse

In [4]:
cursor.execute('UPLOAD INFILE "../data/bdd_test/bdd_1.mp4" PATH "bdd_video.mp4";')
response = cursor.fetch_all()
print(response)

Response Object:
@status: 0
@batch: Batch Object:
@dataframe: Empty DataFrame
Columns: []
Index: []
@batch_size: 0
@identifier_column: id
@metrics: None


## Visualize the video

In [5]:
from ipywidgets import Video
Video.from_file("../data/bdd_test/bdd_1.mp4", embed=True)

Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00<wmoov\x00\x00\x00lmvhd\x00\x00\x00…

## Load video into EVA

In [6]:
cursor.execute('LOAD DATA INFILE "bdd_video.mp4" INTO MyVideo;')
response = cursor.fetch_all()
print(response)

Response Object:
@status: 0
@batch: Batch Object:
@dataframe:            Video  Num Loaded Frames
0  bdd_video.mp4               1203
@batch_size: 1
@identifier_column: id
@metrics: None


## Run the model on the loaded video

In [7]:
cursor.execute("""SELECT id, Unnest(FastRCNNObjectDetector(data)) FROM MyVideo""")
response = cursor.fetch_all()

In [8]:
response.batch.frames

Unnamed: 0,id,boxes,labels,scores
0,407,"[[461.095123291, 421.1968688965], [554.2673339...",car,0.997653
1,407,"[[559.2814331055, 419.9615783691], [656.066711...",car,0.997329
2,407,"[[645.993347168, 433.1330566406], [682.3140258...",car,0.996911
3,408,"[[463.528503418, 422.7390136719], [555.3230590...",car,0.998743
4,408,"[[644.3532104492, 434.6446228027], [680.485046...",car,0.996526
...,...,...,...,...
7500,180,"[[344.4665527344, 307.2627258301], [371.658508...",traffic light,0.989451
7501,180,"[[208.8990478516, 152.3822479248], [237.628417...",traffic light,0.982935
7502,180,"[[1052.2082519531, 404.5354614258], [1079.6346...",person,0.981206
7503,180,"[[34.9946212769, 343.8520812988], [287.7828979...",bus,0.909774


## Make model inference (This section will be moved to a UDF, temporarily seeing performance now)

In [9]:
import pandas as pd
import torch
from torchvision import transforms

import cv2
from PIL import Image

In [10]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [11]:
number_to_make = {0: 'AM',
 1: 'Acura',
 2: 'Aston',
 3: 'Audi',
 4: 'BMW',
 5: 'Bentley',
 6: 'Bugatti',
 7: 'Buick',
 8: 'Cadillac',
 9: 'Chevrolet',
 10: 'Chrysler',
 11: 'Daewoo',
 12: 'Dodge',
 13: 'Eagle',
 14: 'FIAT',
 15: 'Ferrari',
 16: 'Fisker',
 17: 'Ford',
 18: 'GMC',
 19: 'Geo',
 20: 'HUMMER',
 21: 'Honda',
 22: 'Hyundai',
 23: 'Infiniti',
 24: 'Isuzu',
 25: 'Jaguar',
 26: 'Jeep',
 27: 'Lamborghini',
 28: 'Land',
 29: 'Lincoln',
 30: 'MINI',
 31: 'Maybach',
 32: 'Mazda',
 33: 'McLaren',
 34: 'Mercedes-Benz',
 35: 'Mitsubishi',
 36: 'Nissan',
 37: 'Plymouth',
 38: 'Porsche',
 39: 'Ram',
 40: 'Rolls-Royce',
 41: 'Scion',
 42: 'Spyker',
 43: 'Suzuki',
 44: 'Tesla',
 45: 'Toyota',
 46: 'Volkswagen',
 47: 'Volvo',
 48: 'smart'}

In [12]:
def infer_vehicle_makes_from_video(model, object_detections, input_path):
    
    object_detections['make_prediction'] = 'None'
    color=(0,255,0)
    thickness=3

    vcap = cv2.VideoCapture(input_path)
    width = int(vcap.get(3))
    height = int(vcap.get(4))
    fps = vcap.get(5)
    #fourcc = cv2.VideoWriter_fourcc(*'H264') #codec
    #video=cv2.VideoWriter(output_path, fourcc, fps, (width,height))

    frame_id = 0
    
    # Capture frame-by-frame
    ret, frame = vcap.read()  # ret = 1 if the video is captured; frame is the image

    while ret:
        print(frame_id)
        
        # fetch detections of all cars in this frame id
        car_detections = object_detections[(object_detections.id == frame_id) & (object_detections.labels.str.contains('car'))]
        
        for index, row in car_detections.iterrows():
            x1, y1 = row['boxes'][0]
            x2, y2 = row['boxes'][1]

            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            #print(f"Bounding rectangle: ({x1},{y1}) -> ({x2},{y2})")

            cropped_car_img = frame[y1:y2, x1:x2]
            #cv2.imshow('cropped car', cropped_car_img)
            #cv2.waitKey(0)
            
            #print("Running make inference on cropped car")
            
            img = Image.fromarray(cropped_car_img)
            image = data_transforms['valid'](img).float()
            input_image = torch.Tensor(image).unsqueeze(0).to(device)
            
            preds = model(input_image)
            label_index = int(preds.max(1)[1][0])
            #print("preds", preds)
            #print("preds softmax", torch.nn.functional.softmax(preds, dim=1))
            #print(f"class predicted for {label_index}: {number_to_make[label_index]}")
            object_detections.at[index, 'make_prediction'] = number_to_make[label_index]
                        
        frame_id+=1
        ret, frame = vcap.read()
    
    #video.release()
    vcap.release()
    cv2.destroyAllWindows()
    
    return object_detections

In [13]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
make_model = torch.load('final_make_70.pt', map_location='cpu')
make_model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1

In [14]:
device

device(type='cuda', index=0)

In [15]:
input_path = '../data/bdd_test/bdd_1.mp4'
car_detections = infer_vehicle_makes_from_video(make_model, response.batch.frames, input_path)

0
Bounding rectangle: (604,433) -> (644,466)
Bounding rectangle: (638,418) -> (743,510)
1
Bounding rectangle: (602,433) -> (642,466)
Bounding rectangle: (637,418) -> (742,508)
2
Bounding rectangle: (600,431) -> (641,464)
Bounding rectangle: (639,415) -> (741,506)
Bounding rectangle: (586,438) -> (600,452)
3
Bounding rectangle: (600,430) -> (643,464)
Bounding rectangle: (639,416) -> (741,506)
Bounding rectangle: (586,438) -> (601,452)
4
Bounding rectangle: (600,430) -> (641,464)
Bounding rectangle: (638,416) -> (740,506)
5
Bounding rectangle: (601,430) -> (640,466)
Bounding rectangle: (637,416) -> (743,508)
6
Bounding rectangle: (600,430) -> (642,464)
Bounding rectangle: (639,416) -> (743,505)
7
Bounding rectangle: (598,431) -> (640,465)
Bounding rectangle: (638,416) -> (738,505)
8
Bounding rectangle: (599,431) -> (640,466)
Bounding rectangle: (638,416) -> (741,505)
9
Bounding rectangle: (599,430) -> (642,465)
Bounding rectangle: (640,416) -> (742,504)
10
Bounding rectangle: (597,431) -

In [16]:
def annotate_video_with_make(detections, input_video_path, output_video_path):
    color=(0,255,0)
    thickness=3

    vcap = cv2.VideoCapture(input_path)
    width = int(vcap.get(3))
    height = int(vcap.get(4))
    fps = vcap.get(5)
    fourcc = cv2.VideoWriter_fourcc(*'H264') #codec
    video=cv2.VideoWriter(output_path, fourcc, fps, (width,height))

    frame_id = 0
    # Capture frame-by-frame
    ret, frame = vcap.read()  # ret = 1 if the video is captured; frame is the image

    while ret:
        df = detections
        df = df[['boxes', 'labels', 'make_prediction']][df.id == frame_id]
        if df.size:
            for bbox, label, make_prediction in df.values:
                if label == 'car':
                    x1, y1 = bbox[0]
                    x2, y2 = bbox[1]
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                    img=cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness) # object bbox
                    cv2.putText(img, make_prediction, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, thickness-1) # object label
            video.write(img)

        frame_id+=1
        ret, frame = vcap.read()

    video.release()
    vcap.release()

In [17]:
car_detections

Unnamed: 0,id,boxes,labels,scores,make_prediction
0,407,"[[461.095123291, 421.1968688965], [554.2673339...",car,0.997653,Ford
1,407,"[[559.2814331055, 419.9615783691], [656.066711...",car,0.997329,Ford
2,407,"[[645.993347168, 433.1330566406], [682.3140258...",car,0.996911,Geo
3,408,"[[463.528503418, 422.7390136719], [555.3230590...",car,0.998743,Ford
4,408,"[[644.3532104492, 434.6446228027], [680.485046...",car,0.996526,Geo
...,...,...,...,...,...
7500,180,"[[344.4665527344, 307.2627258301], [371.658508...",traffic light,0.989451,
7501,180,"[[208.8990478516, 152.3822479248], [237.628417...",traffic light,0.982935,
7502,180,"[[1052.2082519531, 404.5354614258], [1079.6346...",person,0.981206,
7503,180,"[[34.9946212769, 343.8520812988], [287.7828979...",bus,0.909774,


In [18]:
from ipywidgets import Video
input_path = '../data/bdd_test/bdd_1.mp4'
output_path = 'bdd_video_make.mp4'
annotate_video_with_make(car_detections, input_path, output_path)
Video.from_file(output_path)

Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free\x01\x17\x1b\xc6mdat\x0…