In [1]:
import cv2
import numpy as np
import random
import tensorly as tl
from tensorly.decomposition import tucker

Using numpy backend.


# Data Ingestion

We can read all of our files, and get the number of frames in each one. When reading them as tensors we will truncate to the one with the smallest number of frames. I strived to take videos of the same length (~11s), but small discrepancies are bound to exist. For our particular application, truncation ought not to matter too much.

In [2]:
# Create VideoCapture objects
parking_lot = cv2.VideoCapture('parking_lot.MOV')
patio = cv2.VideoCapture('patio.MOV')

# Get number of frames in each video
parking_lot_frames = int(parking_lot.get(cv2.CAP_PROP_FRAME_COUNT))
patio_frames = int(patio.get(cv2.CAP_PROP_FRAME_COUNT))

parking_lot_frames, patio_frames

(321, 328)

In [3]:
# Get dimensions of each frame
parking_lot_height = int(parking_lot.get(cv2.CAP_PROP_FRAME_HEIGHT))
parking_lot_width = int(parking_lot.get(cv2.CAP_PROP_FRAME_WIDTH))
patio_height = int(patio.get(cv2.CAP_PROP_FRAME_HEIGHT))
patio_width = int(patio.get(cv2.CAP_PROP_FRAME_WIDTH))

print(parking_lot_height, parking_lot_width)
print(patio_height, patio_width)

1080 1920
1080 1920


Based on the number of frames and the dimensions of the frames, we will need a 4D tensor (321x1080x1920x3) to hold these videos:
- 321 for the frames of the images (we truncate the extra frames for the patio video)
- 1080x1920 for the height and width of the images
- 3 for the RGB color channels

In [4]:
# Create desired arrays
parking_lot_array = []
patio_array = []

# Keep track of the frame
patio_frame_nb = 0

# Convert the parking lot video
while parking_lot.isOpened():
    ret, frame = parking_lot.read()
    if not ret:
        break    
    parking_lot_array.append(frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    
# Convert the patio video
while patio.isOpened() and patio_frame_nb < 321:
    ret, frame = patio.read()
    if not ret:
        break
    patio_array.append(frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    patio_frame_nb += 1

Release the capture now that we don't need it anymore.

In [5]:
parking_lot.release()
patio.release()
cv2.destroyAllWindows()

# Data Manipulation

We need to create tensors out of the NumPy arrays for use with the TensorLy library.

In [6]:
# Create tensors from matrices
parking_lot_tensor = tl.tensor(parking_lot_array)
patio_tensor = tl.tensor(patio_array)

To speed up later steps, we randomly select frames of the tensors to focus on.

In [11]:
# Set the seed for reproducibility
random.seed(42)
random_frames = random.sample(range(1, 321), 50)

In [13]:
# Use these random frames to subset the tensors
subset_parking_lot = parking_lot_tensor[random_frames,:,:,:]
subset_patio = patio_tensor[random_frames,:,:,:]

# Naive Comparison

A natural way of comparing two tensors is to compute the norm of the difference between them.

In [15]:
parking_patio_naive_diff = tl.norm(subset_parking_lot - subset_patio)
parking_patio_naive_diff

180197.9867312618

# Unsupervised Learning

Now that we have the tensors, we can perform Tucker decomposition to get a more robust representation of the tensor (using the resulting core tensor) to get rid of noise and get a better sense of the similarity between two videos.

The main tuning parameter is the n-rank of the tensor. We use a simple version of AIC to compute the optimal n-rank for each tensor.

In [None]:
#tucker(parking_lot_tensor, ranks = [1,1,1,1])