### Imports

In [None]:
import cv2
from time import sleep, time
import json
import numpy as np
import base64
import grpc
import logging
import zlib
import struct
import math
from datetime import datetime, timedelta
import pravega

### Helper functions

In [None]:
def chunks(l, n):
    """Lazily produce consecutive slices of l, each holding at most n items.
    Every slice has exactly n items except possibly the last one, which holds the remainder.
    Based on https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks"""
    yield from (l[start:start + n] for start in range(0, len(l), n))

def pravega_chunker_v1(payload, max_chunk_size):
    """Split payload into header-prefixed chunks sized from max_chunk_size.
    Each chunk carries at most (max_chunk_size - 4096) bytes of payload data.
    When written to Pravega, chunked events have the following 64-bit header.
      version: value must be 1 (8-bit signed integer)
      reserved: value must be 0 (3 8-bit signed integers)
      chunk_index: 0-based chunk index (16-bit signed big endian integer)
      final_chunk_index: number of chunks minus 1 (16 bit signed big endian integer)

    Yields tuples of (chunked_event, chunk_index, is_final_chunk).
    An empty payload yields nothing.
    Raises ValueError if max_chunk_size leaves no room for payload data, or if the
    payload would need more chunks than the 16-bit header fields can describe.
    """
    version = 1
    # 4096 bytes of every chunk are held back (presumably headroom for the header
    # and transport overhead -- the exact reason is not visible here).
    max_chunk_data_size = max_chunk_size - 4096
    if max_chunk_data_size <= 0:
        # Without this check a negative size silently produced no chunks at all
        # (dropping the payload) and a zero size raised an obscure range() error.
        raise ValueError(
            'max_chunk_size must be greater than 4096, got %r' % (max_chunk_size,))
    # Ceiling division: number of chunks needed to carry the whole payload.
    num_chunks = -(-len(payload) // max_chunk_data_size)
    final_chunk_index = num_chunks - 1
    if final_chunk_index > 0x7FFF:
        # Both index fields are signed 16-bit integers in the header.
        raise ValueError(
            'payload of %d bytes needs %d chunks, but the header supports at most 32768'
            % (len(payload), num_chunks))
    for chunk_index in range(num_chunks):
        start = chunk_index * max_chunk_data_size
        chunked_payload = payload[start:start + max_chunk_data_size]
        is_final_chunk = chunk_index == final_chunk_index
        header = struct.pack('!bxxxhh', version, chunk_index, final_chunk_index)
        chunked_event = header + chunked_payload
        yield (chunked_event, chunk_index, is_final_chunk)

def encode_record(record: dict) -> bytes:
    """Serialize a record to UTF-8 encoded JSON bytes.
    The binary 'data' field is base64 encoded because JSON cannot carry raw bytes.
    The caller's record is left untouched; a shallow copy is serialized instead.
    JSON is used for compatibility; Avro or Protobuf would perform better.
    """
    image_as_text = base64.b64encode(record['data']).decode('utf-8')
    # Overriding 'data' in place keeps the original key order of the record.
    serializable = {**record, 'data': image_as_text}
    return json.dumps(serializable).encode('utf-8')

### Generate a record for every frame

In [None]:
def pravega_request_generator(data_generator, scope, stream, max_chunk_size, use_transactions):
    """Convert records from data_generator into Pravega WriteEventsRequest messages.
    For each record:
      - wait until the record's 'timestamp' (milliseconds since the epoch) has been reached
      - replace 'timestamp' in the record with an ISO-8601 string with millisecond precision
      - encode the record and yield one request per chunk of the encoded payload
      - log a summary of the record (without its 'data') once all its chunks were consumed
    When use_transactions is true, the request carrying the final chunk asks for a commit.
    """
    epoch = datetime(1970, 1, 1)
    for record in data_generator:
        timestamp_ms = record['timestamp']
        # Pace the output: do not emit a record before its own timestamp.
        delay = timestamp_ms/1000.0 - time()
        if delay > 0.0:
            sleep(delay)
        iso_micros = (epoch + timedelta(milliseconds=timestamp_ms)).strftime("%Y-%m-%dT%H:%M:%S.%f")
        # Drop the last three digits to go from microseconds to milliseconds.
        record['timestamp'] = iso_micros[:-3] + 'Z'
        payload = encode_record(record)
        chunk_stream = pravega_chunker_v1(payload, max_chunk_size=max_chunk_size)
        for chunked_event, last_index, is_final_chunk in chunk_stream:
            yield pravega.pb.WriteEventsRequest(
                event=chunked_event,
                scope=scope,
                stream=stream,
                use_transaction=use_transactions,
                commit=is_final_chunk and use_transactions,
                )
        # Summarize the record for the log, leaving out the bulky image data.
        summary = {key: value for key, value in record.items() if key != 'data'}
        summary['data_len'] = len(record['data'])
        summary['payload_len'] = len(payload)
        # After the loop, last_index holds the index of the final chunk.
        summary['final_chunk_index'] = last_index
        logging.info('%d: %s' % (summary['camera'], json.dumps(summary)))

### Read one frame at a time from camera feed and create a record

In [None]:
def data_generator(cap, include_checksum, ssrc):
    """Read frames from cap forever and yield one record (dict) per frame.
    Each record has the keys:
      timestamp: capture time in milliseconds since the epoch (int)
      frame_number: 0-based index of the frame
      camera: always 0
      ssrc: the ssrc value passed in by the caller
      data: the JPEG-encoded frame as returned by cv2.imencode; when
            include_checksum is true, bytes made of a big-endian CRC32 of the
            JPEG data followed by the JPEG data itself
    Raises RuntimeError if a frame cannot be read from cap.
    """
    frame_number = 0
    while True:
        # Capture video frame by frame
        ret, frame = cap.read()
        if not ret:
            # Fail with a clear message instead of an obscure error from imencode.
            raise RuntimeError('Failed to read frame %d from the video capture' % frame_number)
        timestamp = int(time() * 1000)
        _, data = cv2.imencode(".jpg", frame)
        if include_checksum:
            # Add CRC32 checksum to allow for error detection.
            # The checksum must cover, and be prepended to, the encoded image bytes
            # (not the raw frame array).
            encoded = data.tobytes()
            checksum = struct.pack('!I', zlib.crc32(encoded))
            data = checksum + encoded
        record = {
            'timestamp': timestamp,
            'frame_number': frame_number,
            'camera': 0,
            'ssrc': ssrc,
            'data': data,
        }
        yield record
        frame_number += 1

### Capture video from a webcam

In [None]:
logging.basicConfig(level=logging.INFO)
# Variables
PRAVEGA_GATEWAY = '10.1.83.104:80'  # Pravega gateway
PRAVEGA_SCOPE = 'examples'
PRAVEGA_STREAM = 'video'
ADD_CHECKSUM = False  # Prepend the data with a checksum that can be used to detect errors
USE_TRANSACTIONS = False  # If true, use Pravega transactions
MAX_CHUNK_SIZE = 1024*1024  # Maximum size of chunk (bytes)

# Capture video from camera with index 0
cap = cv2.VideoCapture(0)
logging.debug("frame width is {0}; frame height is {1}".format(
    cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

try:
    # Each pass opens a new gateway connection and streams frames under a new random ssrc.
    while True:
        ssrc = np.random.randint(0, 2**31)
        data_iter = data_generator(cap, ADD_CHECKSUM, ssrc)
        pravega_request_iter = pravega_request_generator(data_iter, PRAVEGA_SCOPE, PRAVEGA_STREAM, MAX_CHUNK_SIZE, USE_TRANSACTIONS)
        with grpc.insecure_channel(PRAVEGA_GATEWAY) as pravega_channel:
            pravega_client = pravega.grpc.PravegaGatewayStub(pravega_channel)
            pravega_client.CreateScope(pravega.pb.CreateScopeRequest(scope=PRAVEGA_SCOPE))
            pravega_client.CreateStream(pravega.pb.CreateStreamRequest(scope=PRAVEGA_SCOPE, stream=PRAVEGA_STREAM))
            write_response = pravega_client.WriteEvents(pravega_request_iter)
            logging.info("write_response=" + str(write_response))
except KeyboardInterrupt:
    # Interrupting the cell/script is the normal way to stop the endless capture loop.
    logging.info("Interrupted; stopping video capture")
finally:
    # Release the capture on exit, even when the loop ends with an error or interrupt
    cap.release()