## 8. Robustness and Performance

### 74 Consider `memoryview` and `bytearray` for Zero-Copy Interactions with bytes

In [1]:
import logging

In [2]:
def timecode_to_index(video_id, timecode):
    """Stub lookup from a video timecode to a byte offset in the video data.

    Both arguments are ignored; this placeholder always reports offset 1234.
    """
    placeholder_offset = 1234
    return placeholder_offset

def request_chunk(video_id, byte_offset, size):
    """Stub for fetching `size` bytes of `video_id`'s data from `byte_offset`.

    Nothing is fetched here; the placeholder simply returns None.
    """
    return None

In [3]:
# Placeholder identifier: the Ellipsis object stands in for a real video id.
video_id = ...
timecode = '01:09:14:28'
# Translate the timecode into a byte offset within the video data.
byte_offset = timecode_to_index(video_id, timecode)
size = 20 * 1024 * 1024  # 20 MiB chunk
# Ask for `size` bytes of the video starting at that offset.
video_data = request_chunk(video_id, byte_offset, size)

In [4]:
class NullSocket:
    """Stand-in for a client socket that discards everything sent to it.

    The data is written to the platform's null device, so `send` performs a
    real write call without keeping the bytes anywhere. The instance can be
    used as a context manager, or closed explicitly with `close()`, so the
    underlying file handle does not stay open for the life of the kernel.
    """

    def __init__(self):
        # Binary mode lets write() accept any bytes-like object
        # (bytes, bytearray, memoryview).
        self.handle = open(os.devnull, 'wb')

    def send(self, data):
        """Write `data` to the null device.

        Returns:
            The byte count reported by the underlying `write` call.
        """
        return self.handle.write(data)

    def close(self):
        """Release the underlying file handle; safe to call more than once."""
        self.handle.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the handle; returning None lets exceptions propagate.
        self.close()

In [5]:
# The first four assignments mirror the illustrative listing; the Ellipsis
# values are rebound to concrete stand-ins further down in this cell.
socket = ...             # socket connection to client
video_data = ...         # bytes containing data for video_id
byte_offset = ...        # Requested starting position
size = 20 * 1024 * 1024  # Requested chunk size
import os

# Concrete stand-ins so the cell can run without a real client or video.
socket = NullSocket()
# One random 1 MiB block repeated 100 times (100 MiB in total).
video_data = 100 * os.urandom(1024 * 1024)
byte_offset = 1234

# Slicing a bytes object copies the selected range into a new bytes object.
chunk = video_data[byte_offset:byte_offset + size]
socket.send(chunk)

In [6]:
import timeit

In [7]:
def run_test():
    """Benchmark body: take a `size`-long slice of `video_data` at `byte_offset`."""
    chunk = video_data[byte_offset:byte_offset + size]
    # Call socket.send(chunk), but ignoring for benchmark

# Run the function 100 times and divide the total to get the mean time per call.
result = timeit.timeit(
    stmt='run_test()',
    globals=globals(),
    number=100) / 100

print(f'{result:0.9f} seconds')

0.002842635 seconds


In [8]:
data = b'shave and a haircut, two bits'
# Wrap the bytes in a memoryview; slicing the view yields another memoryview.
view = memoryview(data)
chunk = view[12:19]  # covers the bytes b'haircut'
print(chunk)
print('Size:           ', chunk.nbytes)
print('Data in view:   ', chunk.tobytes())
print('Underlying data:', chunk.obj)  # .obj is the object the view was created from

<memory at 0x7f2ad18ca880>
Size:            7
Data in view:    b'haircut'
Underlying data: b'shave and a haircut, two bits'


In [9]:
# Wrap the video data in a memoryview so that later cells can slice the view.
video_view = memoryview(video_data)

In [10]:
def run_test():
    """Benchmark body: take a `size`-long slice of the memoryview `video_view`."""
    chunk = video_view[byte_offset:byte_offset + size]
    # Call socket.send(chunk), but ignoring for benchmark

# Run the function 100 times and divide the total to get the mean time per call.
result = timeit.timeit(
    stmt='run_test()',
    globals=globals(),
    number=100) / 100

print(f'{result:0.9f} seconds')

0.000000435 seconds


In [11]:
class FakeSocket:
    """Simulated receiving socket that serves data out of `video_view`.

    Both methods read the notebook-level names `video_view` and `byte_offset`
    at call time, so every read starts at the current `byte_offset`.
    """

    def recv(self, size):
        """Return a slice of `video_view` of up to `size` items from `byte_offset`."""
        return video_view[byte_offset:byte_offset+size]

    def recv_into(self, buffer):
        """Fill `buffer` in place with data from `video_view` at `byte_offset`.

        The amount read is taken from the length of `buffer` itself rather
        than from the notebook-level `size`, so the method works for a buffer
        of any length. If fewer items are available than the buffer can hold,
        only the leading part of the buffer is overwritten.

        Returns None (the received count is not reported).
        """
        capacity = len(buffer)
        source_data = video_view[byte_offset:byte_offset + capacity]
        # Assign into an equally long slice: a memoryview target rejects a
        # length mismatch and a bytearray target would otherwise be resized.
        buffer[:len(source_data)] = source_data

In [12]:
# Illustrative placeholders; rebound to concrete stand-ins just below.
socket = ...        # socket connection to the client
video_cache = ...   # Cache of incoming video stream
byte_offset = ...   # Incoming buffer position
size = 1024 * 1024  # Incoming chunk size
socket = FakeSocket()
video_cache = video_data[:]
byte_offset = 1234

# recv() runs before video_view is rebound on the next line, so this call
# still reads through whatever video_view referred to previously.
chunk = socket.recv(size)
video_view = memoryview(video_cache)
before = video_view[:byte_offset]
after = video_view[byte_offset + size:]
# Splice the received chunk between the untouched head and tail of the cache;
# join() builds a brand-new bytes object from the three pieces.
new_cache = b''.join([before, chunk, after])

In [13]:
def run_test():
    """Benchmark body: receive a chunk and rebuild the cache with b''.join()."""
    chunk = socket.recv(size)
    before = video_view[:byte_offset]
    after = video_view[byte_offset + size:]
    # The joined result is discarded; only the cost of building it is measured.
    new_cache = b''.join([before, chunk, after])

# Run the function 100 times and divide the total to get the mean time per call.
result = timeit.timeit(
    stmt='run_test()',
    globals=globals(),
    number=100) / 100

print(f'{result:0.9f} seconds')

0.018530717 seconds


In [14]:
try:
    my_bytes = b'hello'
    my_bytes[0] = b'\x79'  # bytes is immutable, so item assignment raises TypeError
except TypeError:
    # Catch only the error being demonstrated; anything else (including
    # KeyboardInterrupt) should surface instead of being logged as "Expected".
    logging.exception('Expected')
else:
    # Reaching this branch means the assignment unexpectedly succeeded.
    assert False

ERROR:root:Expected
Traceback (most recent call last):
  File "<ipython-input-14-0d564a76e18d>", line 3, in <module>
    my_bytes[0] = b'\x79'
TypeError: 'bytes' object does not support item assignment


In [15]:
my_array = bytearray(b'hello')
# The assignment above is immediately replaced by this UTF-8 encoded variant.
my_array = bytearray('hello 안녕'.encode("utf8"))
my_array[0] = 0x79  # bytearray items are assigned as integers; 0x79 is b'y'
print(my_array)

bytearray(b'yello \xec\x95\x88\xeb\x85\x95')


In [16]:
my_array = bytearray(b'row, row, row your boat')
# The assignment above is immediately replaced by this UTF-8 encoded variant.
my_array = bytearray('row, row, row your 보트'.encode("utf8"))
my_view = memoryview(my_array)
write_view = my_view[3:13]  # 10-byte window into my_array
# Assigning through the view changes the underlying bytearray in place.
write_view[:] = b'-10 bytes-'
print(my_array)

bytearray(b'row-10 bytes- your \xeb\xb3\xb4\xed\x8a\xb8')


In [17]:
# Copy the cache into a mutable bytearray so it can be written through a view.
video_array = bytearray(video_cache)
write_view = memoryview(video_array)
# Slice of the writable view covering the region to be overwritten.
chunk = write_view[byte_offset:byte_offset + size]
socket.recv_into(chunk)

In [18]:
def run_test():
    """Benchmark body: slice the writable view and receive directly into it."""
    chunk = write_view[byte_offset:byte_offset + size]
    socket.recv_into(chunk)

# Run the function 100 times and divide the total to get the mean time per call.
result = timeit.timeit(
    stmt='run_test()',
    globals=globals(),
    number=100) / 100

print(f'{result:0.9f} seconds')

0.000100957 seconds


> - `memoryview` 내장 타입은 객체의 슬라이스에 대해 파이썬 고성능 버퍼 프로토콜로 읽고 쓰기를 지원하는, 복사가 없는 인터페이스를 제공한다.
> - `bytearray` 내장 타입은 복사가 없는 읽기 함수(`socket.recv_into`와 같은)에 사용할 수 있는 `bytes`와 비슷한 변경 가능한 타입을 제공한다.
> - `memoryview`로 `bytearray`를 감싸면 복사에 따른 비용을 추가 부담하지 않고도 수신받은 데이터를 버퍼에서 원하는 위치에 스플라이스할 수 있다.