In [4]:
# The b"..." prefix creates a bytes object: an immutable sequence of raw bytes
# (a byte string), as opposed to a text str.
a = b"Hello World"

# Show both the value and its runtime type.
print(a, type(a))

b'Hello World' <class 'bytes'>


In [8]:
# str.encode() turns text into bytes. With no argument it uses UTF-8, and for
# text made only of ASCII characters the UTF-8 and ASCII encodings coincide,
# so all three calls print the same bytes.
text = "hello world"
for encoded in (text.encode(), text.encode("utf-8"), text.encode("ascii")):
    print(encoded)

b'hello world'
b'hello world'
b'hello world'


In [37]:
# Raw string literal: the backslashes of a Windows path must not be parsed as
# escape sequences. Unrecognized escapes such as "\D" or "\L" are deprecated
# and emit a warning on recent Python versions; r"..." keeps the same value.
file_path = r"E:\Docs\explore_python\io\LBT_TH066.pdf"

# Opening in binary mode ("rb") gives a BufferedReader; read() returns the
# entire file content as a single bytes object.
with open(file_path, "rb") as f:
    data = f.read()

# The file is already closed here; `data` remains usable.
print(type(data))               # bytes object

<class 'bytes'>


In [15]:
from io import BytesIO

# Context-manager form: the buffer is closed automatically when the block exits.
with BytesIO() as f:                        # in-memory binary buffer
    for chunk in (b"Hello World",) * 3:     # write() takes bytes, not str
        f.write(chunk)

    print(f.getvalue())         # everything written so far, as one bytes object
    print(type(f))              # BytesIO object
    print("-" * 50)

# Manual form: create the buffer, use it, then close it explicitly.
f = BytesIO()
for chunk in (b"Hello World",) * 3:
    f.write(chunk)

print(f.getvalue())
print(type(f))
f.close()

b'Hello WorldHello WorldHello World'
<class '_io.BytesIO'>
--------------------------------------------------
b'Hello WorldHello WorldHello World'
<class '_io.BytesIO'>


In [21]:
# Compare the cost of growing an immutable bytes object with += against
# appending to an in-memory BytesIO buffer.
from io import BytesIO  # imported here so the cell does not depend on an earlier cell
from time import perf_counter, time  # `time` is unused below; kept in scope in case other cells rely on it

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; wall-clock time() can be too coarse for the fast case.
start = perf_counter()
buffer = b""
for _ in range(100_000):
    buffer += b"Hello World"    # bytes are immutable: each += builds a new object
end = perf_counter()
print(f"Pure buffer time: {end - start:.5}s")

start = perf_counter()
f = BytesIO()
for _ in range(100_000):
    f.write(b"Hello World")    # append bytes to the in-memory buffer (file-like object)
end = perf_counter()
print(f"BytesIO time: {end - start:.5}s")
print("-" * 50)

Pure buffer time: 4.2386s
BytesIO time: 0.013824s
--------------------------------------------------


In [23]:
from io import StringIO

# StringIO is the text counterpart of BytesIO: an in-memory buffer for str.
with StringIO() as f:
    f.write("Hello World\n")
    # print() can target any text file-like object through its `file` argument.
    print("This is a test", end="", file=f)
    buffered_text = f.getvalue()        # full contents of the in-memory buffer
    print(buffered_text, end="")

Hello World
This is a test

## Use cases

In [34]:
# Download an image from the internet -> manipulate it in memory

import requests
from PIL import Image
from io import BytesIO

import cv2
import numpy as np


# Switch between the OpenCV path (True) and the pure-PIL path (False).
use_cv2 = False

url = "https://nguoinoitieng.tv/images/nnt/105/0/biks.jpg"

# requests.get() returns a Response object; the raw payload is response.content (bytes).
# The timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() turns an HTTP error status into an exception here instead of
# letting PIL fail later while trying to decode an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Wrap the bytes in a file-like object (in-memory binary stream) so PIL can
# read them without touching the disk. For a JPEG this yields a JpegImageFile.
img = Image.open(BytesIO(response.content))

if use_cv2:
    # Normalize to RGB first: the downloaded file may decode to another mode
    # (e.g. grayscale or CMYK), which COLOR_RGB2GRAY would not accept.
    img = np.array(img.convert("RGB"))
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    cv2.imwrite("biks.jpg", img)        # save image to disk
else:
    img = img.convert("L")              # convert image to grayscale
    img_bf = BytesIO()                  # create an in-memory buffer
    img.save(img_bf, format="JPEG")     # encode the image as JPEG into the buffer

    # Retrieve the binary content from the BytesIO object
    bytes_data = img_bf.getvalue()

    # Save the image to disk if needed
    with open("biks.jpg", "wb") as f:
        f.write(bytes_data)

In [35]:
# Create a zip file that contains multiple files dynamically -> 
# send it over a network without writing it to disk

from io import BytesIO
from zipfile import ZipFile, ZIP_DEFLATED


# In-memory binary buffer that receives the archive bytes instead of a file on disk.
zf_buffer = BytesIO()

# Archive member name -> text content, all produced in memory.
archive_members = {
    "file1.txt": "This is the content of the first file.",
    "file2.txt": "This is the content of the second file.",
    "file3.txt": "This is the content of the third file.",
}

# Build the ZIP archive directly inside the buffer; leaving the `with` block
# closes the archive, which finalizes it in the buffer.
with ZipFile(file=zf_buffer, mode="w", compression=ZIP_DEFLATED) as zf:
    for member_name, member_text in archive_members.items():
        zf.writestr(member_name, member_text)

# Rewind so that any later read() on the buffer starts at the beginning.
# getvalue() itself returns the whole content regardless of the position.
zf_buffer.seek(0)
zip_data = zf_buffer.getvalue()

# zip_data now holds the binary content of the ZIP file. It can be sent over a
# network as-is, or saved to disk as done here.
with open("zip_file.zip", "wb") as zf:
    zf.write(zip_data)