In [None]:
from time import perf_counter, time

import lorem
import numpy as np
from lz78 import Sequence, LZ78Encoder, CharacterMap, StreamingLZ78Encoder

### Int Sequence

In [None]:
# Draw 10 million random symbols from {0, 1} using a seeded generator, so the
# sequence (and every figure computed from it below) is the same on each run.
rng = np.random.default_rng(0)
data = rng.integers(0, 2, size=(10_000_000,))
x = Sequence(data, alphabet_size=2)

In [None]:
# Quick sanity check: display the sequence length together with its first ten entries.
len(x), x[:10]

### Encode / Decode

In [None]:
# Time encoder construction plus a full encode of the binary sequence.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time() follows the wall clock and can jump if the clock is adjusted.
start = perf_counter()
encoder = LZ78Encoder()
encoded = encoder.encode(x)
elapsed = perf_counter() - start
print("encode time", elapsed)

In [None]:
# Display the compression ratio reported for the encoded binary sequence.
encoded.compression_ratio()

In [None]:
# Time decoding of the encoded binary sequence with the same encoder object.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time() follows the wall clock and can jump if the clock is adjusted.
start = perf_counter()
decoded = encoder.decode(encoded)
elapsed = perf_counter() - start
print("decode time", elapsed)

In [None]:
# Display the decoded length and first ten entries, for comparison with the
# original sequence preview shown earlier.
len(decoded), decoded[:10]

### String Sequence

In [None]:
# Build one long text by joining 100 generated lorem-ipsum paragraphs with spaces.
s = " ".join(lorem.paragraph() for _ in range(100))
# Show the length and a short preview only; echoing the whole text would flood
# the cell output and bloat the saved notebook.
len(s), s[:200]

In [None]:
# Build a character map from the generated text.
# NOTE(review): presumably this assigns a symbol to each distinct character in `s` —
# confirm against the lz78 documentation.
charmap = CharacterMap(s)

In [None]:
# Encode a short string whose characters are expected to occur in the generated
# text, and display what encode_all returns.
charmap.encode_all("lorem ipsum")

In [None]:
# This input is expected to contain characters that are missing from the charmap,
# so encode_all should fail with a helpful message. The error is caught and printed
# so the message stays visible without aborting a top-to-bottom "Run All".
try:
    charmap.encode_all("hello world")
except Exception as err:  # the exact exception type raised by lz78 is not visible here
    print(f"{type(err).__name__}: {err}")
else:
    print("encode_all unexpectedly accepted characters outside the charmap")

In [None]:
# Wrap the generated text in a Sequence, passing the charmap built from that text.
# NOTE(review): presumably the charmap is used to translate characters to symbols — confirm.
charseq = Sequence(s, charmap=charmap)

In [None]:
# Quick sanity check: display the character sequence length and its first ten entries.
len(charseq), charseq[0:10]

In [None]:
# Time encoder construction plus a full encode of the character sequence.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time() follows the wall clock and can jump if the clock is adjusted.
start = perf_counter()
encoder = LZ78Encoder()
encoded = encoder.encode(charseq)
elapsed = perf_counter() - start
print("encode time", elapsed)

In [None]:
# Display the compression ratio reported for the encoded character sequence.
encoded.compression_ratio()

### Streaming

In [None]:
# Fixed alphabet for the streaming demo: lower- and upper-case ASCII letters plus
# a few punctuation characters and the space.
streaming_alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ. ,?"
charmap = CharacterMap(streaming_alphabet)

In [None]:
# Create the streaming encoder, passing it the alphabet size reported by the charmap.
alphabet_size = charmap.alphabet_size()
encoder = StreamingLZ78Encoder(alphabet_size)

In [None]:
# Feed 100 freshly generated paragraphs to the streaming encoder, one block per paragraph.
num_blocks = 100
for _ in range(num_blocks):
    paragraph = lorem.paragraph()
    block = Sequence(paragraph, charmap=charmap)
    encoder.encode_block(block)

In [None]:
# Display the compression ratio reported for everything the streaming encoder has encoded so far.
encoder.get_encoded_sequence().compression_ratio()

In [None]:
# Decode what the streaming encoder holds and display entries 100 through 109.
decoded_stream = encoder.decode()
decoded_stream[100:110]

In [None]:
# Decode the stream, take entries 100 through 199, and map them back through the
# charmap so the result can be displayed.
decoded_stream = encoder.decode()
charmap.decode_all(decoded_stream[100:200])