In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from tqdm import tqdm

from seq.model import Chunk, ChunkDatabase, Cluster

np.random.seed(42)

In [3]:
data = np.random.randint(1, 10, (100_000, 32))

In [4]:
db = ChunkDatabase(data, threshold=0.001, max_chunk_length=5)

100%|██████████| 5/5 [00:10<00:00,  2.07s/it]


In [12]:
db

ChunkDatabase Information
Data Shape: (100000, 32)
Threshold: 0.001
Max Chunk Length: 5
Number of Chunks: 7373
Number of Chunks by Length
3: 7289
2: 81
1: 3

In [5]:
processed_chunks = set()
clusters: list[Cluster] = []

for chunk in db.chunks:
  if chunk in processed_chunks:
    continue

  cluster = Cluster(chunk)
  left = cluster.left
  right = cluster.right

  left_candidate = [
    c
    for c in db.get_candidate(cluster.left)
    if c not in processed_chunks and cluster.can_appendleft(c)
  ]

  right_candidate = [
    c
    for c in db.get_candidate(cluster.right)
    if c not in processed_chunks and cluster.can_append(c)
  ]

  while len(left_candidate) > 0 or len(right_candidate) > 0:
    if len(left_candidate):
      cluster.appendleft(left_candidate[0])
      processed_chunks.add(left_candidate[0])
    if len(right_candidate):
      cluster.append(right_candidate[0])
      processed_chunks.add(right_candidate[0])

    left_candidate = [
      c
      for c in db.get_candidate(cluster.left)
      if c not in processed_chunks and cluster.can_appendleft(c)
    ]

    right_candidate = [
      c
      for c in db.get_candidate(cluster.right)
      if c not in processed_chunks and cluster.can_append(c)
    ]

  clusters.append(cluster)

In [6]:
print(len(clusters))
clusters = sorted(clusters, key=lambda c: c.size, reverse=True)

7373


In [7]:
n_step = 1000
canvas = np.zeros(db.data.shape)
positions = []
for cluster in tqdm(clusters, desc="Processing Clusters..."):
  start, end, height = cluster.start, cluster.end, cluster.height
  canvas_height = canvas.shape[0]
  start_position = 0
  drew = False

  while start_position < canvas_height and not drew:
    if start_position >= canvas_height:
      canvas_height = start_position + height
      canvas = np.pad(canvas, ((height, 0), (0, 0)))

    if start_position + height > canvas_height:
      canvas_height = start_position + height
      canvas = np.pad(canvas, ((0, height), (0, 0)))

    subcanvas = canvas[start_position : start_position + height, start:end]
    if cluster.can_draw(subcanvas):
      cluster.draw(subcanvas)
      positions.append(start_position)
      drew = True

    start_position += n_step

Processing Clusters...: 100%|██████████| 7373/7373 [00:04<00:00, 1713.36it/s]


In [8]:
np.sum(canvas != 0) / np.prod(canvas.shape)

np.float64(0.18295237910725357)

In [9]:
def print_row(row: np.ndarray):
  print("".join([f"{int(x)}" if x != 0 else " " for x in row]))

In [10]:
rows = []
prev_row = np.zeros(canvas.shape[1])
for i in range(0, canvas.shape[0]):
  row = canvas[i]
  if not np.array_equal(row, prev_row) and not np.sum(row) == 0:
    rows.append(row)
    print_row(row)
    prev_row = row

89484872239271778919256278562814
894848   39271778919256278562814
894848   392      19256278562814
   848   392      19256278562814
         392      192         14
                  192         14
                              14
55527127563548554588162665771514
555271   63548554588162665771514
555271   635485545881626657   14
555271   635   545881626657   14
555271            881626657   14
   271            881626657   14
                  881         14
                              14
51612245434674145293313219374271
516122   34674145293313219374271
516122   346   45293313219374271
516               933132193   71
                  933132      71
                  933         71
                              71
96672148945282987545753462648571
966721   45282987545753462648571
966721      82987545753462648571
966            87545753462648571
966               457534      71
                  457534      71
                              71
69281617882233441566355872271982
692816   8

In [11]:
len(rows)

627