In [1]:
# Display the result of every top-level expression in a cell, not only the
# last one, so that later cells can show several values without print().
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Gingko example with simple toy data

### Create some toy text data with a hierarchical structure

In [2]:
import numpy as np

In [3]:
# Coordinate array for the toy data: 4 rows x 24 columns, where column j holds
# the 4-part coordinate of test_data[j].
# Row 0 takes the values 0, 1, 2 in runs of eight; rows 1-3 repeat the same
# eight-long pattern once per run.
test_indices = np.array([
    [0] * 8 + [1] * 8 + [2] * 8,
    [0, 0, 0, 0, 0, 0, 1, 1] * 3,
    [0, 0, 1, 2, 2, 2, 0, 0] * 3,
    [0, 1, 0, 0, 1, 2, 0, 1] * 3,
])

# The 24 tokens, in the same order as the columns of test_indices
# (eight tokens per value of row 0).
test_data = [
    'hi', 'there', 'hello', 'how', 'are', 'you', 'im', 'good',
    'whats', 'up', 'hey', 'hows', 'it', 'going', 'not', 'bad',
    'hey', 'bruh', 'yo', 'whats', 'crackin', 'son', 'chillin', 'chillin',
]

### Gingko is fundamentally a COO-format (coordinate list) n-dimensional sparse tensor

In [4]:
from gingko import Gingko

In [7]:
# Build the tree from the coordinate array and the token list.
# In the recorded run, check=True made the constructor print
# "Check successful: indices match"; presumably it validates the indices
# against the values -- confirm against the gingko source.
tree = Gingko(indices=test_indices, values=test_data, check=True)

# Both attributes are displayed because ast_node_interactivity was set to
# "all" in the first cell; by default only the last expression would show.
tree.indices
tree.values

Check successful: indices match


array([[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
        2, 2],
       [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
        1, 1],
       [0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 1, 2, 2, 2,
        0, 0],
       [0, 1, 0, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1, 2,
        0, 1]])

['hi',
 'there',
 'hello',
 'how',
 'are',
 'you',
 'im',
 'good',
 'whats',
 'up',
 'hey',
 'hows',
 'it',
 'going',
 'not',
 'bad',
 'hey',
 'bruh',
 'yo',
 'whats',
 'crackin',
 'son',
 'chillin',
 'chillin']

### ...but Gingko is sliceable

In [8]:
# Integer index 0 on the first dimension, then the first three positions
# along each of the remaining three dimensions.
# NOTE(review): in the recorded output the integer-indexed first dimension is
# kept (the result still has four index rows), unlike NumPy indexing --
# confirm this is intended.
tree[0,:3,:3,:3]

Gingko(indices=array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 1],
       [0, 0, 1, 2, 2, 2, 0, 0],
       [0, 1, 0, 0, 1, 2, 0, 1]]), values=['hi', 'there', 'hello', 'how', 'are', 'you', 'im', 'good'], ptrs=[array([0, 2]), array([0, 3, 4]), array([0, 2, 3, 6, 8])], check=False)

### ...and Gingko is fast

In [9]:
%%timeit

# Timed statement: indices below 2 on the first dimension, everything on the
# two middle dimensions, and only index 0 on the last dimension.
tree[:2,:,:,:1]

126 µs ± 1.39 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
