# Setup

## Install CJK Fonts

In [None]:
# DOWNLOAD #####################################################################

!curl https://noto-website-2.storage.googleapis.com/pkgs/NotoSansCJK.ttc.zip --output noto.zip
!unzip -d ~/.fonts noto.zip

In [None]:
# UPDATE SYSTEM FONTS ##########################################################

# rebuild the fontconfig cache (-f: force, -v: verbose)
!fc-cache -f -v
# list the fonts registered for the Japanese language, to check the installation
!fc-list :lang=ja

In [None]:
# CLEAN ########################################################################

!rm noto.zip
!rm -r /root/.cache/matplotlib/

In [None]:
# RESTART ######################################################################

import os
import signal

# send SIGTERM to the process running this cell: execution stops here
# NOTE(review): presumably meant to get a fresh kernel that picks up the new fonts; it also interrupts a "Run All" — confirm
os.kill(os.getpid(), signal.SIGTERM)

In [None]:
# LOAD IN MPL ##################################################################

import matplotlib.font_manager

# use the font manager that matplotlib loads (or rebuilds) on import,
# instead of a hardcoded cache file whose name depends on the user and the matplotlib version
fm = matplotlib.font_manager.fontManager
# check that the CJK font is found; the cell displays the path of the matching font file
fm.findfont('Noto Sans CJK JP', rebuild_if_missing=True)

## Install Dependencies

In [None]:
!pip install -U datasets densecurves mlable

## Import Dependencies

In [None]:
%matplotlib inline

In [None]:
import functools
import math

import matplotlib as mpl
import matplotlib.axes as mpa
import matplotlib.colors as mpc
import matplotlib.pyplot as mpp
import PIL as pl

import datasets as hd
import numpy as np
import tensorflow as tf

import densecurves.hilbert
import mlable.shaping.hilbert
import mlable.utils

## Config

In [None]:
# PLOT #########################################################################

PLOT_CONFIG = dict(
    # font families, listed by order of preference
    family=['DejaVu Sans', 'Noto Sans CJK JP', 'Noto Sans TC'],
    # linear scaling of the values in [0, 10] to [0, 1]
    norm=mpc.Normalize(vmin=0, vmax=10),
    # discrete palette of 11 colors (alternative: mpl.colormaps['binary'])
    cmap=mpc.ListedColormap([
        '#000000', '#0074D9', '#FF4136', '#2ECC40', '#FFDC00', '#AAAAAA',
        '#F012BE', '#FF851B', '#7FDBFF', '#870C25', '#FFFFFF']),)

In [None]:
# DATASETS #####################################################################

DATASETS_CONFIG = {
    # 'cot-text-openthoughts': {
    #     'path': 'open-thoughts/OpenThoughts-114k',
    #     'name': 'default',
    #     'split': 'train',
    #     'features': ['problem', 'solution'],},
    # the enabled entries are all subsets of the same repository, with a single text feature
    **{
        'ft-asciiart-{}'.format(__subset): {
            'path': 'apehex/ascii-art',
            'name': __subset,
            'split': 'train',
            'features': ['content'],}
        for __subset in ('asciiart', 'copypasta', 'graffiti', 'images')},
    # 'ft-asciiart-datacompdr': {
    #     'path': 'apehex/ascii-art-datacompdr-12m',
    #     'name': 'default',
    #     'split': 'train',
    #     'features': ['content'],},
    # 'cot-math-numi': {
    #     'path': 'AI-MO/NuminaMath-CoT',
    #     'name': None,
    #     'split': 'train',
    #     'features': ['problem', 'solution'],},
}

In [None]:
# SAMPLES ######################################################################

WIKI_SAMPLE = """Hilbert curve\n\nThe Hilbert curve (also known as the Hilbert space-filling curve) is a continuous fractal space-filling curve first described by the German mathematician David Hilbert in 1891,[1] as a variant of the space-filling Peano curves discovered by Giuseppe Peano in 1890.[2]\n\nBecause it is space-filling, its Hausdorff dimension is 2 (precisely, its image is the unit square, whose dimension is 2 in any definition of dimension; its graph is a compact set homeomorphic to the closed unit interval, with Hausdorff dimension 1).\n\nThe Hilbert curve is constructed as a limit of piecewise linear curves. The length of the {\\displaystyle n}th curve is {\\displaystyle \\textstyle 2^{n}-{1 \\over 2^{n}}}, i.e., the length grows exponentially with {\\displaystyle n}, even though each curve is contained in a square with area {\\displaystyle 1}.\n\nImages\n\nFirst six iterations of the Hilbert curve\n\nHilbert curve, first order\n\nHilbert curves, first and second orders\n\nHilbert curves, first to third orders\n\nProduction rules\n\nHilbert curve, construction color-coded\n\nA 3-D Hilbert curve with color showing progression\n\nVariant, first three iterations[3]\n\nApplications and mapping algorithms\n\nBoth the true Hilbert curve and its discrete approximations are useful because they give a mapping between 1D and 2D space that preserves locality fairly well.[4] This means that two data points which are close to each other in one-dimensional space are also close to each other after folding. The converse cannot always be true.\n\nBecause of this locality property, the Hilbert curve is widely used in computer science. For example, the range of IP addresses used by computers can be mapped into a picture using the Hilbert curve. 
Code to generate the image would map from 2D to 1D to find the color of each pixel, and the Hilbert curve is sometimes used because it keeps nearby IP addresses close to each other in the picture.[5] The locality property of the Hilbert curve has also been used to design algorithms for exploring regions with mobile robots[6][7] and indexing geospatial location data.[8]\n\nIn an algorithm called Riemersma dithering, grayscale photographs can be converted to a dithered black-and-white image using thresholding, with the leftover amount from each pixel added to the next pixel along the Hilbert curve. Code to do this would map from 1D to 2D, and the Hilbert curve is sometimes used because it does not create the distracting patterns that would be visible to the eye if the order were simply left to right across each row of pixels.[9] Hilbert curves in higher dimensions are an instance of a generalization of Gray codes, and are sometimes used for similar purposes, for similar reasons. For multidimensional databases, Hilbert order has been proposed to be used instead of Z order because it has better locality-preserving behavior. For example, Hilbert curves have been used to compress and accelerate R-tree indexes[10] (see Hilbert R-tree). 
They have also been used to help compress data warehouses.[11][12]\n\nThe linear distance of any point along the curve can be converted to coordinates in n dimensions for a given n, and vice versa, using any of several standard mathematical techniques such as Skilling\'s method.[13][14]\n\nIt is possible to implement Hilbert curves efficiently even when the data space does not form a square.[15] Moreover, there are several possible generalizations of Hilbert curves to higher dimensions.[16][17]\n\nRepresentation as Lindenmayer system\n\nThe Hilbert Curve can be expressed by a rewrite system (L-system).\n\nDuration: 52 seconds.0:52\nHilbert curve at its sixth iteration\nAlphabet : A, B\nConstants : F + −\nAxiom : A\nProduction rules:\nA → +BF−AFA−FB+\nB → −AF+BFB+FA−\nHere, "F" means "draw forward", "+" means "turn left 90°", "-" means "turn right 90°" (see turtle graphics), and "A" and "B" are ignored during drawing.\n\nOther implementations\n\nGraphics Gems II[18][promotion?] discusses Hilbert curve coherency, and provides implementation.\n\nThe Hilbert Curve is commonly used among rendering images or videos. Common programs such as Blender and Cinema 4D use the Hilbert Curve to trace the objects, and render the scene.[citation needed]\n\nThe slicer software used to convert 3D models into toolpaths for a 3D printer typically has the Hilbert curve as an option for an infill pattern.\n"""
JAPANESE_SAMPLE = """ヒルベルト曲線\n\nヒルベルト曲線（ヒルベルト空間充填曲線とも呼ばれる）は、1891年にドイツの数学者ダヴィド・ヒルベルトによって初めて記述された連続フラクタル空間充填曲線である[1]。これは、1890年にジュゼッペ・ペアノによって発見された空間充填ペアノ曲線の変種である[2]。\n\nヒルベルト曲線は空間充填曲線であるため、ハウスドルフ次元は2である（正確には、その像は任意の次元定義において次元が2である単位正方形である。そのグラフは、ハウスドルフ次元が1である、閉単位区間に同相なコンパクト集合である）。\n\nヒルベルト曲線は、区分線形曲線の極限として構成される。 {\\displaystyle n}番目の曲線の長さは{\\displaystyle \\textstyle 2^{n}-{1 \\over 2^{n}}}です。つまり、各曲線が面積{\\displaystyle 1}の正方形に収まっているにもかかわらず、長さは{\\displaystyle n}とともに指数関数的に増加します。\n\n画像\n\nヒルベルト曲線の最初の6回の反復\n\nヒルベルト曲線、1次\n\nヒルベルト曲線、1次と2次\n\nヒルベルト曲線、1次から3次\n\n生成規則\n\nヒルベルト曲線、色分けされた作図\n\n色分けされた3次元ヒルベルト曲線、進行を示す\n\n変種、最初の3回の反復[3]\n\n応用と写像アルゴリズム\n\n真のヒルベルト曲線とその離散近似はどちらも、1次元空間と2次元空間間の写像において局所性をかなりよく保存するため有用です。[4]これは、1次元空間で互いに近い2つのデータポイントは、折り畳み後も互いに近いことを意味します。逆は必ずしも真ではありません。\n\nこの局所性のため、ヒルベルト曲線はコンピュータサイエンスで広く用いられています。例えば、コンピュータで使用されるIPアドレスの範囲は、ヒルベルト曲線を用いて画像にマッピングできます。画像を生成するコードは、各ピクセルの色を見つけるために2次元から1次元にマッピングしますが、ヒルベルト曲線は画像内で近くのIPアドレスを互いに近くに保つため、用いられることがあります。[5] ヒルベルト曲線の局所性は、移動ロボットによる地域探索[6][7]や地理空間位置データのインデックス作成[8]のためのアルゴリズムの設計にも用いられています。\n\nリーマースマディザリングと呼ばれるアルゴリズムでは、閾値処理を用いてグレースケール写真をディザリングされた白黒画像に変換し、各ピクセルの余剰量をヒルベルト曲線に沿って次のピクセルに加算します。これを実現するコードは1次元から2次元へのマッピングとなり、ヒルベルト曲線が用いられることがあります。これは、各ピクセル行を単純に左から右へ順序付けた場合に目に見えるような、目障りなパターンを生成しないためです。[9] 高次元のヒルベルト曲線はグレイコードの一般化の一例であり、同様の理由から同様の目的で使用されることがあります。多次元データベースでは、局所性保存性に優れているため、Z順序ではなくヒルベルト順序の使用が提案されています。例えば、ヒルベルト曲線はR木インデックスの圧縮と高速化に使用されています[10]（ヒルベルトR木を参照）。また、データウェアハウスの圧縮にも使用されています[11][12]。\n\n曲線上の任意の点の直線距離は、スキリング法などの標準的な数学的手法を用いて、与えられたnに対してn次元座標に変換でき、その逆も可能です。[13][14]\n\nデータ空間が正方形でなくても、ヒルベルト曲線を効率的に実装することは可能です。[15] さらに、ヒルベルト曲線を高次元に一般化する方法がいくつかあります。[16][17]\n\nリンデンマイヤーシステムによる表現\n\nヒルベルト曲線は、書換えシステム（Lシステム）で表現できます。\n\n再生時間：52秒。0:52\n6回目の反復におけるヒルベルト曲線\nアルファベット：A, B\n定数：F + −\n公理：A\n生成規則：\nA → +BF−AFA−FB+\nB → −AF+BFB+FA−\nここで、「F」は「前方に描く」、「+」は「左に90°回転する」、「-」は「右に90°回転する」（タートルグラフィックスを参照）を意味し、「A」と「B」は描画時に無視されます。\n\nその他の実装\n\nGraphics Gems II[18][プロモーション?]では、ヒルベルト曲線の一貫性について議論し、実装を提供しています。\n\nヒルベルト曲線は、画像や動画のレンダリングでよく使用されます。BlenderやCinema 
4Dなどの一般的なプログラムは、ヒルベルト曲線を用いてオブジェクトのトレースやシーンのレンダリングを行っています。[要出典]\n\n3Dモデルを3Dプリンターのツールパスに変換するスライサーソフトウェアでは、通常、ヒルベルト曲線がインフィルパターンのオプションとして提供されています。"""
ART_SAMPLE = """⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⠕⠕⠕⠕⢕⢕\n⢕⢕⢕⢕⢕⠕⠕⢕⢕⢕⢕⢕⢕⢕⢕⢕⢕⠕⠁⣁⣠⣤⣤⣤⣶⣦⡄⢑\n⢕⢕⢕⠅⢁⣴⣤⠀⣀⠁⠑⠑⠁⢁⣀⣀⣀⣀⣘⢻⣿⣿⣿⣿⣿⡟⢁⢔\n⢕⢕⠕⠀⣿⡁⠄⠀⣹⣿⣿⣿⡿⢋⣥⠤⠙⣿⣿⣿⣿⣿⡿⠿⡟⠀⢔⢕\n⢕⠕⠁⣴⣦⣤⣴⣾⣿⣿⣿⣿⣇⠻⣇⠐⠀⣼⣿⣿⣿⣿⣿⣄⠀⠐⢕⢕\n⠅⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣶⣶⣿⣿⣿⣿⣿⣿⣿⣿⣷⡄⠐⢕\n⠀⢸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡄⠐\n⢄⠈⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡆\n⢕⢔⠀⠈⠛⠿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿\n⢕⢕⢄⠈⠳⣶⣶⣶⣤⣤⣤⣤⣭⡍⢭⡍⢨⣯⡛⢿⣿⣿⣿⣿⣿⣿⣿⣿\n⢕⢕⢕⢕⠀⠈⠛⠿⢿⣿⣿⣿⣿⣿⣦⣤⣿⣿⣿⣦⣈⠛⢿⢿⣿⣿⣿⣿\n⢕⢕⢕⠁⢠⣾⣶⣾⣭⣖⣛⣿⠿⣿⣿⣿⣿⣿⣿⣿⣿⣷⡆⢸⣿⣿⣿⡟\n⢕⢕⠅⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠟⠈⢿⣿⣿⡇\n⢕⠕⠀⠼⠟⢉⣉⡙⠻⠿⢿⣿⣿⣿⣿⣿⡿⢿⣛⣭⡴⠶⠶⠂⠀⠿⠿⠇"""

## Download The Datasets

In [None]:
# DOWNLOAD #####################################################################

# one shuffled, unbatched tf.data pipeline per entry of the config
DATASETS = dict(
    (
        __key,
        hd.load_dataset(
            path=__cfg['path'],
            name=__cfg['name'],
            split=__cfg['split']).to_tf_dataset(shuffle=True, batch_size=None))
    for __key, __cfg in DATASETS_CONFIG.items())

## Preprocess The Datasets

In [None]:
# REMOVE COLOR CODES ###########################################################

# matches the escape character, then "[", then any run of digits / semicolons, then one letter among m, G, K, H, F
ANSI_REGEX = r'\x1b\[[0-9;]*[mGKHF]'

# regex replacement with the pattern and an empty rewrite already bound: every match is removed from the input
clean = functools.partial(tf.strings.regex_replace, pattern=ANSI_REGEX, rewrite='', replace_global=True)

def preprocess(sample: dict, operator: callable, targets: list) -> dict:
    """Apply an operator to a selection of the features of a sample.

    Args:
        sample: mapping from feature names to values.
        operator: function applied to the value of each targeted feature.
        targets: names of the features to transform.

    Returns:
        A new dict with the same keys as `sample`: the targeted values are
        replaced by `operator(value)`, the others are passed through untouched.
    """
    return {
        __k: operator(__v) if __k in targets else __v
        for __k, __v in sample.items()}

In [None]:
# ITERATE ######################################################################

for __name, __dataset in DATASETS.items():
    # only the text features listed in the config are cleaned
    __targets = DATASETS_CONFIG[__name]['features']
    __preprocess = functools.partial(preprocess, operator=clean, targets=__targets)
    # rebinding an existing key does not resize the dict, so it is safe while iterating
    DATASETS[__name] = __dataset.map(__preprocess, num_parallel_calls=tf.data.AUTOTUNE)

# RGB Text

## Encoding

In [None]:
# RGB ENCODING #################################################################

def rgb(rows: list) -> np.ndarray:
    """Encode a grid of characters as an array of RGB-like byte triplets.

    Args:
        rows: list of strings; the width is read from the first row, so all
            the rows are expected to have the same length.

    Returns:
        A uint8 array of shape (height, width, 3), holding the 3 lowest bytes
        of the UTF-32-BE encoding of each character.
    """
    __shape = (len(rows), len(rows[0]), 4)
    # UTF-32-BE: exactly 4 bytes per character, most significant byte first
    __bytes = np.array([[*__row.encode('utf-32-be')] for __row in rows], dtype=np.uint8)
    # group the bytes by character, then drop the leading byte, which is always null
    return __bytes.reshape(__shape)[..., 1:]

In [None]:
# CUSTOM COLOR SCHEMES #########################################################

def mix_channels(channels: np.ndarray) -> np.ndarray:
    """Add the last channel to the first two, modulo 256.

    Args:
        channels: 1D array of channel values; its dtype must be able to hold
            256 and the sums (the caller in this file casts to int32).

    Returns:
        The array `[c0 + c_last, c1 + c_last, c_last]`, modulo 256.
    """
    # same dtype as the input, to keep the result in that dtype
    __modulus = np.array([256, 256, 256], dtype=channels.dtype)
    __first, __second, __last = channels[0], channels[1], channels[-1]
    return np.mod([__first + __last, __second + __last, __last], __modulus)

def rgb_mixed(rows: list) -> np.ndarray:
    """Encode a grid of characters with `rgb`, then mix the channels of each cell with `mix_channels`."""
    # widen the bytes to int32 before mixing, so that the sums cannot wrap around
    __wide = rgb(rows).astype(np.int32)
    # the mixing is applied to each triplet, along the last axis
    return np.apply_along_axis(mix_channels, -1, __wide)

def rgb_hilbert(rows: list) -> np.ndarray:
    """Encode a grid of characters by mapping each code point to a point of a Hilbert curve.

    Args:
        rows: list of strings; the width is read from the first row.

    Returns:
        A uint8 array of shape (height, width, 3).
    """
    __shape = (len(rows), len(rows[0]), 3)
    # each code point is used as an index along a curve of order 8 and rank 3
    # presumably yields 3 coordinates, each below 2 ** 8 — verify against densecurves
    __points = [
        [densecurves.hilbert.point(ord(__char), order=8, rank=3) for __char in __row]
        for __row in rows]
    # cast and reshape
    return np.array(__points, dtype=np.uint8).reshape(__shape)

## Display

In [None]:
# CONTEXT ######################################################################

class PlotContext:
    """Context manager that creates a grid of subplots and displays it on exit.

    Args:
        rows: number of rows in the grid of subplots.
        cols: number of columns in the grid of subplots.
        zoom: (horizontal, vertical) size factors, multiplied by the number of
            columns and rows to get the figure size.
        meta: whether the x and y axes are visible.
        **kwargs: forwarded to `mpp.subplots`.
    """

    def __init__(self, rows: int, cols: int, zoom: iter=(4, 4), meta: bool=True, **kwargs) -> None:
        self._rows, self._cols = rows, cols
        self._zoom, self._meta = zoom, meta
        # private copy of the extra arguments
        self._args = {**kwargs}
        # (width, height) of the whole figure
        self._size = (zoom[0] * cols, zoom[-1] * rows)
        # created on entering the context
        self._figure, self._axes = None, None

    def __enter__(self) -> tuple:
        __figure, __axes = mpp.subplots(nrows=self._rows, ncols=self._cols, figsize=self._size, **self._args)
        self._figure, self._axes = __figure, __axes
        # show or hide both axes of every subplot
        for __a in __figure.axes:
            for __axis in (__a.get_xaxis(), __a.get_yaxis()):
                __axis.set_visible(self._meta)
        # hand the figure and its axes to the body of the `with` block
        return (__figure, __axes)

    def __exit__(self, exc_type: any, exc_value: any, traceback: any) -> None:
        # both calls go through the pyplot state, i.e. the current figure
        mpp.tight_layout()
        mpp.show()

In [None]:
# IMAGE WITH CAPTION OVERLAY ###################################################

def matshow(axes: mpa.Axes, data: iter=(), curve: iter=(), text: iter=(), family: iter=None, cmap: mpc.Colormap=None) -> None:
    """Draw an array as an image, with an optional curve and a character overlay.

    Args:
        axes: the axes to draw on.
        data: array displayed with `axes.matshow`; skipped when empty.
        curve: sequence whose first and last items are plotted against each other; skipped when empty.
        text: rows of characters, written at (column, row); spaces and null characters are skipped.
        family: font family of the overlay.
        cmap: colormap forwarded to `axes.matshow`.
    """
    # image like display of an array
    if len(data):
        axes.matshow(data, cmap=cmap)
    # path of the curve
    if len(curve):
        __first, __last = curve[0], curve[-1]
        axes.plot(__first, __last, color='black')
    # text overlay, one character per cell
    for __row, __line in enumerate(text):
        for __col, __char in enumerate(__line):
            # blanks and padding are not displayed
            if __char in ' \x00':
                continue
            axes.text(__col, __row, str(__char), va='center', ha='center', color='white', family=family)

## Samples

In [None]:
# ENGLISH ######################################################################

__s = [WIKI_SAMPLE[15:79]]
# the three encodings, in order: RGB, MIXED, HILBERT
__x, __y, __h = rgb(__s), rgb_mixed(__s), rgb_hilbert(__s)
# one figure per encoding for the raw colors, and another with the characters on top
for __data in (__x, __y, __h):
    for __text in ((), __s):
        with PlotContext(rows=1, cols=1, zoom=(16, 16), meta=False) as (__fig, __axes):
            matshow(data=__data, text=__text, axes=__axes)

In [None]:
# JAPANESE #####################################################################

__s = [JAPANESE_SAMPLE[17:81],]
# the three encodings, in order: RGB, MIXED, HILBERT
__x, __y, __h = rgb(__s), rgb_mixed(__s), rgb_hilbert(__s)
# one figure per encoding for the raw colors, and another with the characters on top
for __data in (__x, __y, __h):
    for __text in ((), __s):
        with PlotContext(rows=1, cols=1, zoom=(16, 16), meta=False) as (__fig, __axes):
            # the font family only matters when there is text to draw
            matshow(data=__data, text=__text, axes=__axes, family=['Noto Sans CJK JP', 'sans-serif'])

## Attention Patterns

In [None]:
# single sequence axis: dimension of 10k to 10M tokens

# 2D Text (Naive Approach)

## Format

In [None]:
# 1D => 2D #####################################################################

def split(text: str, height: int=-1, width: int=-1, separator: str='') -> list:
    """Cut a text into rows, capped to a maximum height and width.

    Args:
        text: the text to split.
        height: maximum number of rows taken from the split; no limit when <= 0.
        width: maximum length of each row; no limit when <= 0. Also the chunk
            size when there is no separator.
        separator: split on this string when given, else cut chunks with
            `mlable.utils.chunk`.

    Returns:
        The non-empty rows among the first `height` ones, each truncated to
        `width` characters.
    """
    # typically split on \n or at a fixed size
    if separator:
        __rows = text.split(separator)
    else:
        __rows = mlable.utils.chunk(text, width)
    # a negative bound would drop the last item: None means "no limit"
    __max_width = width if (width > 0) else None
    # idem for the height
    __max_height = height if (height > 0) else None
    # the height limit applies before the empty rows are dropped
    __kept = []
    for __row in __rows[:__max_height]:
        if __row:
            __kept.append(__row[:__max_width])
    return __kept

In [None]:
# FILL ROWS ####################################################################

def pad(rows: list, width: int, value: str='\x00') -> list:
    """Append `value` to each row, once per character missing to reach `width`.

    Rows that are already `width` long or longer are returned unchanged.
    """
    __padded = []
    for __row in rows:
        # a negative count repeats the filler zero times
        __missing = width - len(__row)
        __padded.append(__row + __missing * value)
    return __padded

## Samples

In [None]:
# ASCII ART ####################################################################

__s = pad(split(ART_SAMPLE.replace('\n', ''), width=28, separator=''), width=28)
__x = rgb(__s)
# uniform background, used to display the characters alone
__y = 32 * np.ones(__x.shape, dtype=np.int32)
__z = rgb_mixed(__s)
__h = rgb_hilbert(__s)
# one figure per encoding, in order: RGB, MIXED, HILBERT
for __data in (__x, __z, __h):
    with PlotContext(rows=1, cols=3, zoom=(5, 5), meta=False) as (__fig, __axes):
        # left: text only, middle: colors only, right: both
        matshow(data=__y, text=__s, axes=__axes[0])
        matshow(data=__data, axes=__axes[1])
        matshow(data=__data, text=__s, axes=__axes[-1])

In [None]:
# MORE AA ######################################################################

# iterator over the "images" subset: the next cell draws one sample per run
__i = iter(DATASETS['ft-asciiart-images'])

In [None]:
__s = next(__i)
# the features are decoded from bytes to python strings
__d, __l = (__s[__key].numpy().decode('utf-8') for __key in ('caption', 'labels'))
__t = split(__s['content'].numpy().decode('utf-8'), width=-1, separator='\n')
__x = rgb(__t)
# uniform background, used to display the characters alone
__y = 32 * np.ones(__x.shape, dtype=np.int32)
__z = rgb_mixed(__t)
__h = rgb_hilbert(__t)
print('{} | {}'.format(__d, __l))
# one figure per encoding, in order: RGB, MIXED, HILBERT
for __data in (__x, __z, __h):
    with PlotContext(rows=1, cols=3, zoom=(10, 10), meta=False) as (__fig, __axes):
        # left: text only, middle: colors only, right: both
        matshow(data=__y, text=__t, axes=__axes[0])
        matshow(data=__data, axes=__axes[1])
        matshow(data=__data, text=__t, axes=__axes[-1])

In [None]:
# SPLIT ON NEWLINES ############################################################

__s = pad(split(WIKI_SAMPLE, width=64, separator='\n'), width=64)
__x, __z, __h = rgb(__s), rgb_mixed(__s), rgb_hilbert(__s)
# one figure per encoding, in order: RGB, MIXED, HILBERT
for __data in (__x, __z, __h):
    with PlotContext(rows=1, cols=2, zoom=(16, 16), meta=False) as (__fig, __axes):
        # left: colors only, right: colors and text
        matshow(data=__data, axes=__axes[0])
        matshow(data=__data, text=__s, axes=__axes[-1])

In [None]:
# JAPANESE (SPLIT ON NEWLINES) #################################################

__s = pad(split(JAPANESE_SAMPLE, width=64, separator='\n'), width=64)
__x, __z, __h = rgb(__s), rgb_mixed(__s), rgb_hilbert(__s)
# one figure per encoding, in order: RGB, MIXED, HILBERT
for __data in (__x, __z, __h):
    with PlotContext(rows=1, cols=2, zoom=(16, 16), meta=False) as (__fig, __axes):
        # left: colors only, right: colors and text
        matshow(data=__data, axes=__axes[0])
        matshow(data=__data, text=__s, axes=__axes[-1], family=['Noto Sans CJK JP', 'sans-serif'])

In [None]:
# FIXED SIZE CHUNKS ############################################################

__s = pad(split(WIKI_SAMPLE, width=64, separator=''), width=64)
__x = rgb(__s)
with PlotContext(rows=1, cols=2, zoom=(16, 16), meta=False) as (__fig, __axes):
    # left: colors only, right: colors and text
    for __axis, __text in zip((__axes[0], __axes[-1]), ((), __s)):
        matshow(data=__x, text=__text, axes=__axis)

## Attention Patterns

In [None]:
# height and width attention
# correlated on ASCII art
# width attention is meaningful
# height attention has a discontinuity
# this could be bridged layer by layer, as the receptive field extends and merges
# ideally: all the characters (tokens) in an area should be related

# Text Along The Hilbert Curve

## Visualizing The 1D Position

In [None]:
# one string per character, taken from the slice [15:4111] of the sample (4096 characters at most)
# NOTE(review): the colors of this section are computed from WIKI_SAMPLE[:4096], a different offset — confirm this is intended
__text = tf.cast(list(WIKI_SAMPLE[15:4111]), dtype=tf.string)
# presumably lays the sequence out along a 2D Hilbert curve, on a 64 x 64 grid — verify against mlable
__text = mlable.shaping.hilbert.fold(__text, axis=0, order=6, rank=2)
# back to nested lists of python strings, the format iterated by the text overlay of `matshow`
__text = [[__c.decode('utf-8') for __c in __r] for __r in __text.numpy().tolist()]

In [None]:
# the integers from 0 to 4095, standing for the position of each character in the sequence
__position = tf.cast(range(4096), dtype=tf.int32)
# folded with the same order and rank as the text
__position = mlable.shaping.hilbert.fold(__position, axis=0, order=6, rank=2)

In [None]:
# transpose the list of points into one sequence per coordinate
__curve = list(zip(*(densecurves.hilbert.point(__n, order=6, rank=2) for __n in range(4096))))

In [None]:
# the first 4096 characters as a single row of RGB triplets, with a leading axis of size 1
__colors = tf.cast(rgb([WIKI_SAMPLE[:4096]]), dtype=tf.int32)
# fold the character axis (axis 1) with the same order and rank as the positions
__colors = mlable.shaping.hilbert.fold(__colors, axis=1, order=6, rank=2)

In [None]:
# left: positions and curve, middle: same with the characters on top, right: RGB encoding of the folded text
with PlotContext(rows=1, cols=3, zoom=(16, 16), meta=False) as (__fig, __axes):
    # the coordinates of the curve are passed in reverse order
    # NOTE(review): presumably to match the (row, column) layout of the image — confirm
    matshow(axes=__axes[0], data=__position, curve=__curve[::-1], cmap='plasma')
    matshow(axes=__axes[1], data=__position, text=__text, curve=__curve[::-1], cmap='plasma')
    # drop the leading axis of size 1
    matshow(axes=__axes[2], data=__colors[0])

# Higher Order Curves

## Display Tools

In [None]:
# IMAGE WITH CAPTION OVERLAY ###################################################

def matshow3d(axes: mpa.Axes, data: iter=(), curve: iter=(), text: iter=(), family: iter=None, cmap: mpc.Colormap=None) -> None:
    """Draw a 3D array as colored voxels, with an optional curve and a character overlay.

    Args:
        axes: 3D axes to draw on.
        data: array whose last axis holds the face colors of the voxels; skipped when empty.
        curve: three sequences of coordinates (x, y, z), drawn as arrows from each point to the next; skipped when empty.
        text: nested rows of characters, indexed `text[k][j][i]` and written near (i, j, k); spaces and null characters are skipped.
        family: font family of the overlay.
        cmap: unused, kept for symmetry with `matshow`.
    """
    # image like display of an array
    if len(data):
        # builtin `bool` rather than `np.bool`: the alias does not exist in NumPy 1.24 to 1.26
        __fill = np.ones(tuple(data.shape)[:-1], dtype=bool)
        axes.voxels(filled=__fill, facecolors=data)
    # path of the curve
    if len(curve):
        __x, __y, __z = curve
        # displacement from each point to the next, along each coordinate
        __u, __v, __w = [[__p[-1] - __p[0] for __p in zip(__a[:-1], __a[1:])] for __a in curve]
        # color gradient along the curve
        __c = [mpl.cm.gnuplot(__r) for __r in np.linspace(0, 1, len(__x))]
        # the last point has no successor: it gets a null displacement
        axes.quiver(__x, __y, __z, __u + [0], __v + [0], __w + [0], color=__c, alpha=0.5)
    # add a text overlay
    for __k in range(len(text)):
        for __j in range(len(text[__k])):
            for __i in range(len(text[__k][__j])):
                if text[__k][__j][__i] not in ' \x00':
                    axes.text(__i + 0.1, __j + 0.1, __k + 0.1, str(text[__k][__j][__i]), va='center', ha='center', color='black', family=family)

## Samples

In [None]:
# transpose the list of points into one sequence per coordinate
__curve = list(zip(*(densecurves.hilbert.point(__n, order=4, rank=3) for __n in range(4096))))

In [None]:
# the first 4096 characters as a single row of RGB triplets, with a leading axis of size 1
__colors = tf.cast(rgb([WIKI_SAMPLE[:4096]]), dtype=tf.int32)
# fold the character axis (axis 1) with the same order and rank as the 3D curve
__colors = mlable.shaping.hilbert.fold(__colors, axis=1, order=4, rank=3)

In [None]:
# one string per character, for the first 4096 characters of the sample
__text = tf.cast(list(WIKI_SAMPLE[:4096]), dtype=tf.string)
# folded with the same order and rank as the 3D curve
__text = mlable.shaping.hilbert.fold(__text, axis=0, order=4, rank=3)
# back to nested lists of python strings, the format iterated by the text overlay of `matshow3d`
__text = [[[__h.decode('utf-8') for __h in __c] for __c in __r] for __r in __text.numpy().tolist()]

In [None]:
# left: curve only, middle: curve and characters, right: voxels colored with the RGB encoding
with PlotContext(rows=1, cols=3, zoom=(16, 16), meta=False, subplot_kw={'projection': '3d'}) as (__fig, __axes):
    # the coordinates of the curve are passed in reverse order
    matshow3d(axes=__axes[0], curve=__curve[::-1], cmap='plasma')
    matshow3d(axes=__axes[1], text=__text, curve=__curve[::-1], cmap='plasma')
    # drop the leading axis and rescale the byte values to [0, 1]
    matshow3d(axes=__axes[2], data=__colors[0].numpy() / 255.)

In [None]:
# lower order curve, with 512 points only: one sequence per coordinate
__curve = list(zip(*(densecurves.hilbert.point(__n, order=3, rank=3) for __n in range(512))))
with PlotContext(rows=1, cols=1, zoom=(8, 8), meta=False, subplot_kw={'projection': '3d'}) as (__fig, __axes):
    # the coordinates of the curve are passed in reverse order
    matshow3d(axes=__axes, curve=__curve[::-1], cmap='plasma')

In [None]:
# full RGB cube, such that __colors[r, g, b] == [r, g, b]
# built directly in numpy: the equivalent nested python lists hold ~16.7M triplets, which is slow and memory hungry
__colors = np.stack(np.indices((256, 256, 256), dtype=np.uint8), axis=-1)

In [None]:
# with PlotContext(rows=1, cols=1, zoom=(8, 8), meta=False, subplot_kw={'projection': '3d'}) as (__fig, __axes):
#     matshow3d(axes=__axes, data=__colors / 255.)

## Filter The Dataset

In [None]:
# coordinate grids of a 4 x 4 x 4 volume
# NOTE(review): these values are reassigned before any use in the cells below — confirm whether this cell is still needed
__x, __y, __z = np.indices((4, 4, 4))

In [None]:
def _filter(sample: dict, pattern: str=r'width (\d+),') -> bool:
    """Keep the samples whose width, parsed from the labels, is at most 64.

    Args:
        sample: dict with a 'labels' string feature.
        pattern: not used by the current implementation.

    Returns:
        The result of comparing the parsed width with 64.
    """
    # first item of the comma separated labels
    # presumably formatted like "width 64" — verify against the dataset
    __head = tf.strings.split(sample['labels'], sep=',')[0]
    # up to 4 characters, after skipping the first 6
    __digits = tf.strings.substr(__head, pos=6, len=4)
    return tf.strings.to_number(__digits, tf.int32) <= 64

In [None]:
# keep only the samples accepted by `_filter`
__d = DATASETS['ft-asciiart-images'].filter(_filter)

In [None]:
# iterator over the filtered dataset: the next cell draws one sample per run
__i = iter(__d)

In [None]:
__s = next(__i)
# the caption gets its own name: `__d` holds the filtered dataset,
# and overwriting it would make a later re-run of `iter(__d)` iterate over a string
__caption = __s['caption'].numpy().decode('utf-8')
__l = __s['labels'].numpy().decode('utf-8')
__t = split(__s['content'].numpy().decode('utf-8'), width=-1, separator='\n')
__x = rgb(__t)
# uniform background, used to display the characters alone
__y = 32 * np.ones(__x.shape, dtype=np.int32)
print('{} | {}'.format(__caption, __l))
# left: text only, middle: colors only, right: both
with PlotContext(rows=1, cols=3, zoom=(10, 10), meta=False) as (__fig, __axes):
    matshow(data=__y, text=__t, axes=__axes[0])
    matshow(data=__x, axes=__axes[1])
    matshow(data=__x, text=__t, axes=__axes[-1])