## Import Dependencies

In [None]:
!pip install -U datasets mlable tokun llaminate

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mlable
  Downloading mlable-0.6.1-py3-none-any.whl (19 kB)
Collecting tokun
  Downloading tokun-0.9.1-py3-none-any.whl (18 kB)
Collecting llaminate
  Downloading llaminate-0.3.7-py3-none-any.whl (9.1 kB)
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downlo

In [None]:
!huggingface-cli login # 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

%load_ext tensorboard


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
import datetime
import functools
import itertools
import math
import os
import random
import urllib.request

import datasets as ds
import tensorflow as tf
import tensorflow_datasets as tfds

import mlable.data
import mlable.metrics

import tokun.data
import tokun.evaluation
import tokun.meta
import tokun.model
import tokun.pipeline

import llaminate.model
import llaminate.pipeline
import llaminate.utils

In [None]:
# record the framework version next to the outputs of the run
__version = tf.__version__
print("Tensorflow version " + __version)

Tensorflow version 2.15.0


## Set Up The GPU / TPU

In [None]:
# MIXED PRECISION #############################################################

# global Keras dtype policy, applied to every layer created afterwards
__policy = tf.keras.mixed_precision.Policy('mixed_bfloat16')
tf.keras.mixed_precision.set_global_policy(__policy)

In [None]:
# DEVICES #####################################################################

# do not log the device on which each operation is placed
tf.debugging.set_log_device_placement(False)

# logical devices visible to the runtime, one list per device type
CPU, GPU, TPU = (tf.config.list_logical_devices(__kind) for __kind in ('CPU', 'GPU', 'TPU'))

# choose the distribution strategy by order of preference: TPU, then GPU, then CPU
if not TPU:
    # mirror the variables across the GPUs when there are any, across the CPUs otherwise
    DISTRIBUTION_STRATEGY = tf.distribute.MirroredStrategy(GPU if GPU else CPU)
else:
    # connect to the TPU cluster and initialize it before building the strategy
    RESOLVER = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(RESOLVER)
    tf.tpu.experimental.initialize_tpu_system(RESOLVER)
    DISTRIBUTION_STRATEGY = tf.distribute.TPUStrategy(RESOLVER)

print(DISTRIBUTION_STRATEGY)

<tensorflow.python.distribute.tpu_strategy.TPUStrategyV2 object at 0x7e434a74c460>


## Mode

In [None]:
# TOGGLE ######################################################################

# flags read by the cells below to select what the notebook does

# when True, the learning rate is scaled by 0.1 and the model saved at LLAMINATE_MODEL_PATH is loaded if the file exists
IMPORT = False
FREEZE = True # freeze tokun weights
# when True, the training cell fits the model
TRAINING = True
# when True, the transformer is replaced by the single-layer DebugModel
DEBUG = False

## Defining The Metadata

In [None]:
# MODEL PARAMETERS ############################################################

# indexes of the axes in the tensors handled by the model
N_SEQUENCE_AXIS, N_FEATURE_AXIS = 1, -1

# number of transformer blocks and of attention heads in each block
N_LAYERS_NUM, N_HEADS_NUM = 8, 4

# 2048 in llama3-8B but tokun embeddings = 16 chr = 4 llama3 tokens
N_CACHE_DIM = 256
N_EMBED_DIM = 256
# the hidden layers are 4 times wider than the embeddings
N_HIDDEN_DIM = N_EMBED_DIM * 4
# the embedding dimension is split evenly between the heads
N_HEAD_DIM = N_EMBED_DIM // N_HEADS_NUM

LLAMINATE_PATH = 'llaminate.keras'

In [None]:
# TOKENIZER PARAMETERS ########################################################

# local file and label of the pretrained tokenizer
TOKUN_LABEL = '7.3'
TOKUN_PATH = 'tokun.keras'

# unit sizes of the tokenizer, also used to identify its version
TOKUN_DIM = [4, 16]
# product of the units divided by 4 (presumably 4 bytes per character -- TODO confirm)
TOKUN_FACTOR = math.prod(TOKUN_DIM) // 4
TOKUN_VERSION = tokun.meta.version(units=TOKUN_DIM, axis=1)

# the remote path is made of the version parts followed by the label
__template = 'https://github.com/apehex/tokun/raw/main/models/{}/{}/{}.keras'
TOKUN_URL = __template.format(*TOKUN_VERSION, TOKUN_LABEL)

In [None]:
# TRAINING PARAMETERS #########################################################

N_EPOCHS = 8

N_BATCH_DIM = 128
N_SAMPLE_DIM = TOKUN_FACTOR * N_CACHE_DIM

# Adam hyper parameters: the learning rate is 10 times lower when IMPORT is set
R_0 = 0.001 * (0.1 if IMPORT else 1.)
B_1 = 0.9
B_2 = 0.99

# there are 3 times more 0s than other bytes
CLASS_WEIGHTS = dict.fromkeys(range(256), 1.)
CLASS_WEIGHTS[0] = 0.3

In [None]:
# DERIVED PARAMETERS ##########################################################

# timestamp of the current run, it ends the path of the logs
__now = datetime.datetime.now()
DATETIME = __now.strftime("%Y%m%d-%H%M%S")

# the model version is made of its depth and its hidden dimension
LLAMINATE_VERSION = list(map(str, (N_LAYERS_NUM, N_HIDDEN_DIM)))
LLAMINATE_MODEL_PATH = 'llaminate.keras'
LLAMINATE_LOGS_PATH = os.path.join('.logs/', *LLAMINATE_VERSION, DATETIME)

## Loading The Data

In [None]:
# META ########################################################################

# TODO bigcode/the-stack
# TODO ArmelR/stack-exchange-instruction

# description of each dataset, indexed by a local name:
# - 'path' and 'name' are forwarded to `datasets.load_dataset`
# - 'train' and 'test' are the split expressions of the training and testing data
# - 'features' lists the columns that are joined to form the text of a sample
DATASETS_META = {
    'pt-wikipedia': {
        'path': 'wikimedia/wikipedia',
        'name': '20231101.en',
        'train': 'train[:90%]',
        'test': 'train[-10%:]',
        'features': ['text'],},
    'ft-retro-ascii-art': {
        'path': 'jdpressman/retro-ascii-art-v1',
        'name': None,
        'train': 'train',
        'test': 'validation',
        'features': ['prompt', 'art_aic'],},
    'ft-stack-exchange': {
        'path': 'Alignment-Lab-AI/Stack-Exchange-April',
        'name': None,
        'train': 'train[:90%]',
        'test': 'train[-10%:]',
        'features': ['question', 'answer'],},
    'ft-math': {
        'path': 'hendrycks/competition_math',
        'name': None,
        'train': 'train',
        'test': 'test',
        'features': ['problem', 'solution'],},}

In [None]:
# DOWNLOAD ####################################################################

def _load_split(meta: dict, split: str) -> tf.data.Dataset:
    """Load one split of a dataset as a shuffled and unbatched `tf.data.Dataset`.

    Args:
        meta: an entry of DATASETS_META, with the keys 'path', 'name' and the split expressions.
        split: the key of the split expression in `meta`, either 'train' or 'test'.

    Returns:
        The split converted with `to_tf_dataset`.
    """
    __dataset = ds.load_dataset(path=meta['path'], name=meta['name'], split=meta[split])
    return __dataset.to_tf_dataset(shuffle=True, batch_size=None)

# both splits of every dataset, indexed by dataset name then split name
DATASETS = {
    __name: {__split: _load_split(__args, __split) for __split in ('train', 'test')}
    for __name, __args in DATASETS_META.items()}

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/33 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

## Checking The Data

In [None]:
# STATS #######################################################################

STATS = {}

for __name, __splits in DATASETS.items():
    __features = DATASETS_META[__name]['features']
    __iterator = iter(__splits['train'])
    # byte length of the first 128 training samples, with the features joined by a group separator
    __lengths = []
    for __i in range(128):
        __sample = next(__iterator)
        __text = tf.strings.join(inputs=[__sample[__f] for __f in __features], separator='\x1d')
        __lengths.append(len(__text.numpy()))
    # save the stats
    STATS[__name] = {
        'min': min(__lengths),
        'max': max(__lengths),
        'mean': tf.reduce_mean(__lengths).numpy()}

In [None]:
# min / max / mean length of the sampled texts, for each dataset
print(STATS)

{'pt-wikipedia': {'min': 116, 'max': 27133, 'mean': 2931}, 'ft-retro-ascii-art': {'min': 3017, 'max': 3238, 'mean': 3146}, 'ft-stack-exchange': {'min': 289, 'max': 9503, 'mean': 1935}, 'ft-math': {'min': 100, 'max': 4771, 'mean': 740}}


In [None]:
# iterator over the raw training samples of the stack exchange dataset, consumed by the next cell
__b = iter(DATASETS['ft-stack-exchange']['train'])

In [None]:
# display the next sample, with the question and the answer joined by a group separator
__s = next(__b)
__fields = [__s[__k] for __k in ('question', 'answer')]
tf.strings.join(inputs=__fields, separator='\x1d')

<tf.Tensor: shape=(), dtype=string, numpy=b"List files sorted according to specific line of contents\n\nI have a directory of files. There is a line in each file that says:\n# order: N\n\nwhere N is an integer number. I want to list all files in the directory (or even include them in wrapper script) according to that N number. Is this possible from a bash command-line?\n\n\n\x1dWith zsh, you can define a glob sorting order based on the content of those lines with:\nbyOrder() REPLY=$(grep '^# order:' < $REPLY)\n\nand then use it for example with:\nprintf '%s\\n' *(.no+byOrder)\n\nor\nsorted_file_list=(*(.no+byOrder))\n\n(also adding a . to the glob qualifier to only consider regular files (not directories, fifos, symlinks...)).\n">

## Preprocess

In [None]:
# ITERATE #####################################################################

# NOTE(review): the datasets are replaced in place, so running this cell twice batches already batched data
for __name, __splits in DATASETS.items():
    # specialized preprocessing fn: only the list of features depends on the dataset
    __preprocess = functools.partial(
        llaminate.pipeline.preprocess,
        batch_dim=N_BATCH_DIM,
        token_dim=math.prod(TOKUN_DIM),
        embed_dim=N_EMBED_DIM,
        sample_dim=N_SAMPLE_DIM,
        features=DATASETS_META[__name]['features'],
        weight=True)
    # batch then preprocess both splits
    for __split in ('train', 'test'):
        __splits[__split] = (
            __splits[__split]
            .batch(N_BATCH_DIM, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
            .map(__preprocess, num_parallel_calls=tf.data.AUTOTUNE))

In [None]:
# CONCATENATE #################################################################

# names of the datasets left out of the mix
__excluded = {'ft-retro-ascii-art'} # | {'pt-wikipedia'}

def _concatenate_split(split: str, excluded: set) -> tf.data.Dataset:
    """Chain the given split of every dataset that is not excluded.

    The datasets are taken in the insertion order of DATASETS: iterating over a set of names
    would give an order that depends on string hashing and changes from one kernel to the next.

    Args:
        split: the split to chain, either 'train' or 'test'.
        excluded: the names of the datasets to leave out.

    Returns:
        A single dataset yielding the selected datasets one after the other.
    """
    parts = [__d[split] for __n, __d in DATASETS.items() if __n not in excluded]
    return functools.reduce(lambda __l, __r: __l.concatenate(__r), parts)

DATASET_TRAIN = _concatenate_split(split='train', excluded=__excluded)
DATASET_TEST = _concatenate_split(split='test', excluded=__excluded)

In [None]:
# CHECK DATASET ###############################################################

# both splits are expected to have the same structure: compare the specs by eye
for __dataset in (DATASET_TRAIN, DATASET_TEST):
    print(__dataset.element_spec)

(TensorSpec(shape=(128, 16384), dtype=tf.uint8, name=None), TensorSpec(shape=(128, 16384, 256), dtype=tf.float32, name=None), TensorSpec(shape=(128, 16384), dtype=tf.float32, name=None))
(TensorSpec(shape=(128, 16384), dtype=tf.uint8, name=None), TensorSpec(shape=(128, 16384, 256), dtype=tf.float32, name=None), TensorSpec(shape=(128, 16384), dtype=tf.float32, name=None))


In [None]:
# number of elements yielded by each concatenated dataset
__counts = {
    'train': DATASET_TRAIN.cardinality().numpy(),
    'test': DATASET_TEST.cardinality().numpy()}
print('train: {:,} samples'.format(__counts['train']))
print('test:  {:,} samples'.format(__counts['test']))

train: 73,681 samples
test:  8,222 samples


## Load The Tokenizer

In [None]:
# DOWNLOAD ####################################################################

# fetch the pretrained tokenizer and save it in the local file TOKUN_PATH
urllib.request.urlretrieve(url=TOKUN_URL, filename=TOKUN_PATH)

('tokun.keras', <http.client.HTTPMessage at 0x7e39802d9ea0>)

## Initializing The Model

In [None]:
# DEBUG MODEL #################################################################

class DebugModel(tf.keras.Sequential):
    """Minimal model made of a single embedding layer, used in place of the transformer when debugging."""

    def __init__(self, **kwargs) -> None:
        """Build the sequential model.

        Args:
            **kwargs: forwarded to `tf.keras.Sequential` (e.g. `name`) instead of being silently dropped.
        """
        super(DebugModel, self).__init__(
            layers=[
                # NOTE(review): `input_dim` is tied to the embedding dimension, confirm that it covers the range of the inputs
                tf.keras.layers.Embedding(
                    input_dim=N_EMBED_DIM,
                    output_dim=N_EMBED_DIM,
                    embeddings_initializer='glorot_uniform',
                    name='embed-1')],
            **kwargs)


In [None]:
# OVERALL SCOPE ###############################################################

# the tokenizer, the metrics and the model are all created and compiled under the distribution strategy
with DISTRIBUTION_STRATEGY.scope():
    # TOKENIZER ###############################################################
    # pretrained autoencoder, read from the local file TOKUN_PATH
    TOKUN = tf.keras.models.load_model(TOKUN_PATH, compile=False)
    TOKUN.trainable = not FREEZE # freeze the weights

    # METRICS #################################################################
    # same metric with 3 group sizes: 1, 4 and the product of the tokun units
    # presumably the accuracy on groups of that many consecutive bytes -- TODO confirm in mlable
    byte_accuracy = mlable.metrics.CategoricalGroupAccuracy(group=1, name='byte_accuracy')
    character_accuracy = mlable.metrics.CategoricalGroupAccuracy(group=4, name='character_accuracy')
    token_accuracy = mlable.metrics.CategoricalGroupAccuracy(group=math.prod(TOKUN_DIM), name='token_accuracy')

    # WEIGHTS #################################################################
    # resume from the saved model only when asked to and when the file exists, start from scratch otherwise
    # NOTE(review): the file is written by the checkpoint callback from the functional wrapper built below,
    # verify that the reloaded object still has the `set_tokenizer` method called right after
    if IMPORT and os.path.isfile(LLAMINATE_MODEL_PATH):
        LLAMINATE = tf.keras.models.load_model(LLAMINATE_MODEL_PATH, compile=False)
    else:
        LLAMINATE = llaminate.model.Transformer(num_layers=N_LAYERS_NUM, num_heads=N_HEADS_NUM, cache_dim=N_CACHE_DIM, embed_dim=N_EMBED_DIM, head_dim=N_HEAD_DIM, hidden_dim=N_HIDDEN_DIM)

    # INIT ####################################################################
    # plug the encoder and the decoder of the tokenizer in the model
    LLAMINATE.set_tokenizer(encoder=TOKUN._encoder, decoder=TOKUN._decoder)
    # simpler model to debug
    if DEBUG: LLAMINATE = DebugModel()

    # INPUT ###################################################################
    # fixed input shape: 4 * TOKUN_FACTOR * N_CACHE_DIM values per sample and N_BATCH_DIM samples per batch
    __input = tf.keras.Input(shape=(4 * TOKUN_FACTOR * N_CACHE_DIM,), batch_size=N_BATCH_DIM)
    # from here on, LLAMINATE is the functional model wrapping the model built above
    LLAMINATE = tf.keras.models.Model(__input, LLAMINATE(__input))

    # COMPILE #################################################################
    # `from_logits=False`: the loss treats the outputs of the model as probabilities
    LLAMINATE.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=R_0, beta_1=B_1, beta_2=B_2),
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=0., axis=-1, reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE, name='cce_loss'),
        weighted_metrics=[byte_accuracy, character_accuracy, token_accuracy])

In [None]:
# INSPECT #####################################################################

# iterator over the preprocessed test batches, consumed by the next cell
__b = iter(DATASET_TEST)

In [None]:
# a batch holds 3 tensors: __x is fed to the model, __y and __m are only inspected here
__x, __y, __m = next(__b)
for __t in (__x, __y, __m):
    print(__t.shape)
# the shape of the predictions, to compare by eye with the shape of __y
print(LLAMINATE(__x, training=True).shape)

(128, 16384)
(128, 16384, 256)
(128, 16384)
(128, 16384, 256)


In [None]:
# layers and parameter counts of the tokenizer
TOKUN.summary()

Model: "auto_encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder (Encoder)           multiple                  1377792   
                                                                 
 decoder (Decoder)           multiple                  1382656   
                                                                 
Total params: 2760448 (10.53 MB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 2760448 (10.53 MB)
_________________________________________________________________


In [None]:
# layers and parameter counts of the wrapped model
LLAMINATE.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(16, 16384)]             0         
                                                                 
 transformer (Transformer)   (16, 16384, 256)          11157760  
                                                                 
Total params: 11157760 (42.56 MB)
Trainable params: 8397312 (32.03 MB)
Non-trainable params: 2760448 (10.53 MB)
_________________________________________________________________


## Train

In [None]:
# TRAIN #######################################################################

if TRAINING:
    with DISTRIBUTION_STRATEGY.scope():
        # callbacks: keep the model with the lowest validation loss and log for tensorboard
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            LLAMINATE_MODEL_PATH,
            monitor='val_loss',
            verbose=1,
            save_best_only=True,
            save_weights_only=False,
            mode='auto',
            save_freq='epoch')
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=LLAMINATE_LOGS_PATH)
        # only the stack exchange dataset is used, the batches are already formed by the pipeline
        __train = DATASETS['ft-stack-exchange']['train'].prefetch(tf.data.AUTOTUNE)
        __valid = DATASETS['ft-stack-exchange']['test'].prefetch(tf.data.AUTOTUNE)
        # model fitting, with a validation after each epoch
        TRAINING_HISTORY = LLAMINATE.fit(
            x=__train,
            batch_size=None,
            epochs=N_EPOCHS,
            validation_split=None,
            validation_data=__valid,
            validation_freq=list(range(1, N_EPOCHS + 1)),
            class_weight=CLASS_WEIGHTS,
            verbose=1,
            callbacks=[cp_callback, tb_callback])

Epoch 1/8




Epoch 1: val_loss improved from inf to 0.81156, saving model to llaminate.keras


## Dataviz

In [None]:
# DATA ########################################################################

# text samples: one paragraph of Korean prose followed by 3 snippets of Python source code
# the last 2 snippets hold the same code, indented with 4 spaces and 2 spaces respectively
SAMPLES = [
    """위키백과, 우리 모두의 백과사전.\nt-분포 확률적 임베딩(t-SNE)은 데이터의 차원 축소에 사용되는 기계 학습 알고리즘 중 하나로, 2002년 샘 로이스Sam Rowise와 제프리 힌튼에 의해 개발되었다.[1] t-SNE는 비선형 차원 축소 기법으로, 고차원 데이터를 특히 2, 3차원 등으로 줄여 가시화하는데에 유용하게 사용된다. 구체적으로 t-SNE는 비슷한 데이터는 근접한 2, 3차원의 지점으로, 다른 데이터는 멀리 떨어진 지점으로 맵핑한다.""",
    """class Encoder(tf.keras.models.Model):\n    def __init__(self, depth: int, token_dim: int, encoding_dim: int, embedding_dim: int, latent_dim: int, batch_dim: int=None, attention: bool=False, **kwargs) -> None:\n        super(Encoder, self).__init__(**kwargs)\n        self._encoder = tf.keras.Sequential([\n            tf.keras.Input(shape=(encoding_dim,), batch_size=batch_dim, name='input'), # (B * G ^ D, U)\n            tf.keras.layers.Dense(units=embedding_dim, activation=None, use_bias=False, kernel_initializer='glorot_uniform', bias_initializer=None, name='embed-1'),] # (B * G ^ D, U) => (B * G ^ D, E)\n            + [tokun.layers.TokenizeBlock(left_axis=-2, right_axis=-1, token_dim=token_dim, latent_dim=latent_dim, attention=attention, name='tokenize' + (__i + 1) * '-4') for __i in range(depth)]) # (B * G ^ i, E) => (B * G ^ (i-1), E)\n\n    def call(self, x: tf.Tensor) -> tf.Tensor:\n        return self._encoder(x)\n""",
    """class AutoEncoder(tf.keras.models.Model):\n    def __init__(self, token_dim: int, encoding_dim: int, embedding_dim: int, latent_dim: int, batch_dim: int=None, **kwargs) -> None:\n        super(AutoEncoder, self).__init__(**kwargs)\n        self._encoder = Encoder(token_dim=token_dim, encoding_dim=encoding_dim, embedding_dim=embedding_dim, latent_dim=latent_dim, batch_dim=batch_dim)\n        self._decoder = Decoder(token_dim=token_dim, encoding_dim=encoding_dim, embedding_dim=embedding_dim, latent_dim=latent_dim, batch_dim=batch_dim)\n\n    def call(self, x: tf.Tensor) -> tf.Tensor:\n        return self._decoder(self._encoder(x))""",
    """class AutoEncoder(tf.keras.models.Model):\n  def __init__(self, token_dim: int, encoding_dim: int, embedding_dim: int, latent_dim: int, batch_dim: int=None, **kwargs) -> None:\n    super(AutoEncoder, self).__init__(**kwargs)\n    self._encoder = Encoder(token_dim=token_dim, encoding_dim=encoding_dim, embedding_dim=embedding_dim, latent_dim=latent_dim, batch_dim=batch_dim)\n    self._decoder = Decoder(token_dim=token_dim, encoding_dim=encoding_dim, embedding_dim=embedding_dim, latent_dim=latent_dim, batch_dim=batch_dim)\n\n  def call(self, x: tf.Tensor) -> tf.Tensor:\n    return self._decoder(self._encoder(x))"""]

In [None]:
# CACHE #######################################################################

__step = 4
# cache sized after the attention layers of the model
__cache = llaminate.utils.create_cache(
    num_layers=N_LAYERS_NUM,
    num_heads=N_HEADS_NUM,
    head_dim=N_HEAD_DIM,
    cache_dim=N_CACHE_DIM,
    batch_dim=N_BATCH_DIM)

In [None]:
# PREPROCESS ##################################################################

__prompt = """Skynet is an artificial neural network-based conscious group mind and artificial general superintelligence system that serves as the antagonistic force of the Terminator franchise."""
# turn the prompt into model inputs, with the same size as the training samples
__inputs = tokun.pipeline.preprocess(
    text=__prompt,
    token_size=4 * N_SAMPLE_DIM,
    expand=[1] * N_SEQUENCE_AXIS)

In [None]:
# PREDICT #####################################################################

# run the model on the prompt and turn the predictions back into text
__predictions = LLAMINATE(inputs=__inputs, training=False, mask=None)
__decoded = tokun.pipeline.postprocess(__predictions)
__decoded

'\x1e!\xa0ÄD𐀀𐀥ô \x96𐂷=G\x00\x00\x00\x02!`ªD𐃐𐀥· \x96𐂷=p\x1e\xa0\x00\x02~`ªD𐃐𐀍· \x96𐂷=p\x1e\x00\x00\x02\x02\xa0ªD𐀀𐀍·h\x96𐂷=ö\x1et\x00\x02~`ªD𐃐𐀍·\x1b\x96𐂷=pß\x9a \x02Õ`ª\uf744\U0001007e𐀍·h\x80𐂷=p\x00\x9a\x00\x02\x00\x00ª\uf744\U0001007e𐀥·h\x96𐂷=pnt\x00Ç~`ªD\U0001007e𐀥· Ò𐂷ªpn\x9aÄ\x02Õ`ªD𐀀𐀥\r \x96𐂷\xa0öß\x00\x00\x02!\x00ªD\U0001007e𐀥·hÒ𐂷=p\x00t\x00\x02\x02\x00ªD𐀀𐀍· \x96𐂷\xa0pß\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aed2\U0003b809\U000352bd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aed2\U00035197𥊽\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aed2𫣄𥊽\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aebb𥇄𥊽\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aed2𥇄𥊽\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aed2𥇄𥊽\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aebb𥇄𥊽\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aebb𥇄𥊽\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\U0003aebb𥇄𥊽\x00\x00\x00\x00\x00\x

In [None]:
# iterator over the preprocessed training batches of the stack exchange dataset
__batch = iter(DATASETS['ft-stack-exchange']['train'])

In [None]:
# predictions of the model on the next training batch
__x, __y, __m = next(__batch)
__p = LLAMINATE(inputs=__x, training=True, mask=None)

# decode the inputs, the targets and the predictions
__xt = tokun.pipeline.postprocess(__x, onehot=False)
__yt, __yp = (tokun.pipeline.postprocess(__t) for __t in (__y, __p))

In [None]:
# comparison of the decoded targets with the decoded predictions, followed by both texts
print(tokun.evaluation.compare(__yt, __yp))
for __text in (__yt, __yp):
    print(__text)

In [None]:
# round trip through the tokenizer alone: encode then decode the first 128 values of each sample
# the encoder and the decoder are read from TOKUN, where they were taken for `set_tokenizer`:
# LLAMINATE now designates the functional wrapper, which has no `_encoder` / `_decoder` attributes
tf.argmax(TOKUN._decoder(TOKUN._encoder(__x[:, :128])), axis=-1)

In [None]:
# the first 128 values of each input sample, to compare by eye with the round trip output
__x[:, :128]

## Logs

In [None]:
# browse the training logs: `.logs` is the root of LLAMINATE_LOGS_PATH
%tensorboard --logdir .logs