## Import deps

In [None]:
!pip install -U datasets mlable tokun llaminate

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting mlable
  Downloading mlable-0.8.0-py3-none-any.whl.metadata (4.7 kB)
Collecting tokun
  Downloading tokun-0.13.4-py3-none-any.whl.metadata (7.7 kB)
Collecting llaminate==0.5.8a
  Downloading llaminate-0.5.8a0-py3-none-any.whl.metadata (1.8 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting aiohttp (from datasets)
  Downloading aiohttp-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->datasets)
  Downloading aiohappyeyeballs-2.4.0-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from 

In [None]:
!huggingface-cli login # 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

%load_ext tensorboard


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
import datetime
import functools
import itertools
import math
import os
import random
import urllib.request

import datasets as hd
import tensorflow as tf

import mlable.data
import mlable.metrics

import tokun.data
import tokun.evaluation
import tokun.meta
import tokun.model
import tokun.pipeline

import llaminate.model
import llaminate.pipeline
import llaminate.utils

In [None]:
# record the TensorFlow version of the runtime
print("Tensorflow version {}".format(tf.__version__))

Tensorflow version 2.15.0


## Setup the GPU / TPU

In [None]:
# MIXED PRECISION #############################################################

# set the global Keras dtype policy; this runs before the model is created further down
tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')

In [None]:
# DEVICES #####################################################################

# keep the per-op device placement logs off
tf.debugging.set_log_device_placement(False)

# logical devices visible to TensorFlow, grouped by type
CPU = tf.config.list_logical_devices('CPU')
GPU = tf.config.list_logical_devices('GPU')
TPU = tf.config.list_logical_devices('TPU')

# choose the distribution strategy by priority: TPU, then GPU, then CPU
if TPU:
    # resolve the TPU cluster, connect to it and initialize it before creating the strategy
    RESOLVER = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(RESOLVER)
    tf.tpu.experimental.initialize_tpu_system(RESOLVER)
    DISTRIBUTION_STRATEGY = tf.distribute.TPUStrategy(RESOLVER)
elif GPU:
    DISTRIBUTION_STRATEGY = tf.distribute.MirroredStrategy(GPU)
else:
    DISTRIBUTION_STRATEGY = tf.distribute.MirroredStrategy(CPU)

# show which strategy was selected
print(DISTRIBUTION_STRATEGY)

<tensorflow.python.distribute.tpu_strategy.TPUStrategyV2 object at 0x7da00950b250>


## Mode

In [None]:
# TOGGLE ######################################################################

IMPORT = False # load the saved model from LLAMINATE_META['path'] when the file exists; also scales the learning rate by 0.1
DOWNLOAD = False # together with IMPORT: fetch the saved model from LLAMINATE_META['url'] first
TRAINING = True # run the fitting cell
BINARY = True # select the binary accuracy metric and binary cross-entropy loss, instead of the raw accuracy and MSE

## Defining The Metadata

In [None]:
# PIPELINE ####################################################################

# batching arguments, forwarded as-is to `tf.data.Dataset.batch`
BATCH_CONFIG = dict(
    batch_size=128,
    drop_remainder=True,
    num_parallel_calls=tf.data.AUTOTUNE)

# preprocessing arguments, forwarded as-is to `llaminate.pipeline.preprocess_factory`
PIPELINE_CONFIG = dict(
    batch_dim=BATCH_CONFIG['batch_size'],
    sample_dim=128 * 16,
    input_dim=16,
    data_weight=1.0,
    padding_weight=0.0001,
    separator='\x1d')

In [None]:
# MODEL PARAMETERS ############################################################

# keyword arguments forwarded as-is to `llaminate.model.Transformer`
LLAMINATE_CONFIG = {
  'num_layers': 4,
  'num_heads': 8,
  'input_dim': PIPELINE_CONFIG['input_dim'],
  'embed_dim': 2048,
  'head_dim': 2048 // 8, # equals embed_dim // num_heads with the values above
  'hidden_dim': 2048 * 8, # equals 8 * embed_dim with the values above
  'epsilon': 1e-6,}

In [None]:
# DERIVED PARAMETERS ##########################################################

# LLAMINATE_CONFIG has no 'output_dim' entry, so indexing it directly raises a KeyError.
# Fall back to 8 outputs per input byte: with 'input_dim' = 16 this gives 128, the width
# of the targets printed by the dataset check.
# NOTE(review): confirm this is also the intended width when BINARY is False
__output_dim = LLAMINATE_CONFIG.get('output_dim', 8 * LLAMINATE_CONFIG['input_dim'])

# identification of the model: version tag, local file and remote location of the weights
LLAMINATE_META = {
    'version': '{}x{}x{}'.format(LLAMINATE_CONFIG['num_layers'], LLAMINATE_CONFIG['embed_dim'], __output_dim),
    'path': 'llaminate.keras',
    'url': '',}

In [None]:
# TRAINING PARAMETERS #########################################################

# arguments of `tf.keras.optimizers.AdamW`; the learning rate is divided by 10 when the weights are imported
OPTIMIZER_CONFIG = {
    'learning_rate': 0.0001 * (0.1 if IMPORT else 1.0),
    'weight_decay': 0.6,
    'beta_1': 0.9,
    'beta_2': 0.99,
    'clipnorm': 1.0,}

# arguments shared by the accuracy metrics: depth = log2(input_dim)
METRICS_CONFIG = {
    # 'factor': PIPELINE_CONFIG['input_dim'],
    'depth': int(math.log(PIPELINE_CONFIG['input_dim'], 2)),}

# arguments of the loss class selected by the BINARY toggle
LOSS_CONFIG = {
    'from_logits': False,
    'label_smoothing': 0.,
    'axis': -1,
    'reduction': 'sum_over_batch_size',
    'name': 'loss',}

# arguments of `tf.keras.callbacks.ModelCheckpoint`: the whole model is saved after every epoch
CHECKPOINT_CONFIG = {
    'filepath': LLAMINATE_META['path'],
    'monitor': 'val_loss',
    'mode': 'auto',
    'save_freq': 'epoch',
    'save_best_only': False,
    'save_weights_only': False,
    'verbose': 1,}

# arguments of `tf.keras.callbacks.TensorBoard`
# the version is passed as a single path component: unpacking it with `*` splits the string
# into characters and nests one directory per character
TENSORBOARD_CONFIG = {
    'log_dir': os.path.join('.logs/', LLAMINATE_META['version'], datetime.datetime.now().strftime("%Y%m%d-%H%M%S")),
    'histogram_freq': 1,
    'embeddings_freq': 0,
    'profile_batch': (128, 256),
    'write_graph': False,
    'write_images': True,}

# arguments of `Model.fit`
# NOTE(review): the datasets yield a third tensor next to the inputs and targets; confirm
# that Keras accepts `class_weight` together with it
TRAINING_CONFIG = {
    'epochs': 4,
    'batch_size': None,
    'validation_split': None,
    'validation_freq': list(range(1, 9)),
    'class_weight': {__c: 0.3 if __c == 0 else 1. for __c in range(256)}, # there are 3 times more 0s than other bytes
    'verbose': 1,}

In [None]:
# DATASETS ####################################################################

# TODO bigcode/the-stack
# TODO ArmelR/stack-exchange-instruction

# Each entry describes one source: repository path, configuration name, the split slices to
# load (one dataset is created per slice) and the text features to extract.
DATASETS_CONFIG = {
    'pt-wikipedia': {
        'path': 'wikimedia/wikipedia',
        'name': '20231101.en',
        # 10 disjoint slices of 8% covering train[0%:80%]: the window matches the stride so that
        # consecutive slices do not overlap and the last one stops where the 'tp-wikipedia-1' slices start
        'splits': [f'train[{__p}%:{__p + 8}%]' for __p in range(0, 80, 8)],
        'features': ['text'],},
    'tp-wikipedia-1': {
        'path': 'wikimedia/wikipedia',
        'name': '20231101.en',
        'splits': [f'train[{__p}%:{__p + 1}%]' for __p in range(80, 90, 1)],
        'features': ['text'],},
    'tp-wikipedia-2': {
        'path': 'wikimedia/wikipedia',
        'name': '20231101.en',
        'splits': [f'train[{__p}%:{__p + 1}%]' for __p in range(90, 100, 1)],
        'features': ['text'],},
    'ft-retro-ascii-art': {
        'path': 'jdpressman/retro-ascii-art-v1',
        'name': None,
        'train': 'train',
        'splits': [f'train[{__p}%:{__p + 10}%]+validation[{__p}%:{__p + 10}%]' for __p in range(0, 100, 10)],
        'features': ['prompt', 'art_aic'],},
    'ft-stack-exchange': {
        'path': 'Alignment-Lab-AI/Stack-Exchange-April',
        'name': None,
        'splits': [f'train[{__p}%:{__p + 10}%]' for __p in range(0, 100, 10)],
        'features': ['question', 'answer'],},
    'ft-math': {
        'path': 'hendrycks/competition_math',
        'name': None,
        'splits': [f'train[{__p}%:{__p + 10}%]+test[{__p}%:{__p + 10}%]' for __p in range(0, 100, 10)],
        'features': ['problem', 'solution'],},}

## Loading The Weights

In [None]:
# IMPORT WEIGHTS ##############################################################

# fetch the saved model into the local path, only when both toggles are set
# NOTE(review): LLAMINATE_META['url'] is an empty string in this notebook, it has to be
# filled in before enabling the download
if IMPORT and DOWNLOAD:
    urllib.request.urlretrieve(LLAMINATE_META['url'], LLAMINATE_META['path'])

## Loading The Data

In [None]:
# DOWNLOAD ####################################################################

# repositories whose loading script is explicitly allowed to run: without the opt-in,
# `load_dataset` stops on an interactive confirmation prompt, which blocks "Run All"
TRUSTED_REPOSITORIES = {'hendrycks/competition_math'}

# one shuffled dataset per configured split slice, converted to tf.data with `batch_size=None`;
# the library default (None) is kept for the repositories that are not explicitly trusted
DATASETS = {
    __name: [
        hd.load_dataset(
            path=__args['path'],
            name=__args['name'],
            split=__s,
            trust_remote_code=(True if __args['path'] in TRUSTED_REPOSITORIES else None)).to_tf_dataset(shuffle=True, batch_size=None)
        for __s in __args['splits']]
    for __name, __args in DATASETS_CONFIG.items()}

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/131k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/41 [00:00<?, ?files/s]

Generating train split:   0%|          | 0/6407814 [00:00<?, ? examples/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Downloading readme:   0%|          | 0.00/15.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/58.4M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.97M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5894 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/300 [00:00<?, ? examples/s]

Downloading data:   0%|          | 0.00/3.42G [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Downloading builder script:   0%|          | 0.00/2.57k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/5.32k [00:00<?, ?B/s]

The repository for hendrycks/competition_math contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/hendrycks/competition_math.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/7.91M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5000 [00:00<?, ? examples/s]

## Checking The Data

In [None]:
# STATS #######################################################################

# statistics computed by `mlable.data.stats` on the first split of each dataset, with count=2048
STATS = {}
for __n in DATASETS:
    STATS[__n] = mlable.data.stats(
        dataset=DATASETS[__n][0],
        features=DATASETS_CONFIG[__n]['features'],
        count=2048)

print(STATS)

{'pt-wikipedia': {'min': 0, 'avg': 3688, 'max': 83081}, 'tp-wikipedia-1': {'min': 0, 'avg': 2011, 'max': 106740}, 'tp-wikipedia-2': {'min': 0, 'avg': 8799, 'max': 86014}, 'ft-retro-ascii-art': {'min': 0, 'avg': 3146, 'max': 3348}, 'ft-stack-exchange': {'min': 0, 'avg': 1162, 'max': 14289}, 'ft-math': {'min': 0, 'avg': 511, 'max': 4805}}


In [None]:
# peek at one raw sample: join its question and answer with the '\x1d' separator
__samples = iter(DATASETS['ft-stack-exchange'][0])
__s = next(__samples)
tf.strings.join(inputs=[__s[__f] for __f in ('question', 'answer')], separator='\x1d')

## Preprocess

In [None]:
# ITERATE #####################################################################

for __name, __splits in DATASETS.items():
    # preprocessing function specialized for the features of this dataset
    __preprocess = llaminate.pipeline.preprocess_factory(
        **PIPELINE_CONFIG,
        features=DATASETS_CONFIG[__name]['features'])
    # batch then preprocess every split, replacing the content of the list in place
    __splits[:] = [
        __split.batch(**BATCH_CONFIG).map(__preprocess, num_parallel_calls=tf.data.AUTOTUNE)
        for __split in __splits]

In [None]:
# CONCATENATE #################################################################

# chain a list of datasets, in the order of the list
__chain = functools.partial(functools.reduce, lambda __l, __r: __l.concatenate(__r))

# Iterate over the dict itself (insertion order) instead of a set of its keys: a set of strings
# is ordered by hash, which is randomized per interpreter session, so the sources would be
# chained in a different order on every run.
__sources = [__n for __n in DATASETS if __n != 'ft-retro-ascii-art'] # 'pt-wikipedia' can be excluded here too

WIKI_TRAIN = __chain(DATASETS['pt-wikipedia'])
WIKI_TEST = __chain(DATASETS['tp-wikipedia-1'])

# every split but the last goes to training, the last split of each source is held out
DATASET_TRAIN = __chain([__d for __n in __sources for __d in DATASETS[__n][:-1]])
DATASET_TEST = __chain([DATASETS[__n][-1] for __n in __sources])

In [None]:
# CHECK DATASET ###############################################################

# structure of the elements yielded by each dataset
print(WIKI_TRAIN.element_spec)
print(WIKI_TEST.element_spec)

print(DATASET_TRAIN.element_spec)
print(DATASET_TEST.element_spec)

# the datasets are batched at this point, so `cardinality` counts batches and not samples
print('wiki: {:,} / {:,} batches'.format(WIKI_TRAIN.cardinality().numpy(), WIKI_TEST.cardinality().numpy()))
print('total: {:,} / {:,} batches'.format(DATASET_TRAIN.cardinality().numpy(), DATASET_TEST.cardinality().numpy()))

(TensorSpec(shape=(128, 128, 16), dtype=tf.int32, name=None), TensorSpec(shape=(128, 128, 128), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128), dtype=tf.float32, name=None))
(TensorSpec(shape=(128, 128, 16), dtype=tf.int32, name=None), TensorSpec(shape=(128, 128, 128), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128), dtype=tf.float32, name=None))
(TensorSpec(shape=(128, 128, 16), dtype=tf.int32, name=None), TensorSpec(shape=(128, 128, 128), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128), dtype=tf.float32, name=None))
(TensorSpec(shape=(128, 128, 16), dtype=tf.int32, name=None), TensorSpec(shape=(128, 128, 128), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128), dtype=tf.float32, name=None))
wiki: 45,050 / 5,000 samples
total: 78,192 / 8,688 samples


## Initializing The Model

In [None]:
# METRICS #####################################################################

# pick the accuracy metric and the loss classes according to the BINARY toggle
if BINARY:
    _Accuracy = mlable.metrics.BinaryGroupAccuracy
    _Loss = tf.keras.losses.BinaryCrossentropy
else:
    _Accuracy = mlable.metrics.RawGroupAccuracy
    _Loss = tf.keras.losses.MeanSquaredError

In [None]:
# OVERALL SCOPE ###############################################################

# metrics, model and optimizer are all created inside the scope of the distribution strategy
with DISTRIBUTION_STRATEGY.scope():
    # METRICS #################################################################
    # the same accuracy class with 3 group sizes: 1, 4 and the input dimension
    byte_accuracy = _Accuracy(group=1, name='byte_accuracy', **METRICS_CONFIG)
    character_accuracy = _Accuracy(group=4, name='character_accuracy', **METRICS_CONFIG)
    token_accuracy = _Accuracy(group=PIPELINE_CONFIG['input_dim'], name='token_accuracy', **METRICS_CONFIG)

    # WEIGHTS #################################################################
    # restore the saved model when requested and available, otherwise create a new one:
    # a fresh model is only constructed when it is actually used
    if IMPORT and os.path.isfile(LLAMINATE_META['path']):
        LLAMINATE = tf.keras.models.load_model(LLAMINATE_META['path'], compile=False)
    else:
        LLAMINATE = llaminate.model.Transformer(**LLAMINATE_CONFIG)

    # COMPILE #################################################################
    LLAMINATE.compile(
        optimizer=tf.keras.optimizers.AdamW(**OPTIMIZER_CONFIG),
        loss=_Loss(**LOSS_CONFIG),
        weighted_metrics=[byte_accuracy, character_accuracy, token_accuracy])

## Train

In [None]:
# TRAIN #######################################################################

if TRAINING:
    with DISTRIBUTION_STRATEGY.scope():
        # callbacks: checkpoint the model and log to TensorBoard
        cp_callback = tf.keras.callbacks.ModelCheckpoint(**CHECKPOINT_CONFIG)
        tb_callback = tf.keras.callbacks.TensorBoard(**TENSORBOARD_CONFIG)
        # second split of 'pt-wikipedia' for training, first split of 'tp-wikipedia-1' for validation
        __train = DATASETS['pt-wikipedia'][1].prefetch(tf.data.AUTOTUNE)
        __valid = DATASETS['tp-wikipedia-1'][0].prefetch(tf.data.AUTOTUNE)
        # model fitting
        TRAINING_HISTORY = LLAMINATE.fit(
            x=__train,
            validation_data=__valid,
            callbacks=[cp_callback, tb_callback],
            **TRAINING_CONFIG)

Epoch 1/4
Epoch 1: saving model to llaminate.keras
Epoch 2/4
Epoch 2: saving model to llaminate.keras
Epoch 3/4
Epoch 3: saving model to llaminate.keras
Epoch 4/4
Epoch 4: saving model to llaminate.keras


In [None]:
# list the layers of the model with their parameter counts
LLAMINATE.summary()

Model: "transformer"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embed (TokunEmbedding)      multiple                  32768     
                                                                 
 block-0 (DecoderBlock)      multiple                  117440512 
                                                                 
 block-1 (DecoderBlock)      multiple                  117440512 
                                                                 
 block-2 (DecoderBlock)      multiple                  117440512 
                                                                 
 block-3 (DecoderBlock)      multiple                  117440512 
                                                                 
 head (Dense)                multiple                  262144    
                                                                 
Total params: 470056960 (1.75 GB)
Trainable params: 470

## Dataviz

In [None]:
# iterator over the batches of the first 'tp-wikipedia-1' split, the one used as validation data in the training cell
__i = iter(DATASETS['tp-wikipedia-1'][0])

In [None]:
# next batch: inputs, targets and a third tensor (presumably the sample weights — TODO confirm)
__x, __t, __w = next(__i)
# forward pass on the inputs, with the default call arguments
__y = LLAMINATE(__x)

In [None]:
# turn the model outputs back into strings (the recorded output is a string tensor of shape (128,))
__s = llaminate.pipeline.postprocess(__y)

In [None]:
# display the decoded predictions
__s

<tf.Tensor: shape=(128,), dtype=string, numpy=
array([b'\x00\x00\x00\x00laa``a  a vmllage in Vie Eematiaistrict of Aavgcjuddrovince,in perkey. It `qopulation is 1(0 (i000).\x02\npaferences\n\nVillages in Mawar @owvrict\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x

In [None]:
# DATA ########################################################################

# hand-picked texts: one Korean paragraph and three Python class definitions
# NOTE(review): SAMPLES is not referenced by any other cell visible in this notebook
SAMPLES = [
    """위키백과, 우리 모두의 백과사전.\nt-분포 확률적 임베딩(t-SNE)은 데이터의 차원 축소에 사용되는 기계 학습 알고리즘 중 하나로, 2002년 샘 로이스Sam Rowise와 제프리 힌튼에 의해 개발되었다.[1] t-SNE는 비선형 차원 축소 기법으로, 고차원 데이터를 특히 2, 3차원 등으로 줄여 가시화하는데에 유용하게 사용된다. 구체적으로 t-SNE는 비슷한 데이터는 근접한 2, 3차원의 지점으로, 다른 데이터는 멀리 떨어진 지점으로 맵핑한다.""",
    """class Encoder(tf.keras.models.Model):\n    def __init__(self, depth: int, token_dim: int, encoding_dim: int, embedding_dim: int, latent_dim: int, batch_dim: int=None, attention: bool=False, **kwargs) -> None:\n        super(Encoder, self).__init__(**kwargs)\n        self._encoder = tf.keras.Sequential([\n            tf.keras.Input(shape=(encoding_dim,), batch_size=batch_dim, name='input'), # (B * G ^ D, U)\n            tf.keras.layers.Dense(units=embedding_dim, activation=None, use_bias=False, kernel_initializer='glorot_uniform', bias_initializer=None, name='embed-1'),] # (B * G ^ D, U) => (B * G ^ D, E)\n            + [tokun.layers.TokenizeBlock(left_axis=-2, right_axis=-1, token_dim=token_dim, latent_dim=latent_dim, attention=attention, name='tokenize' + (__i + 1) * '-4') for __i in range(depth)]) # (B * G ^ i, E) => (B * G ^ (i-1), E)\n\n    def call(self, x: tf.Tensor) -> tf.Tensor:\n        return self._encoder(x)\n""",
    """class AutoEncoder(tf.keras.models.Model):\n    def __init__(self, token_dim: int, encoding_dim: int, embedding_dim: int, latent_dim: int, batch_dim: int=None, **kwargs) -> None:\n        super(AutoEncoder, self).__init__(**kwargs)\n        self._encoder = Encoder(token_dim=token_dim, encoding_dim=encoding_dim, embedding_dim=embedding_dim, latent_dim=latent_dim, batch_dim=batch_dim)\n        self._decoder = Decoder(token_dim=token_dim, encoding_dim=encoding_dim, embedding_dim=embedding_dim, latent_dim=latent_dim, batch_dim=batch_dim)\n\n    def call(self, x: tf.Tensor) -> tf.Tensor:\n        return self._decoder(self._encoder(x))""",
    """class AutoEncoder(tf.keras.models.Model):\n  def __init__(self, token_dim: int, encoding_dim: int, embedding_dim: int, latent_dim: int, batch_dim: int=None, **kwargs) -> None:\n    super(AutoEncoder, self).__init__(**kwargs)\n    self._encoder = Encoder(token_dim=token_dim, encoding_dim=encoding_dim, embedding_dim=embedding_dim, latent_dim=latent_dim, batch_dim=batch_dim)\n    self._decoder = Decoder(token_dim=token_dim, encoding_dim=encoding_dim, embedding_dim=embedding_dim, latent_dim=latent_dim, batch_dim=batch_dim)\n\n  def call(self, x: tf.Tensor) -> tf.Tensor:\n    return self._decoder(self._encoder(x))"""]

In [None]:
# CACHE #######################################################################

# NOTE(review): N_BATCH_DIM, N_CACHE_DIM, N_HEAD_DIM, N_LAYERS_NUM and N_HEADS_NUM are not defined in
# any cell visible in this notebook, so this cell raises a NameError on a fresh kernel. They look like
# leftovers from an older configuration scheme; the head / layer / head-count values presumably map to
# LLAMINATE_CONFIG — TODO confirm and define them
__cache = llaminate.utils.create_cache(batch_dim=N_BATCH_DIM, cache_dim=N_CACHE_DIM, head_dim=N_HEAD_DIM, num_layers=N_LAYERS_NUM, num_heads=N_HEADS_NUM)
__step = 4

In [None]:
# PREPROCESS ##################################################################

# free-form prompt used to probe the model
__prompt = """Skynet is an artificial neural network-based conscious group mind and artificial general superintelligence system that serves as the antagonistic force of the Terminator franchise."""
# NOTE(review): N_SAMPLE_DIM is not defined in any cell visible in this notebook, so this line raises a
# NameError on a fresh kernel — TODO define it (its relation to PIPELINE_CONFIG['sample_dim'] is unclear)
__inputs = tokun.pipeline.preprocess(text=__prompt, token_size=4 * N_SAMPLE_DIM, expand=[1])

In [None]:
# PREDICT #####################################################################

# forward pass on the preprocessed prompt, with training disabled and no mask
__predictions = LLAMINATE(inputs=__inputs, training=False, mask=None)
# postprocess the predictions with binary=True and random=False
__output = tokun.pipeline.postprocess(__predictions, binary=True, random=False)

In [None]:
# iterator over the batches of the second 'pt-wikipedia' split, the one used for training
__batch = iter(DATASETS['pt-wikipedia'][1])

In [None]:
# next batch: inputs, targets and a third tensor (presumably the sample weights — TODO confirm)
__x, __y, __m = next(__batch)
# NOTE(review): the forward pass runs with training=True, unlike the prompt prediction above — confirm this is intended
__p = LLAMINATE(inputs=__x, training=True, mask=None)

In [None]:
# interpret the targets and the predictions with binary=True
__yt = tokun.pipeline.interpret(__y, binary=True)
__yp = tokun.pipeline.interpret(__p, binary=True)
# decode the inputs, the interpreted targets and the interpreted predictions
__it = tokun.pipeline.decode(__x)
__ot = tokun.pipeline.decode(__yt)
__op = tokun.pipeline.decode(__yp)

In [None]:
# first two entries of the decoded inputs, targets and predictions, in that order
for __decoded in (__it, __ot, __op):
    print(__decoded[:2])

## Logs

In [None]:
# browse the logs written under `.logs` by the TensorBoard callback
%tensorboard --logdir .logs