In [None]:
# default_exp configs

In [None]:
#hide
%load_ext autoreload
%autoreload 2

# Experiment Configs

In [None]:
#export
from fastcore.all import *
from fastai.basics import *

from reformer_fastai.core import *
from reformer_fastai.transformer import *
from reformer_fastai.reformer import *

import json
from inspect import signature, Parameter

In [None]:
#export
def _dummy(): return

In [None]:
#export
def update_sig(d):
    """
    Decorator factory: replace the catch-all keyword parameter in the signature of `f`
    with keyword-only parameters built from dict `d`.

    Every `key: value` pair of `d` becomes a keyword-only parameter `key` with default `value`.
    Only `f.__signature__` (what `inspect.signature` reports) is changed; the function object
    itself is returned unmodified, so it still accepts whatever it accepted before.
    """
    new_params = {k:Parameter(k, Parameter.KEYWORD_ONLY, default=v) for k,v in d.items()}
    def _f(f):
        sig = signature(f)
        # Drop the catch-all (`**kwargs`, whatever its name) and, as before, any parameter that
        # is literally called `kwargs`. The former `pop('kwargs')` raised a bare KeyError when
        # `f` had no parameter with exactly that name.
        params = {name:p for name,p in sig.parameters.items()
                  if p.kind is not Parameter.VAR_KEYWORD and name != 'kwargs'}
        # keys of `d` that clash with an existing parameter replace it in place
        params.update(new_params)
        f.__signature__ = sig.replace(parameters=list(params.values()))
        return f
    return _f

In [None]:
#export
class ConfigBase:
    """
    Base class for Configs.

    Subclasses set `_d` (dict with the default value of every accepted parameter) and
    `_model` (the model class the config describes). Parameters can be read and written
    as attributes (`cfg.n_layers`) or as items (`cfg['n_layers']`); writing a key that is
    not in `_d` fails.
    """
    _d:dict = None      # class-level defaults, must be provided by the subclass
    _model = _dummy     # `_dummy` means "not set", must be replaced by the subclass

    def __init__(self, *, verbose=False, warn=True, **kwargs):
        """
        Start from the class defaults and override them with `kwargs`.
        `verbose`: print every override that is applied.
        `warn`: print a message for every key that is not a known parameter (the key is skipped).
        """
        self.validate()
        # Give every instance its own copy of the defaults. Without it all writes below (and in
        # `__setattr__`/`__setitem__`) land in the dict shared by the whole class and silently
        # change the defaults of every other existing and future instance.
        # `object.__setattr__` bypasses our own `__setattr__`, which only accepts config keys.
        # The copy is shallow: values are only ever replaced by this class, never mutated.
        object.__setattr__(self, '_d', self._d.copy())
        for k,v in kwargs.items():
            if k in self._d:
                self._d[k]=v
                if verbose: print(f'Setting `{k}` = {v}')
            elif warn: print(f'Parameter `{k}` is not accepted by {self._model.__name__}. Skipped')

    def validate(self):
        "Check that the subclass has provided both `_d` and `_model`"
        assert exists(self._d), "_d missing. You might want to provide defaults for config"
        assert self._model is not _dummy, "_model missing. Provide a model class"

    def validate_arg(self, k):
        "Raise `AssertionError` if `k` is not one of the config parameters"
        assert k in self._d.keys(), f"{self._model.__name__} does not accept `{k}` argument"

    def __getattr__(self, k):
        "Attribute-style read; only reached when normal attribute lookup fails"
        try:
            res = self._d[k]
        except KeyError:
            # keep the attribute protocol: unknown names must surface as AttributeError
            raise AttributeError(f"{type(self).__name__} does not have attribute `{k}`")
        return res

    def __setattr__(self, k, v):
        "Attribute-style write; every assignment is stored in `_d` and `k` must be a known parameter"
        self.validate_arg(k)
        self._d[k] = v

    def __getitem__(self, k):
        "Item-style read; raises `KeyError` for unknown `k`"
        return self._d[k]

    def __setitem__(self, k, v):
        "Item-style write; `k` must be a known parameter"
        self.validate_arg(k)
        self._d[k] = v

    def __repr__(self):
        s = f"{self._model.__name__} config \n" + '-'*20
        # one `name value` row per parameter, names left-aligned in a 16 character column
        s += ''.join(f'\n{k:16}{v}' for k,v in self._d.items())
        return s

    def dict(self):
        "Return the parameter dict of this config (the live object, not a copy)"
        return self._d

    def save(self, fn, add_tstmp=False):
        "Save config as json to `exp_configs/{fn}.json`; `add_tstmp` appends a UTC timestamp to `fn`"
        os.makedirs('exp_configs', exist_ok=True)
        if add_tstmp:
            tstmp = time.strftime("_%d_%m_%Y_%H:%M", time.gmtime())
            fn += tstmp
        with open(f'exp_configs/{fn}.json', 'w') as f:
            json.dump(self.dict(), f)

    @classmethod
    def from_file(cls, fn):
        "Create a config from `exp_configs/{fn}.json`"
        with open(f'exp_configs/{fn}.json') as f:
            d = json.load(f)
        # NOTE: json has no tuple type, so values saved as tuples are loaded back as lists
        return cls(**d)
    

In [None]:
#export
class SyntheticConfig(ConfigBase):
    """
    Default parameters used with `LSHLM` in the Synthetic Experiment.
    See https://arampacha.github.io/reformer_fastai/experiment.synthetic-task.html for details
    """
    _model = LSHLM
    # defaults; every key listed here can be overridden when the config is created
    _d = {
        'vocab_sz': 128, 'd_model': 256, 'n_layers': 1, 'n_heads': 4, 'd_ff': 256,
        'attn_dropout': 0.0, 'ff_dropout': 0.0, 'emb_dropout': 0.0,
        'tie_weights': True, 'causal': True,
        'pos_enc': 'absolute', 'max_seq_len': 1024,
        'axial_shape': None, 'axial_emb_dims': None,
        'pad_idx': None, 'prenorm': False, 'attn_bias': False,
        'bucket_size': 64, 'use_lsh': True, 'n_hashes': 4,
        'seed': 123,
    }

    # `update_sig` swaps `**kwargs` for the keys of `_d` in the reported signature
    @update_sig(_d)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

In [None]:
# Override two defaults; `foo` is not a config key, so it is reported and skipped
synt_config = SyntheticConfig(n_hashes=8, seed=1, foo=1, verbose=True)
synt_config

Setting `n_hashes` = 8
Setting `seed` = 1
Parameter `foo` is not accepted by LSHLM. Skipped


LSHLM config 
--------------------
vocab_sz        128
d_model         256
n_layers        1
n_heads         4
d_ff            256
attn_dropout    0.0
ff_dropout      0.0
emb_dropout     0.0
tie_weights     True
causal          True
pos_enc         absolute
max_seq_len     1024
axial_shape     None
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
bucket_size     64
use_lsh         True
n_hashes        8
seed            1

In [None]:
#hide
# round trip: a config written to `exp_configs/_tmp_synt.json` must load back with equal values
synt_config.save('_tmp_synt')
synt_config2 = SyntheticConfig.from_file('_tmp_synt')
assert synt_config.dict()==synt_config2.dict(), 'Loading saved config failed' 

In [None]:
#accessing config params as attributes
synt_config.d_model

256

In [None]:
#setting config params as attributes
synt_config.n_hashes = 4
assert synt_config._d['n_hashes'] == 4
# an unknown parameter is rejected; the error message is printed instead of failing the cell
try: synt_config.foo = 1
except Exception as e: print(e)

LSHLM does not accept `foo` argument


In [None]:
#accessing config params as items
synt_config['n_layers']

1

In [None]:
#hide
#setting config params as items
synt_config['n_hashes'] = 1
assert synt_config._d['n_hashes'] == 1
# an unknown key is rejected; the error message is printed instead of failing the cell
try: synt_config['foo'] = 1
except Exception as e: print(e)

LSHLM does not accept `foo` argument


In [None]:
#hide
#skip
# reading a name that is not a config parameter raises AttributeError
try: synt_config.foo
except AttributeError as e: print(e)

SyntheticConfig does not have attribute `foo`


> Note: Config for enwik8 needs to be updated when we decide on sequence length.

In [None]:
#export
class TransformerLMConfigEnwik8(ConfigBase):
    """
    Default parameters used with `TransformerLM` in the enwik8 Experiment.
    See https://arampacha.github.io/reformer_fastai/experiment.enwik8-baseline.html for details
    """
    _model = TransformerLM
    # defaults; every key listed here can be overridden when the config is created
    _d = {
        'vocab_sz': 256, 'd_model': 1024, 'n_layers': 3, 'n_heads': 8, 'd_ff': 4096,
        'attn_dropout': 0.1, 'ff_dropout': 0.1, 'emb_dropout': 0.1,
        'tie_weights': True, 'causal': True,
        'pos_enc': 'axial', 'max_seq_len': 2048,
        'axial_shape': (64, 32), 'axial_emb_dims': None,
        'pad_idx': None, 'prenorm': False, 'attn_bias': False,
        'shared_qk': False,
    }

    # `update_sig` swaps `**kwargs` for the keys of `_d` in the reported signature
    @update_sig(_d)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

In [None]:
# display the config with all defaults
TransformerLMConfigEnwik8()

TransformerLM config 
--------------------
vocab_sz        256
d_model         1024
n_layers        3
n_heads         8
d_ff            4096
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
causal          True
pos_enc         axial
max_seq_len     2048
axial_shape     (64, 32)
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
shared_qk       False

In [None]:
#skip
#hide
# build the model directly from a default config
TransformerLM.from_config(TransformerLMConfigEnwik8())

TransformerLM(
  (emb): TransformerEmbedding(
    (emb): Embedding(256, 1024)
    (dropout): Dropout(p=0.1, inplace=False)
    (pos_enc): AxialPositionalEmbedding()
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderBlock(
        (attn): PostNorm(
          (sublayer): Residual(
            (sublayer): Attention(
              (in_proj): AttnInProjV2(
                (to_q): Linear(in_features=1024, out_features=1024, bias=False)
                (to_kv): Linear(in_features=1024, out_features=2048, bias=False)
              )
              (attn): ScaledDotProdAttention(
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (out_proj): Linear(in_features=1024, out_features=1024, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        )
        (ff): PostNorm(
          (sublayer): Residual(
      

In [None]:
#export
class ReversibleLMConfigEnwik8(ConfigBase):
    """
    Default parameters used with `ReversibleLM` in the enwik8 Experiment.
    See https://arampacha.github.io/reformer_fastai/experiment.enwik8-reversible.html for details
    """
    _model = ReversibleLM
    # defaults; every key listed here can be overridden when the config is created
    _d = {
        'vocab_sz': 256, 'd_model': 1024, 'n_layers': 3, 'n_heads': 8, 'd_ff': 4096,
        'attn_dropout': 0.1, 'ff_dropout': 0.1, 'emb_dropout': 0.1,
        'tie_weights': True, 'causal': True,
        'pos_enc': 'axial', 'max_seq_len': 2048,
        'axial_shape': (64, 32), 'axial_emb_dims': None,
        'pad_idx': None, 'prenorm': False, 'attn_bias': False,
        'rev_thres': 0,
    }

    # `update_sig` swaps `**kwargs` for the keys of `_d` in the reported signature
    @update_sig(_d)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

In [None]:
# display the config with a single default (`rev_thres`) overridden
ReversibleLMConfigEnwik8(rev_thres=128)

ReversibleLM config 
--------------------
vocab_sz        256
d_model         1024
n_layers        3
n_heads         8
d_ff            4096
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
causal          True
pos_enc         axial
max_seq_len     2048
axial_shape     (64, 32)
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
rev_thres       128

In [None]:
#skip
#hide
# build the model directly from a config created without overrides
ReversibleLM.from_config(ReversibleLMConfigEnwik8())

ReversibleLM(
  (emb): TransformerEmbedding(
    (emb): Embedding(256, 1024)
    (dropout): Dropout(p=0.1, inplace=False)
    (pos_enc): AxialPositionalEmbedding()
  )
  (encoder): ReversibleEncoder(
    (norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    (layers): ReversibleSequence(
      (blocks): ModuleList(
        (0): ReversibleBlock(
          (f): Deterministic(
            (net): PostNorm(
              (sublayer): Attention(
                (in_proj): AttnInProjV2(
                  (to_q): Linear(in_features=1024, out_features=1024, bias=False)
                  (to_kv): Linear(in_features=1024, out_features=2048, bias=False)
                )
                (attn): ScaledDotProdAttention(
                  (dropout): Dropout(p=0.1, inplace=False)
                )
                (out_proj): Linear(in_features=1024, out_features=1024, bias=False)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (norm): LayerNorm((1024,)

In [None]:
#export
class NHashesConfig(ConfigBase):
    """
    Default parameters used with `LSHLM` when evaluating performance as function of `n_hashes`.
    See https://arampacha.github.io/reformer_fastai/experiment.enwik8-n_hashes.html for details
    """
    _model = LSHLM
    # defaults; every key listed here can be overridden when the config is created
    _d = {
        'vocab_sz': 256, 'd_model': 1024, 'n_layers': 3, 'n_heads': 8, 'd_ff': 4096,
        'attn_dropout': 0.1, 'ff_dropout': 0.1, 'emb_dropout': 0.1,
        'tie_weights': True, 'causal': True,
        'pos_enc': 'axial', 'max_seq_len': 4096,
        'axial_shape': None, 'axial_emb_dims': None,
        'pad_idx': None, 'prenorm': False, 'attn_bias': False,
        'bucket_size': 64, 'use_lsh': True, 'n_hashes': 2,
        'seed': 842,
    }

    # `update_sig` swaps `**kwargs` for the keys of `_d` in the reported signature
    @update_sig(_d)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

In [None]:
# create the config with all defaults and display it
cfg = NHashesConfig()
cfg

LSHLM config 
--------------------
vocab_sz        256
d_model         1024
n_layers        3
n_heads         8
d_ff            4096
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
causal          True
pos_enc         axial
max_seq_len     4096
axial_shape     None
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
bucket_size     64
use_lsh         True
n_hashes        2
seed            842

In [None]:
#export
class NLayersConfig(ConfigBase):
    """
    Default parameters used with `ReformerLM` when evaluating performance as function of `n_layers`.
    See https://arampacha.github.io/reformer_fastai/experiment.enwik8-n_layers.html for details
    """
    _model = ReformerLM
    # defaults; every key listed here can be overridden when the config is created
    _d = {
        'vocab_sz': 256, 'd_model': 1024, 'n_layers': 3, 'n_heads': 8, 'd_ff': 4096,
        'ff_chunks': 64,
        'attn_dropout': 0.1, 'ff_dropout': 0.1, 'emb_dropout': 0.1,
        'tie_weights': True, 'causal': True,
        'pos_enc': 'axial', 'max_seq_len': 2**14,
        'axial_shape': None, 'axial_emb_dims': None,
        'pad_idx': None, 'prenorm': False, 'attn_bias': False,
        'bucket_size': 64, 'use_lsh': True, 'n_hashes': 8,
        'rev_thres': 0,
        'seed': 842,
    }

    # `update_sig` swaps `**kwargs` for the keys of `_d` in the reported signature
    @update_sig(_d)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

In [None]:
# create the config with all defaults and display it
# NOTE(review): the stored output shows max_seq_len 32768 while the class default is 2**14 (16384);
# the output looks stale - re-run this cell and confirm which value is intended
cfg = NLayersConfig()
cfg

ReformerLM config 
--------------------
vocab_sz        256
d_model         1024
n_layers        3
n_heads         8
d_ff            4096
ff_chunks       64
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
causal          True
pos_enc         axial
max_seq_len     32768
axial_shape     None
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
bucket_size     64
use_lsh         True
n_hashes        8
rev_thres       0
seed            842

In [None]:
#hide
# build the model from the `NLayersConfig` instance created in the previous cell
ReformerLM.from_config(cfg)

ReformerLM(
  (emb): TransformerEmbedding(
    (emb): Embedding(256, 1024)
    (dropout): Dropout(p=0.1, inplace=False)
    (pos_enc): AxialPositionalEmbedding()
  )
  (encoder): ReformerEncoder(
    (layers): ReversibleSequence(
      (blocks): ModuleList(
        (0): ReversibleBlock(
          (f): Deterministic(
            (net): PostNorm(
              (sublayer): ReformerAttentionV2(
                (in_proj): SharedQKAttnInProj(
                  (to_qk): Linear(in_features=1024, out_features=1024, bias=False)
                  (to_v): Linear(in_features=1024, out_features=1024, bias=False)
                )
                (lsh_attn): LSHAttention(
                  (dropout): Dropout(p=0.1, inplace=False)
                  (dropout_for_hash): Dropout(p=0.0, inplace=False)
                )
                (full_attn): ScaledDotProdAttention(
                  (dropout): Dropout(p=0.1, inplace=False)
                )
                (out_proj): Linear(in_features=1024, out_fe

In [None]:
#skip
#hide
# the reported signature lists the config defaults as keyword-only arguments (see `update_sig`)
signature(SyntheticConfig)

<Signature (*, vocab_sz=128, d_model=256, n_layers=1, n_heads=4, d_ff=256, attn_dropout=0.0, ff_dropout=0.0, emb_dropout=0.0, tie_weights=True, causal=True, pos_enc='absolute', max_seq_len=1024, axial_shape=None, axial_emb_dims=None, pad_idx=None, prenorm=False, attn_bias=False, bucket_size=64, use_lsh=True, n_hashes=4, seed=123)>

In [None]:
#hide
# nbdev: convert the project notebooks (the log of converted notebooks is printed below)
from nbdev.export import notebook2script; notebook2script()

Converted 00_core.ipynb.
Converted 01_layers.ipynb.
Converted 02_attention.ipynb.
Converted 03_transformer.ipynb.
Converted 04_reformer.ipynb.
Converted 05_tokenizers.ipynb.
Converted 06_data.ipynb.
Converted 07_metrics.ipynb.
Converted 08_optimizers.ipynb.
Converted 09_tracking.ipynb.
Converted 10_experiment.synthetic-task.ipynb.
Converted 10a_experiment.synthetic-task-comparison.ipynb.
Converted 10b_experiment.synthetic-task-minimal.ipynb.
Converted 10c_experiment.synthetic-task-analysis.ipynb.
Converted 11a_experiment.enwik8_baseline.ipynb.
Converted 11b_experiment.enwik8_sharedQK.ipynb.
Converted 11c_experiment.enwik8_reversible.ipynb.
Converted 12_experiment.speed-lsh_synthetic-task.ipynb.
Converted 13_experiment.enwik8-n_hashes.ipynb.
Converted 20_experiment-script.ipynb.
Converted 21_experiment-configs.ipynb.
Converted 50_exploration.LSH.ipynb.
Converted index.ipynb.
Converted reproducibility.report_1_reproducibility_summary.ipynb.
