In [33]:
from glob import glob
import sentencepiece as spm
from tokenizers import SentencePieceUnigramTokenizer
from transformers import AlbertTokenizer

# Fixed probe text used to compare tokenizers. It deliberately mixes two
# registers: a passage of biomedical prose followed by a programming question
# that ends in an HTML list snippet. The literal is kept verbatim (including
# its typos) so that token counts stay comparable between runs.
text_sample = """We aimed to evaluate the effect of sleep quality on memory, executive function, and language performance in patients with refractory focal epilepsy and controlled epilepsy and compare these
 with healthy individuals. We prospectively enrolled 37 adolescent and adult patients with refractory focal epilepsy.

How to avoid anti-clockwise rotation animation when reseting rotation from 360deg to 0 deg?

I am creating an animation that looks like a fancy wheel, When resetting rotation from 360deg to 0 deg, It animating the wheel in anti-clockwise direction, How to Avoid this???
HTML
<ul class="cm">
  <li><span>01</span></li>
  <li><span>02</span></li>
  <li><span>03</span></li>
  <li><span>04</span></li>
  <li><span>05</span></li>
  <li><span>06</span></li>
  <li><span>07</span></li>
  <li><span>08</span></li>
</ul>
"""

# Baseline: naive whitespace splitting of the sample.
separator = "-" * 100
whitespace_count = len(text_sample.split())
print(f"Split on whitespace {whitespace_count} tokens")

# Tokenize the same sample with each SentencePiece unigram model; the loop
# value is the "k" size used in the model file name (pile_4k.model, ...).
for t in [4, 8, 16, 32]:
    print(separator)
    print(f"Loading {t}k_model")
    model_path = f"spmodels/pile_{t}k.model"
    tokenizer = SentencePieceUnigramTokenizer.from_spm(model_path)
    tokenized = tokenizer.encode(text_sample)
    # Report the token count first, then the space-joined token pieces.
    print(f"{len(tokenized)} tokens")
    print(separator)
    print(" ".join(tokenized.tokens))

# Reference point: run the pretrained ALBERT tokenizer on the same sample.
print("-" *100)
print("Albert-tokenizer")
tok = AlbertTokenizer.from_pretrained("albert-large-v2")
# encode() wraps the sequence in [CLS] ... [SEP] by default, which would add
# two tokens that the SentencePiece runs above do not have (their output
# starts directly with the first text piece). Disable special tokens so the
# printed count measures only the text itself and is comparable.
altok = tok.encode(text_sample, add_special_tokens=False)
print(len(altok), "tokens")
print("-" *100)
print(" ".join(tok.convert_ids_to_tokens(altok)))

Split on whitespace 99 tokens
----------------------------------------------------------------------------------------------------
Loading 4k_model
279 tokens
----------------------------------------------------------------------------------------------------
▁We ▁a im ed ▁to ▁evaluate ▁the ▁effect ▁of ▁sleep ▁quality ▁on ▁memory , ▁ex ec ut ive ▁function , ▁and ▁language ▁performance ▁in ▁patients ▁with ▁re frac t ory ▁fo cal ▁epi le p s y ▁and ▁control led ▁epi le p s y ▁and ▁comp are ▁these ▁with ▁health y ▁individual s . ▁We ▁pro spec t ive ly ▁en roll ed ▁ 37 ▁ ado les cent ▁and ▁adult ▁patients ▁with ▁re frac t ory ▁fo cal ▁epi le p s y . ▁How ▁to ▁avoid ▁anti - c lock wise ▁ro t ation ▁an im ation ▁when ▁re set ing ▁ro t ation ▁from ▁3 60 de g ▁to ▁0 ▁de g ? ▁I ▁am ▁cre ating ▁an ▁an im ation ▁that ▁look s ▁like ▁a ▁f ancy ▁wh e el , ▁When ▁re set ting ▁ro t ation ▁from ▁3 60 de g ▁to ▁0 ▁de g , ▁It ▁an im ating ▁the ▁wh e el ▁in ▁anti - c lock wise ▁direction , ▁How ▁to ▁A vo i

In [3]:
"""Save tokenizers into json"""

from pathlib import Path

from tokenizers import SentencePieceUnigramTokenizer

# Create the output directory up front so the save below does not fail on a
# fresh checkout; this is a no-op when the directory already exists.
Path("tokenizers").mkdir(parents=True, exist_ok=True)

# Convert each SentencePiece model into the `tokenizers` JSON format.
# Note the output name drops the "k" suffix (pile_4.json, pile_8.json, ...).
for t in (4, 8, 16, 32):
    print("-"*100)
    print(f"Loading {t}k_model")
    tokenizer = SentencePieceUnigramTokenizer.from_spm(f"spmodels/pile_{t}k.model")
    tokenizer.save(f"tokenizers/pile_{t}.json", pretty=True)

----------------------------------------------------------------------------------------------------
Loading 4k_model
----------------------------------------------------------------------------------------------------
Loading 8k_model
----------------------------------------------------------------------------------------------------
Loading 16k_model
----------------------------------------------------------------------------------------------------
Loading 32k_model
