In [1]:
from callformer.decoding import DecodingOptions
from callformer.transformer import ModelDimensions, CallFormer
from callformer.tokenizer import Tokenizer

In [9]:
import pickle
from copy import copy
from datetime import date

# Build the training targets: every sample gets the token ids of the call
# sequence "<|searchnotes|>(<start date>)<|summarize|>" appended as its last field.

# NOTE(review): pickle.load can execute arbitrary code embedded in the file.
# Only load "full_samples.pkl" when it comes from a trusted source.
with open("full_samples.pkl", "rb") as f:
    full_samples = pickle.load(f)

tokenizer = Tokenizer()

# Call tokens emitted for every sample, in order. The "args" key is not read
# anywhere in this cell; the only argument (the start date) is derived per sample.
tokens = [
    {"call": "<|searchnotes|>", "args": []},
    {"call": "<|summarize|>"},
]


def _format_search_start(date_parts):
    """Return the quoted start-date argument for the search call.

    ``date_parts`` is indexed as (year, month, day). A year of -1 marks a
    sample without a start date, for which an empty string is returned so the
    call is rendered with empty parentheses.
    """
    if date_parts[0] == -1:
        return ""
    start = date(year=date_parts[0], month=date_parts[1], day=date_parts[2])
    # isoformat() always yields zero-padded YYYY-MM-DD; strftime("%Y") padding
    # for years below 1000 depends on the platform's C library.
    return f'"{start.isoformat()}"'


def _build_call_string(date_parts):
    """Return the full call sequence text for one sample's start date."""
    return (
        f'{tokens[0]["call"]}'
        f'({_format_search_start(date_parts)})'
        f'{tokens[1]["call"]}'
    )


# Re-created on every run so re-executing the cell does not duplicate entries.
token_samples = []

for sample in full_samples:
    # sample[2] holds the (year, month, day) parts of the search start date.
    call_string = _build_call_string(sample[2])
    toks = tokenizer.encode(call_string)
    token_samples.append((*sample, toks))

# Sanity check: show the first two samples with their targets decoded back to text.
[(sample[0], sample[1], tokenizer.decode(sample[-1])) for sample in token_samples[:2]]

[('2021-01-03',
  'Today is Sunday, January 03, 2021. Give me a summary of my notes from the past two days. Focus on the ones that are related to quantum computing.',
  ['<|searchnotes|>("2021-01-01")<|summarize|>']),
 ('2033-03-27',
  'Today is Sunday, March 27, 2033. Summarize my thoughts on AI safety from the past three days. Organize the summary as a timeline.',
  ['<|searchnotes|>("2033-03-24")<|summarize|>'])]

In [20]:
# The model width is the length of the second-to-last field of the first
# sample, i.e. the field sitting just before the appended token ids.
# NOTE(review): presumably that field is an embedding vector — confirm.
first_sample = token_samples[0]
STATE_SIZE = len(first_sample[-2])

# Hyper-parameters for the call decoder, gathered in one place before
# constructing the dimensions object.
dimension_kwargs = {
    "n_vocab": tokenizer.vocab_size,
    "n_ctx": 10,
    "n_state": STATE_SIZE,
    "n_head": 8,
    "n_layer": 2,
}
model_dims = ModelDimensions(**dimension_kwargs)

model = CallFormer(model_dims)

CallFormer(
  (decoder): Decoder(
    (token_embedding): Embedding(25, 1536)
    (blocks): ModuleList(
      (0): ResidualAttentionBlock(
        (attn): MultiHeadAttention(
          (query): Linear(in_features=1536, out_features=1536, bias=True)
          (key): Linear(in_features=1536, out_features=1536, bias=False)
          (value): Linear(in_features=1536, out_features=1536, bias=True)
          (out): Linear(in_features=1536, out_features=1536, bias=True)
        )
        (attn_ln): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (cross_attn): MultiHeadAttention(
          (query): Linear(in_features=1536, out_features=1536, bias=True)
          (key): Linear(in_features=1536, out_features=1536, bias=False)
          (value): Linear(in_features=1536, out_features=1536, bias=True)
          (out): Linear(in_features=1536, out_features=1536, bias=True)
        )
        (cross_attn_ln): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (mlp): Sequentia

In [22]:
from transformers import WhisperForConditionalGeneration
# Load the pretrained Whisper checkpoint from the Hugging Face hub.
# This rebinds `model`, which previously referred to the CallFormer instance.
whisper_checkpoint = "openai/whisper-tiny"
model = WhisperForConditionalGeneration.from_pretrained(whisper_checkpoint)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.96k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/151M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/3.49k [00:00<?, ?B/s]

In [24]:
# Display the module tree of whatever `model` is currently bound to.
model

WhisperForConditionalGeneration(
  (model): WhisperModel(
    (encoder): WhisperEncoder(
      (conv1): Conv1d(80, 384, kernel_size=(3,), stride=(1,), padding=(1,))
      (conv2): Conv1d(384, 384, kernel_size=(3,), stride=(2,), padding=(1,))
      (embed_positions): Embedding(1500, 384)
      (layers): ModuleList(
        (0): WhisperEncoderLayer(
          (self_attn): WhisperAttention(
            (k_proj): Linear(in_features=384, out_features=384, bias=False)
            (v_proj): Linear(in_features=384, out_features=384, bias=True)
            (q_proj): Linear(in_features=384, out_features=384, bias=True)
            (out_proj): Linear(in_features=384, out_features=384, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (fc2): Linear(in_features=1536, out_features=384, bias=True)
          (final_lay

In [23]:
def _tensor_bytes(tensor):
    """Return the bytes held by one tensor: element count times bytes per element."""
    return tensor.nelement() * tensor.element_size()


# Footprint of the model currently bound to `model`: parameters plus buffers.
param_size = sum(_tensor_bytes(tensor) for tensor in model.parameters())
buffer_size = sum(_tensor_bytes(tensor) for tensor in model.buffers())

# Reported in units of 1024**2 bytes.
bytes_per_mb = 1024**2
size_all_mb = (param_size + buffer_size) / bytes_per_mb
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 144.045MB


In [11]:
# Show the tokenizer's vocabulary size (the value passed as n_vocab to ModelDimensions).
tokenizer.vocab_size

25