In [1]:
import struct
import json
import torch
import numpy as np
from transformers import AutoModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class SpamModel(torch.nn.Module):
    """Spam classification head on top of a pretrained sentence-transformers encoder.

    The encoder's first-position hidden state is fed through
    Linear(384, 384) -> ReLU -> Dropout(0.3) -> Linear(384, 2).
    """

    def __init__(self):
        super().__init__()
        # The attribute names below become the prefixes of the state_dict keys
        # (e.g. ``l1.embeddings...``), so they must not be renamed.
        self.l1 = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
        self.pre_classifier = torch.nn.Linear(384, 384)
        self.dropout = torch.nn.Dropout(0.3)
        self.relu = torch.nn.ReLU()
        self.classifier = torch.nn.Linear(384, 2)

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Run the encoder and the classification head.

        Args:
            input_ids: forwarded to the encoder as ``input_ids``.
            attention_mask: forwarded to the encoder as ``attention_mask``.
            token_type_ids: forwarded to the encoder as ``token_type_ids``.

        Returns:
            The output of the final ``Linear(384, 2)`` layer.
        """
        encoder_out = self.l1(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Element 0 of the encoder output, sliced at index 0 of its second axis
        # (presumably the [CLS] token of each sequence -- confirm against the encoder).
        first_token = encoder_out[0][:, 0]
        features = self.dropout(self.relu(self.pre_classifier(first_token)))
        return self.classifier(features)

In [3]:
# Input directory, the fine-tuned PyTorch checkpoint inside it, and the ggml
# file that the conversion will produce.
dir_model = '../../Models'
output_model_file = dir_model + '/pytorch_spam.bin'
fname_out = dir_model + "/ggml-model.bin"

# Tokenizer definition (parsed JSON).
with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
    encoder = json.load(f)

# Model hyper-parameters; these are written into the ggml header.
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

# Vocabulary, one entry per line (trailing newlines are kept by readlines()).
with open(dir_model + "/vocab.txt", "r", encoding="utf-8") as f:
    vocab = f.readlines()

# ftype selects the storage type for 2-D weights: 0 -> "f32", 1 -> "f16".
ftype_str = ["f32", "f16"]
ftype = 1

# The checkpoint is a pickled nn.Module (its .state_dict() is used afterwards),
# not a plain state_dict, so it cannot be loaded with weights_only=True.
# PyTorch >= 2.6 defaults to weights_only=True, hence the explicit opt-out.
# NOTE(security): full unpickling can execute arbitrary code -- only load
# checkpoints that come from a trusted source.
model = torch.load(
    output_model_file,
    map_location=torch.device('cpu'),
    weights_only=False,
)

In [4]:
# Snapshot of every parameter/buffer of the loaded model, keyed by its dotted name.
list_vars = model.state_dict()

# Print name, shape and dtype of each entry to check what will be exported.
for name, tensor in list_vars.items():
    print(name, tensor.shape, tensor.dtype)

l1.embeddings.word_embeddings.weight torch.Size([30522, 384]) torch.float32
l1.embeddings.position_embeddings.weight torch.Size([512, 384]) torch.float32
l1.embeddings.token_type_embeddings.weight torch.Size([2, 384]) torch.float32
l1.embeddings.LayerNorm.weight torch.Size([384]) torch.float32
l1.embeddings.LayerNorm.bias torch.Size([384]) torch.float32
l1.encoder.layer.0.attention.self.query.weight torch.Size([384, 384]) torch.float32
l1.encoder.layer.0.attention.self.query.bias torch.Size([384]) torch.float32
l1.encoder.layer.0.attention.self.key.weight torch.Size([384, 384]) torch.float32
l1.encoder.layer.0.attention.self.key.bias torch.Size([384]) torch.float32
l1.encoder.layer.0.attention.self.value.weight torch.Size([384, 384]) torch.float32
l1.encoder.layer.0.attention.self.value.bias torch.Size([384]) torch.float32
l1.encoder.layer.0.attention.output.dense.weight torch.Size([384, 384]) torch.float32
l1.encoder.layer.0.attention.output.dense.bias torch.Size([384]) torch.float32


In [5]:
print(hparams)

# Hyper-parameters stored in the file header, in the order they are written.
header_keys = (
    "vocab_size",
    "max_position_embeddings",
    "hidden_size",
    "intermediate_size",
    "num_attention_heads",
    "num_hidden_layers",
)

# State-dict entries that are not exported to the ggml file.
skipped_vars = ('l1.embeddings.position_ids', 'l1.pooler.dense.weight', 'l1.pooler.dense.bias')

# The context manager guarantees the file is flushed and closed even if a
# tensor fails to convert part-way through.
with open(fname_out, "wb") as fout:
    # File header: magic number, hyper-parameters, then the global ftype.
    fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
    for key in header_keys:
        fout.write(struct.pack("i", hparams[key]))
    fout.write(struct.pack("i", ftype))

    for name in list_vars.keys():
        # squeeze() drops size-1 axes before the shape is recorded.
        data = list_vars[name].squeeze().numpy()
        if name in skipped_vars:
            print("Skip variable: " + name + " with shape: ", data.shape)
            continue
        print("Processing variable: " + name + " with shape: ", data.shape)

        n_dims = len(data.shape)

        # ftype == 0 -> float32, ftype == 1 -> float16
        # Only 2-D ".weight" tensors are stored as float16.
        if ftype == 1 and name.endswith(".weight") and n_dims == 2:
            print("  Converting to float16")
            data = data.astype(np.float16)
            l_type = 1
        else:
            # l_type 0 declares a float32 payload, so make sure the bytes
            # really are float32 (no copy when they already are).
            data = data.astype(np.float32, copy=False)
            l_type = 0

        # Per-tensor header: rank, name length, element type, the dimensions in
        # reversed order, then the UTF-8 name.
        # (Named name_bytes so the builtin ``str`` is not shadowed in the kernel.)
        name_bytes = name.encode('utf-8')
        fout.write(struct.pack("iii", n_dims, len(name_bytes), l_type))
        for dim in reversed(data.shape):
            fout.write(struct.pack("i", dim))
        fout.write(name_bytes)

        # Raw tensor data.
        data.tofile(fout)

print("Done. Output file: " + fname_out)
print("")

{'_name_or_path': 'sentence-transformers/all-MiniLM-L6-v2', 'architectures': ['BertModel'], 'attention_probs_dropout_prob': 0.1, 'classifier_dropout': None, 'gradient_checkpointing': False, 'hidden_act': 'gelu', 'hidden_dropout_prob': 0.1, 'hidden_size': 384, 'initializer_range': 0.02, 'intermediate_size': 1536, 'layer_norm_eps': 1e-12, 'max_position_embeddings': 512, 'model_type': 'bert', 'num_attention_heads': 12, 'num_hidden_layers': 6, 'pad_token_id': 0, 'position_embedding_type': 'absolute', 'torch_dtype': 'float32', 'transformers_version': '4.31.0', 'type_vocab_size': 2, 'use_cache': True, 'vocab_size': 30522}
Processing variable: l1.embeddings.word_embeddings.weight with shape:  (30522, 384)
  Converting to float16
Processing variable: l1.embeddings.position_embeddings.weight with shape:  (512, 384)
  Converting to float16
Processing variable: l1.embeddings.token_type_embeddings.weight with shape:  (2, 384)
  Converting to float16
Processing variable: l1.embeddings.LayerNorm.wei