In [1]:
import random


class Tokenizer:
    """Character-level tokenizer that also produces random arithmetic samples.

    The vocabulary is the concatenation of four character groups (marks,
    digits, operators, other punctuation). A character's id is its position
    in that flattened list.
    """

    def __init__(self):
        """Build the vocabulary tables and print the vocabulary size."""
        self.vocab = {
            # P: padding, S: start, E: end, U: unknown
            'mark': list('PSEU'),
            'number': list('0123456789'),
            'symbol': list('+-*/'),
            'other': list('.:=_')
        }

        # decoder: id -> character (list index); encoder: character -> id.
        self.decoder = [j for i in self.vocab.values() for j in i]
        self.encoder = {j: i for i, j in enumerate(self.decoder)}
        print(self._get_vocab_size())

    def get_data(self, third_number):
        """Generate one random sample and return it as a list of token ids.

        An expression of two random numbers (two decimals, drawn from
        [-100, 100]) joined by a random operator is built and evaluated.
        The sample text is ``'S' + result + '=' + expression + 'E'``, i.e.
        the evaluated result comes first and the expression second.

        Args:
            third_number: if truthy, a third random number is appended to the
                expression, always with ``+``.

        Returns:
            list[int]: encoder ids of every character of the sample text.
        """
        question = ''
        for i in range(2):
            question += '%.2f' % random.uniform(-100, 100)
            question += random.choice(self.vocab['symbol'])

        # The loop leaves a dangling operator at the end; drop it.
        question = question[:-1]
        if third_number:
            # The third operand is only ever added, never -, * or /.
            question += '+%.2f' % random.uniform(-100, 100)

        # eval() is applied only to the expression assembled above from
        # formatted floats and the four operators, never to external input.
        try:
            answer = '%.2f' % eval(question)
        except ZeroDivisionError:
            # An operand that formats to 0.00 / -0.00 can end up as a divisor.
            answer = '0.00'

        # Swap the direction: the result becomes the prompt side and the
        # expression becomes the target side.
        question, answer = answer, question

        token = 'S' + question + '=' + answer + 'E'
        token = [self.encoder[i] for i in token]
        return token

    def decode(self, token):
        """Map an iterable of token ids back to the corresponding string."""
        return ''.join([self.decoder[i] for i in token])

    def _get_vocab_size(self):
        """Print the lookup tables and a consistency check, then return the vocabulary size."""
        # return sum(len(v) for k, v in self.vocab.items())
        print(f'decoder = {self.decoder}')
        print(f'encoder = {self.encoder}')
        print(f'length check: {len(self.decoder) == len(self.encoder)}')
        return len(self.decoder)


# Module-level tokenizer instance; constructing it prints the lookup tables and vocabulary size.
tokenizer = Tokenizer()

# Generate one three-number sample and decode it back to text as the cell's displayed value.
tokenizer.decode(tokenizer.get_data(third_number=True))

decoder = ['P', 'S', 'E', 'U', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-', '*', '/', '.', ':', '=', '_']
encoder = {'P': 0, 'S': 1, 'E': 2, 'U': 3, '0': 4, '1': 5, '2': 6, '3': 7, '4': 8, '5': 9, '6': 10, '7': 11, '8': 12, '9': 13, '+': 14, '-': 15, '*': 16, '/': 17, '.': 18, ':': 19, '=': 20, '_': 21}
length check: True
22


'S-98.07=-31.47-26.05+-40.55E'

In [2]:
import torch

# Prefer the GPU when torch reports CUDA as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(device)

cuda


In [3]:
import subprocess
import os

# Source /etc/network_turbo inside a bash subshell and capture the environment
# lines containing "proxy" that result from it.
result = subprocess.run(
    'bash -c "source /etc/network_turbo && env | grep proxy"',
    shell=True,
    capture_output=True,
    text=True,
)
output = result.stdout

# Copy every captured NAME=value line into this process's environment.
for line in output.splitlines():
    if '=' not in line:
        continue
    # Split on the first '=' only, so values may themselves contain '='.
    var, value = line.split('=', 1)
    os.environ[var] = value

In [4]:
class QwenModel(torch.nn.Module):
    """Qwen2.5-14B backbone followed by a newly initialised linear vocabulary head."""

    def __init__(self):
        """Load the config and backbone, add the output head, move to `device`, enter train mode."""
        super().__init__()
        from transformers import AutoConfig, AutoModel

        # Custom cache directory for the downloaded files.
        cache_path = "/root/autodl-tmp/LLMs/transfromers_qwen/"

        # Load the Qwen2.5-14B config and model, both using cache_dir.
        self.config = AutoConfig.from_pretrained("Qwen/Qwen2.5-14B", trust_remote_code=True, cache_dir=cache_path)
        # Load the bare backbone (AutoModel) rather than AutoModelForCausalLM:
        # forward() reads `.last_hidden_state`, which the causal-LM output does
        # not provide (it returns logits), and the head is supplied by fc_out.
        # NOTE(review): no dtype/quantisation is requested here; confirm the
        # model fits the target GPU (the recorded run ended in CUDA OOM).
        self.feature = AutoModel.from_pretrained("Qwen/Qwen2.5-14B", config=self.config, trust_remote_code=True, cache_dir=cache_path)

        # Projects each hidden state to one score per vocabulary entry.
        self.fc_out = torch.nn.Linear(self.config.hidden_size, self.config.vocab_size, bias=False)

        self.to(device)
        self.train()

    def forward(self, input_ids, attention_mask):
        """Run the backbone and project its last hidden state through fc_out.

        Args:
            input_ids: token ids passed to the backbone as `input_ids`.
            attention_mask: mask passed to the backbone as `attention_mask`.

        Returns:
            The output of `fc_out` applied to the backbone's `last_hidden_state`.
        """
        out = self.feature(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
        return self.fc_out(out)

# Instantiate the Qwen wrapper; the constructor loads the checkpoint and moves the module to `device`.
qwen_model = QwenModel()
# Bare expression so the notebook displays the module structure.
qwen_model

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 270.00 MiB. GPU 0 has a total capacity of 23.64 GiB of which 86.25 MiB is free. Process 949121 has 606.00 MiB memory in use. Process 391954 has 22.96 GiB memory in use. Of the allocated memory 22.62 GiB is allocated by PyTorch, and 728.00 KiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [6]:
class ModelGEN(torch.nn.Module):
    """Small, randomly initialised Llama backbone with a linear vocabulary head."""

    def __init__(self):
        """Build the config, backbone and output head, move to `device`, enter train mode."""
        super().__init__()
        from transformers import LlamaConfig, LlamaModel

        # Define the architecture and hyper-parameters of a new, small Llama
        # model. This uses the Llama implementation from the transformers
        # library; it does not load any pretrained release such as LLaMA-7B
        # or LLaMA-13B.
        self.config = LlamaConfig(
            hidden_size=64,  # hidden state width
            intermediate_size=64,  # MLP intermediate width
            max_position_embeddings=128,  # maximum number of positions
            num_attention_heads=4,  # number of attention heads
            num_hidden_layers=4,  # number of Transformer layers
            num_key_value_heads=4,  # number of key/value heads
            vocab_size=tokenizer._get_vocab_size()  # vocabulary size
        )
        # Llama backbone used to extract per-token features.
        self.feature = LlamaModel(self.config)
        # Linear output head. Its input width is taken from the config so it
        # cannot drift from the backbone's hidden size.
        self.fc_out = torch.nn.Linear(self.config.hidden_size, self.config.vocab_size, bias=False)
        # self.fc_out -> (batch_size, seq_len, vocab_size)
        # -> logits: a score for every vocabulary token at every position

        self.to(device)
        self.train()

    def forward(self, input_ids, attention_mask):
        '''
        Run the backbone and project its last hidden state to vocabulary logits.

        input_ids, attention_mask: (batch_size, seq_len)
        '''
        out = self.feature(
            input_ids=input_ids,
            attention_mask=attention_mask
        ).last_hidden_state

        return self.fc_out(out)

# Instantiate the small Llama-based model; building its config prints the tokenizer tables.
model = ModelGEN()
# Bare expression so the notebook displays the module structure.
model

decoder = ['P', 'S', 'E', 'U', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-', '*', '/', '.', ':', '=', '_']
encoder = {'P': 0, 'S': 1, 'E': 2, 'U': 3, '0': 4, '1': 5, '2': 6, '3': 7, '4': 8, '5': 9, '6': 10, '7': 11, '8': 12, '9': 13, '+': 14, '-': 15, '*': 16, '/': 17, '.': 18, ':': 19, '=': 20, '_': 21}
length check: True


ModelGEN(
  (feature): LlamaModel(
    (embed_tokens): Embedding(22, 64)
    (layers): ModuleList(
      (0-3): 4 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=64, out_features=64, bias=False)
          (k_proj): Linear(in_features=64, out_features=64, bias=False)
          (v_proj): Linear(in_features=64, out_features=64, bias=False)
          (o_proj): Linear(in_features=64, out_features=64, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=64, out_features=64, bias=False)
          (up_proj): Linear(in_features=64, out_features=64, bias=False)
          (down_proj): Linear(in_features=64, out_features=64, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((64,), eps=1e-06)
        (post_attention_layernorm): LlamaRMSNorm((64,), eps=1e-06)
      )
    )
    (norm): LlamaRMSNorm((64,), eps=1e-06)
    (rotary_emb): LlamaRotaryEmbedding()
  )
  (fc_out): Linear

In [5]:
# Lazily created causal-LM wrapper reused across generate() calls.
generater = None


def generate(model_gen, input_ids):
    """Sample continuations of `input_ids` using `model_gen`'s weights.

    A causal-LM wrapper is built once from `model_gen.config` and its `model`
    and `lm_head` sub-modules are replaced by `model_gen.feature` and
    `model_gen.fc_out`, so sampling runs on the caller's parameters.

    Args:
        model_gen: module exposing `config`, `feature` and `fc_out`.
        input_ids: prompt token ids passed to the wrapper's `generate`.

    Returns:
        Whatever the wrapper's `generate` returns for the settings below:
        sampling with at most 35 new tokens, stopping on the 'E' token and
        padding with the 'P' token.
    """
    global generater
    if generater is None:
        # Wrapper class, used only for generation.
        from transformers import AutoModelForCausalLM
        generater = AutoModelForCausalLM.from_config(model_gen.config)

    # Re-bind whenever a different model is passed in; otherwise the cached
    # wrapper would keep sampling from the first model it was created for.
    # NOTE(review): the wrapper keeps the config of the first model; this
    # assumes every model passed here has a compatible config.
    if generater.model is not model_gen.feature or generater.lm_head is not model_gen.fc_out:
        generater.model = model_gen.feature
        generater.lm_head = model_gen.fc_out
        generater.to(device)

    return generater.generate(input_ids=input_ids,
                              min_length=-1,
                              top_k=0,  # integer 0 disables top-k filtering
                              top_p=1.0,
                              do_sample=True,
                              pad_token_id=tokenizer.encoder['P'],
                              max_new_tokens=35,
                              eos_token_id=tokenizer.encoder['E'])