In [11]:
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
import math
import torch
import sys

In [15]:
# Prompt templates, looked up by name. Every template exposes exactly one
# `{instruction}` placeholder that is filled in before generation.
PROMPT_DICT = dict(
    # Plain instruction/response layout.
    prompt_no_input=(
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response:"
    ),
    # [INST] layout carrying a system message between <<SYS>> markers.
    prompt_no_input_llama2=(
        "<s>[INST] <<SYS>>\n"
        "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\n"
        "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n"
        "<</SYS>> \n\n {instruction} [/INST]"
    ),
    # Bare [INST] wrapper with no system message.
    prompt_llama2="[INST]{instruction}[/INST]",
)

In [2]:
# Run configuration: checkpoint id, local download cache, and the context
# window (in positions) that later cells compare against the model's native limit.
model_name = "Yukang/LongAlpaca-7B"
cache_dir = "./cache"
context_size = 32 * 1024

In [3]:
# Load the checkpoint's configuration, using `cache_dir` as the download cache.
config = AutoConfig.from_pretrained(
    model_name,
    cache_dir=cache_dir,
)

In [4]:
# Native context length from the config (None if the attribute is absent).
orig_ctx_len = getattr(config, "max_position_embeddings", None)

# Only when the requested window is larger than the native one: enable linear
# RoPE scaling with the ratio rounded up to a whole number (kept as a float).
if orig_ctx_len and orig_ctx_len < context_size:
    scaling_factor = float(math.ceil(context_size / orig_ctx_len))
    config.rope_scaling = dict(type="linear", factor=scaling_factor)

In [5]:
# Load the causal LM with the (possibly RoPE-scaled) config. Weights are
# quantised to 4-bit NF4 with double quantisation and bfloat16 compute, and
# device placement is left to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=cache_dir,
    config=config,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        load_in_4bit=True,
    ),
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Resize the token-embedding table to 32001 rows.
# NOTE(review): presumably the base vocabulary plus one added token - confirm
# against len(tokenizer) once the tokenizer is loaded.
model.resize_token_embeddings(new_num_tokens=32001)

Embedding(32001, 4096, padding_idx=0)

In [7]:
# Load the non-fast tokenizer for the checkpoint, padding on the right.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir=cache_dir,
    # Use the larger of the requested and native context lengths.
    # `orig_ctx_len` is None when the config has no `max_position_embeddings`
    # (the RoPE-scaling cell already guards for that); comparing an int with
    # None would raise TypeError, so fall back to `context_size` in that case.
    model_max_length=max(context_size, orig_ctx_len or context_size),
    padding_side="right",
    use_fast=False,
)

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

In [8]:
# Put the model in evaluation mode. The call is the cell's last expression,
# so its return value (the module itself) is what gets displayed below.
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32001, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaLinearScalingRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNo

In [10]:
# Wrap the model with torch.compile only off Windows and on PyTorch 2 or newer.
if sys.platform != "win32" and torch.__version__ >= "2":
    model = torch.compile(model)

In [12]:
def build_generator(
    model, tokenizer, temperature=0.6, top_p=0.9, max_gen_len=512, use_cache=True
):
    """Build a closure that generates a completion for a single prompt.

    Args:
        model: Object exposing ``.device`` and ``.generate(**kwargs)``.
        tokenizer: Callable that encodes a prompt (``return_tensors="pt"``)
            and exposes ``.decode(ids, skip_special_tokens=...)``.
        temperature: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.
        max_gen_len: Upper bound on newly generated tokens (``max_new_tokens``).
        use_cache: Forwarded to ``model.generate``.

    Returns:
        A function ``response(prompt) -> str`` that streams the generation
        through a ``TextStreamer`` and returns only the newly generated text,
        stripped of surrounding whitespace.

    NOTE(review): ``temperature`` / ``top_p`` only influence decoding when
    sampling is enabled; confirm the checkpoint's generation config turns
    ``do_sample`` on if sampling is intended.
    """

    def response(prompt):
        """Generate, stream, and return the completion text for ``prompt``."""
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        # Number of prompt tokens; used below to drop the echoed prompt.
        prompt_len = inputs["input_ids"].shape[-1]

        streamer = TextStreamer(tokenizer)

        output = model.generate(
            **inputs,
            max_new_tokens=max_gen_len,
            temperature=temperature,
            top_p=top_p,
            use_cache=use_cache,
            streamer=streamer,
        )

        # The returned sequence starts with the prompt tokens, so slice them
        # off before decoding. The previous text-based approach
        # (`out.split(prompt.lstrip("<s>"))[1]`) was fragile: `lstrip("<s>")`
        # strips any leading '<', 's', '>' characters rather than the "<s>"
        # prefix, the split raised IndexError whenever the decoded text did
        # not reproduce the prompt verbatim, and it truncated completions
        # that happened to repeat the prompt.
        completion_ids = output[0][prompt_len:]
        return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

    return response

In [13]:
# Bind the loaded model and tokenizer into a prompt -> text callable,
# keeping the generator's default decoding settings.
respond = build_generator(model=model, tokenizer=tokenizer)

In [14]:
material = "Item CB 119875: speaker 3: Agenda Item one Council Bill 119 875 relating to the South Park immigration operation, an amendment to the Interlocal statement between the City of Seattle and the South Park District.\nspeaker 0: Thank you. I moved to pass Council Bill 11987875. Is there a second? Second. It's been moved and seconded to pass the bill and sponsor this bill. I will address it first and then open the floor to comments from my colleagues.\nspeaker 0: We've discussed this amendment for a few months now. On April 28th, 2014, the Seattle City Council approved ordinance 124468, which authorized the Interlocal agreement between the City of Seattle and the Metropolitan Park District for implementation of Park and Recreation Services and infrastructure in Seattle.\nspeaker 0: pre-COVID, the original schedule would have been to use this year 2020 to plan the next six years cycle of Metropolitan Park District spending. The spending plan, which is scheduled for 2020 21 to 2026. However, COVID has shifted our timelines and we must adjust.\nspeaker 0: Council Bill 119875. We must adjust. I'm sorry. Council Bill 119875 is an amendment to the Interlocal agreement which authorizes the delay in the event of an emergency by COVID 19. The MPD board will consider Resolution 36 and 37 later this afternoon, which concurrently will complete the Interlocal agreement amended.\nspeaker 0: But on the what we call the Metropolitan Park District side, I recommend Council adopt this bill. Are there any comments on the bill? Will the clerk please call the rule on the passage of the bill?\nspeaker 2: So aren't. Yes.\nspeaker 1: Strauss that.\nspeaker 2: HERBOLD Yes.\nspeaker 1: Lewis Yes.\nspeaker 2: Morales. Rosetta. Councilmember Mosquera. Peterson.\nspeaker 1: Yes.\nspeaker 2: Councilmember Suarez.\nspeaker 0: Yes.\nspeaker 2: 17 favorite animals.\nspeaker 0: Okay, so we'll just go forward without mosquito.\nspeaker 2: Correct.\nspeaker 0: Okay. 
The bill passes and the chair will sign it. Will the clerk please fix my signature to the legislation on my behalf?\nspeaker 2: Madam President, I'm sorry. I couldn't not get off me. Yes, I.\nspeaker 0: Am glad to see that. I'm not the only one that does that. Okay. I'm sorry. Thank you for.\nspeaker 2: Receiving a favor. And unopposed.\nspeaker 0: Great. Thank you. So we are going to get into the appointments, 21 of them, the ones that we discussed this morning, that council member Herbert through this morning. Thank you very much. Well, the clerk please read items 222 into the record.\n\nItem Res 31974: speaker 3: Agenda Item 23 Resolution 31974. Setting the time and place for a hearing on the appeal of Robert Wexler. Hearing Center Case Number CW f0149.\nspeaker 0: Thank you. I'm moved to adopt resolution 31974. There a second.\nspeaker 2: I can.\nspeaker 0: Thank you. It's been moved and seconded to adopt the resolution. Council member. That's me. As the sponsor, I will address the item. This bill sets the time and place for a hearing on what appeal to the hearing examiners findings and recommendation report of the waterfront lead or local improvement district number 6751.\nspeaker 0: This resolution sets the hearing of the appeal to January 5th, 2021, and the regularly scheduled Public Assets and Native Communities Committee. To meet the requirements of the quasi judicial rules. Council must take action this week via this resolution.\nspeaker 0: And this is a mandatory procedural matter, and we have voted on this in the past. I recommend council confirm Resolution 31974. Second. Okay. Okay. So will the clerk please call the role on the adoption of the resolution?\nspeaker 2: Salon? Yes. Strauss Yeah. Herbold? Yes. Lewis.\nspeaker 1: Yes.\nspeaker 2: Where else? You know, that's. Sarah, I. Peterson.\nspeaker 1: Yes.\nspeaker 2: Councilmember Suarez. Yes. Vote in favor and unopposed.\nspeaker 0: Thank you. The resolution is adopted and the chair will sign it. 
Will the court please affix my signature to the legislation on my behalf? That's the game you hear over there making comments. Slide show that. Oh, okay. So other business.\nspeaker 0: Are there any other further business to come before the council? Okay. Well, I have some there's no objection. I will be excused from the October 26 city council meeting. Hearing no objection. I am excused for the October 26 council meeting.\nspeaker 0: Okay, colleagues, this concludes the items on a business on today's agenda. Our next regularly scheduled city council meeting is on Monday, October 26 at two. But before you go, before we adjourn, we will return at 530 for our Metropolitan Park District Board meeting in which you are all board members for public comment, a public hearing.\nspeaker 0: We're going to review two resolutions and have a presentation by superintendent basis again today. And with that, we are now joined. Thank you very much.\n\n"

In [16]:
# Template used for this run. Despite the variable's name, this selects the
# "prompt_llama2" entry of PROMPT_DICT, not the "prompt_no_input" one.
prompt_no_input = PROMPT_DICT["prompt_llama2"]

In [17]:
# Instruction that is appended after the transcript when the prompt is built.
question = "Summarize the meeting transcript."

In [18]:
# Fill the template's {instruction} slot with the transcript followed by the
# question on its own line.
prompt = prompt_no_input.format(instruction=f"{material}\n{question}")

In [19]:
# Run generation for the assembled prompt and keep the returned text.
output = respond(prompt)

<s> [INST]Item CB 119875: speaker 3: Agenda Item one Council Bill 119 875 relating to the South Park immigration operation, an amendment to the Interlocal statement between the City of Seattle and the South Park District.
speaker 0: Thank you. I moved to pass Council Bill 11987875. Is there a second? Second. It's been moved and seconded to pass the bill and sponsor this bill. I will address it first and then open the floor to comments from my colleagues.
speaker 0: We've discussed this amendment for a few months now. On April 28th, 2014, the Seattle City Council approved ordinance 124468, which authorized the Interlocal agreement between the City of Seattle and the Metropolitan Park District for implementation of Park and Recreation Services and infrastructure in Seattle.
speaker 0: pre-COVID, the original schedule would have been to use this year 2020 to plan the next six years cycle of Metropolitan Park District spending. The spending plan, which is scheduled for 2020 21 to 2026. How