In [1]:
import warnings
from typing import Any, Dict, Tuple
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="A new version of the following files was downloaded from")

In [2]:
# Building blocks of the instruction prompt
INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
INSTRUCTION_KEY = "### Instruction:"
RESPONSE_KEY = "### Response:"
END_KEY = "### End"

# Prompt template: the fixed parts are filled in here, leaving a single
# `{instruction}` placeholder to be substituted later via `.format(instruction=...)`.
PROMPT_FOR_GENERATION_FORMAT = (
    f"{INTRO_BLURB}\n"
    f"{INSTRUCTION_KEY}\n"
    "{instruction}\n"
    f"{RESPONSE_KEY}\n"
)

In [3]:
class InstructionTextGenerationPipeline:
    """Instruction-following text generation on top of a causal language model.

    The constructor loads a model and its tokenizer by name, moves the model to
    CUDA when available (CPU otherwise) and stores default generation settings.
    Calling the instance formats an instruction with the module-level
    ``PROMPT_FOR_GENERATION_FORMAT`` template, runs ``model.generate`` and
    returns only the newly generated text.
    """

    def __init__(
        self,
        model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        use_auth_token=None,
    ) -> None:
        """Load the model and tokenizer and prepare default generation settings.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                model and the tokenizer.
            torch_dtype: dtype used when loading the model and when moving it
                to the target device.
            trust_remote_code: Forwarded to ``from_pretrained`` for both the
                model and the tokenizer.
            use_auth_token: Forwarded to ``from_pretrained`` for both the
                model and the tokenizer.
        """
        # Load model
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code,
            use_auth_token=use_auth_token,
        )

        # Load the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code,
            use_auth_token=use_auth_token,
        )
        if tokenizer.pad_token_id is None:
            # Fall back to EOS as the padding token so a pad id is always defined.
            warnings.warn(
                "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
            )
            tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        self.tokenizer = tokenizer

        # Device to use (GPU or CPU)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.eval()
        self.model.to(device=device, dtype=torch_dtype)

        # Default generation parameters; per-call keyword arguments override these.
        self.generate_kwargs = {
            "temperature": 0.1,
            "top_p": 0.92,
            "top_k": 0,
            "max_new_tokens": 1024,
            "use_cache": True,
            "do_sample": True,
            "eos_token_id": self.tokenizer.eos_token_id,
            "pad_token_id": self.tokenizer.pad_token_id,
            "repetition_penalty": 1.1,  # 1.0 means no penalty, > 1.0 means penalty, 1.2 from CTRL paper
        }

    def format_instruction(self, instruction):
        """Return the full prompt text for ``instruction`` using the prompt template."""
        return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)

    def __call__(self, instruction: str, **generate_kwargs: Any) -> str:
        """Generate a response for a single instruction.

        Args:
            instruction: The instruction text to insert into the prompt template.
            **generate_kwargs: Extra keyword arguments for ``model.generate``;
                they take precedence over ``self.generate_kwargs``.

        Returns:
            The decoded text of the newly generated tokens only (the prompt is
            stripped), with special tokens skipped.
        """
        # Build the prompt through the shared helper so both code paths agree.
        prompt = self.format_instruction(instruction)

        # Tokenize the prompt and move the tensors to the model's device
        encoded = self.tokenizer(prompt, return_tensors="pt")
        device = self.model.device
        input_ids = encoded["input_ids"].to(device)

        # Merge defaults with per-call overrides (overrides win)
        call_kwargs = {**self.generate_kwargs, **generate_kwargs}

        # Forward the tokenizer's attention mask so generate() does not have to
        # infer it from pad_token_id, which is ambiguous when pad == eos.
        # setdefault keeps an explicitly supplied attention_mask untouched.
        attention_mask = encoded.get("attention_mask")
        if attention_mask is not None:
            call_kwargs.setdefault("attention_mask", attention_mask.to(device))

        with torch.no_grad():
            # Generate the output sequence
            output_ids = self.model.generate(input_ids, **call_kwargs)

        # Keep only the tokens produced after the prompt
        prompt_length = input_ids.shape[1]
        new_tokens = output_ids[0, prompt_length:]

        # Decode the new tokens into text
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

In [6]:
pip install einops

Collecting einops
  Downloading einops-0.7.0-py3-none-any.whl.metadata (13 kB)
Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m621.0 kB/s[0m eta [36m0:00:00[0m[36m0:00:01[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.7.0
Note: you may need to restart the kernel to use updated packages.


In [7]:
# Initialize the model and tokenizer
generate = InstructionTextGenerationPipeline(
    "mosaicml/mpt-7b-instruct",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
# Token ids that mark the end of a generation. The previous lookup passed an
# empty string to convert_tokens_to_ids, which is not a real vocabulary token;
# use the tokenizer's own end-of-sequence id instead.
# NOTE(review): the empty string looks like a stripped special-token literal —
# confirm EOS is the intended stop token.
stop_token_ids = [generate.tokenizer.eos_token_id]

norm.py:   0%|          | 0.00/3.12k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- norm.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


flash_attn_triton.py:   0%|          | 0.00/28.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- flash_attn_triton.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- norm.py
- flash_attn_triton.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


blocks.py:   0%|          | 0.00/4.04k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- blocks.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.




A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- attention.py
- blocks.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_mpt.py:   0%|          | 0.00/32.4k [00:00<?, ?B/s]

custom_embedding.py:   0%|          | 0.00/292 [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- custom_embedding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


adapt_tokenizer.py:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- adapt_tokenizer.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


param_init_fns.py:   0%|          | 0.00/11.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- param_init_fns.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


hf_prefixlm_converter.py:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- hf_prefixlm_converter.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


meta_init_context.py:   0%|          | 0.00/3.96k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- meta_init_context.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/mosaicml/mpt-7b-instruct:
- modeling_mpt.py
- custom_embedding.py
- adapt_tokenizer.py
- param_init_fns.py
- hf_prefixlm_converter.py
- meta_init_context.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


pytorch_model.bin.index.json:   0%|          | 0.00/16.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.36G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


generation_config.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [12]:
# Source sentences the story should be built from (one scene per line)
article="""
a brown dog with a green collar is sitting on the grass.
a dog is sniffing something near the car.
a dog is licking the face of another dog

"""
# Task description followed directly by the article; the article's leading
# newline is what separates the two parts.
prompt = f"Write a three long paragraph story on the three lines of the article{article}"

# Run the instruction pipeline on the prompt and show the generated story
generated_text = generate(prompt)
print(generated_text)


Here’s one possible version, I hope it works for you!  The first line could be the beginning of a longer narrative about this scene, or it might stand alone as a brief description or summary. 
A brown dog with a green collar sits on the grass in front of a house, looking up at its owner who is standing nearby. The dog seems content and relaxed, and is gently wagging its tail. Near the car parked in the driveway, another dog can be seen sniffing something intently. It appears to be a fellow canine companion, and the two dogs are clearly close friends and enjoy spending time together. The first dog then lifts its head and begins licking the face of its friend, seemingly in celebration or affectionate greeting. This touching moment between the two dogs conveys a sense of companionship and mutual affection, and highlights the deep bond shared by these two animals. 

This scene depicts a typical domestic setting, where a dog is enjoying some leisure time outside with its owner. The brown do