In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, Pipeline, PreTrainedTokenizer
from typing import Any
import torch


class ModelPipelineLoader:
    def __init__(self, model_id: str) -> None:
        """"""
        self._tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(
            model_id)
        self._model: Any = AutoModelForCausalLM.from_pretrained(
            model_id, load_in_8bit=True)
        self._pipeline: Pipeline | None = None

        self._parameters: dict = {
            'torch_dtype': 'torch.bfloat16',
            'device_map': 'auto',
            'max_new_tokens': 8192,
        }

    def __repr__(self) -> str:
        return f'{self.model}, {self.tokenizer}'

    @property
    def model(self):
        return self._model

    @property
    def tokenizer(self):
        return self._tokenizer

    @property
    def pipeline(self):
        return self._pipeline

    @property
    def parameters(self):
        return self._parameters

    def get_pipeline(self) -> Pipeline:
        """"""
        if self.pipeline is None:
            self._pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                torch_dtype=self.parameters['torch_dtype'],
                device_map=self.parameters['device_map'],
                max_new_tokens=self.parameters['max_new_tokens'],
                eos_token_id=self._tokenizer.eos_token_id,
            )

        return self.pipeline

    def reset(self):
        try:
            del self._tokenizer
            del self._model
            torch.cuda.empty_cache()
            del self
        except:
            print("Already destroyed.")

    def __del__(self):
        try:
            del self._tokenizer
            del self._model
            torch.cuda.empty_cache()
            del self
        except:
            print("Already destroyed.")

In [6]:
model_id = '/mnt/sda/models--meta-llama--Llama-2-7b-chat-hf/snapshots/c1b0db933684edbfe29a06fa47eb19cc48025e93/'

pipe = ModelPipelineLoader(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<<SYS>>\n\n"
system_prompt = """
You are an expert in Python programming. Use the provided context and your knowledge to assist users with Python-related queries.
Read the context carefully before providing answers and guide users through Python coding steps.
If you cannot answer a question solely based on the provided context, inform the user accordingly.
Your responses should contain Python code and explanations tailored to the given context.
"""


def get_prompt(instruction, new_system_prompt=system_prompt):
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template

In [15]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

llm = HuggingFacePipeline(pipeline=pipe.get_pipeline(),
                          model_kwargs={'temperature': 0})
instruction = "{text}"

template = get_prompt(instruction, system_prompt)
prompt = PromptTemplate(template=template, input_variables=["text"])
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)

In [17]:
user_prompt = "Write a code to for insertion sort algorithm."
llm_response = llm_chain({"text": user_prompt})
llm_response



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST]<<SYS>>

You are an expert in Python programming. Use the provided context and your knowledge to assist users with Python-related queries.
Read the context carefully before providing answers and guide users through Python coding steps.
If you cannot answer a question solely based on the provided context, inform the user accordingly.
Your responses should contain Python code and explanations tailored to the given context.

<<SYS>>

Write a code to for insertion sort algorithm.[/INST][0m

[1m> Finished chain.[0m


{'text': "  Sure, I'd be happy to help you with the insertion sort algorithm in Python!\n\nInsertion sort is a simple sorting algorithm that works by iterating through the list of items to be sorted, and inserting each item into its proper position in the sorted list. Here is an example of how you could implement insertion sort in Python:\n```\ndef insertion_sort(arr):\n    n = len(arr)\n    for i in range(1, n):\n        smallest_index = i\n        for j in range(i):\n            if arr[j] > arr[smallest_index]:\n                smallest_index = j\n        arr[smallest_index] = arr[i]\n    return arr\n```\nThis function takes an unsorted list of items as input, and returns the sorted list.\n\nTo use this function, you can call it with a list of items to be sorted, like this:\n```\narr = [5, 2, 8, 3, 1, 6]\nsorted_arr = insertion_sort(arr)\nprint(sorted_arr)  # [1, 2, 3, 5, 6, 8]\n```\nI hope this helps! Let me know if you have any questions."}

In [24]:
def extract_code_from_string(text):
    # Find the index of the first occurrence of triple backticks
    start_index = text.find("```")
    # Find the second occurrence
    end_index = text.find("```", start_index + 3) if start_index != -1 else -1

    # Extract the code snippet between the triple backticks
    if start_index != -1 and end_index != -1:
        code_snippet = text[start_index + 3:end_index].strip()
        return code_snippet
    else:
        return "Code snippet not found"

In [27]:
response = extract_code_from_string(llm_response['text'])
print(response)

def insertion_sort(arr):
    n = len(arr)
    for i in range(1, n):
        smallest_index = i
        for j in range(i):
            if arr[j] > arr[smallest_index]:
                smallest_index = j
        arr[smallest_index] = arr[i]
    return arr
