***leveraging prompting for text classification***

In [None]:
import torch

# Choose the compute device a single time, up front: the Apple-Silicon GPU
# backend (MPS) when torch reports it as available, otherwise the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using {device}")

# Usage reminders (comments only, nothing is executed here):
#   model.to(device)         # when loading models
#   data = data.to(device)   # when passing data

# Attention backend name kept as a notebook-level setting;
# presumably meant for the model-loading call — confirm.
attn_implementation = "eager"

In [None]:
#loading a text generation model
from huggingface_hub import ModelHubMixin
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load Phi-3-mini (instruct) and wrap it in a text-generation pipeline.
# `device` and `attn_implementation` are defined in the setup cell at the top of the notebook.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    trust_remote_code=True,  # runs modeling code shipped inside the model repository
    attn_implementation=attn_implementation,  # the setting was defined earlier but never handed to the loader
)
model.to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,  # return only the newly generated text, not the prompt
    max_new_tokens=500,
    do_sample=False,  # no sampling: pick the most probable token each step for consistent output
)



In [None]:
# A single-turn chat: one user message asking for a joke.
messages = [
    {
        "role": "user",
        "content": "tell me a funny joke about chickens",
    }
]

# Run the pipeline on the chat; the bare name below displays the raw result.
response = pipe(messages)
response

In [None]:
# Show just the generated text of the first returned result.
first_candidate = response[0]
print(first_candidate["generated_text"])

In [None]:
# Render the chat through the tokenizer's chat template as plain text
# (tokenize=False) to inspect the prompt string built from `messages`.
chat_tokenizer = pipe.tokenizer
prompt = chat_tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)

In [None]:
# Same chat as before, but with sampling switched on for this call only.
sampling_options = {"do_sample": True, "temperature": 1, "top_p": 1}
output = pipe(messages, **sampling_options)
print(output[0]["generated_text"])

In [None]:
# Plain classification request with no cue for the answer format.
messages = [
    {
        "role": "user",
        "content": "classify the text into negative or positive: the movie is amazing!",
    }
]
response = pipe(messages)
response

Notice below how the prompt is reworded to end with a cue ("Sentiment:") so that the model replies with just one word. Such cues are called output indicators.

In [None]:
# Same classification request, now ending in "Sentiment:" as a cue for the answer.
messages = [
    {
        "role": "user",
        "content": "classify the text into negative or positive: text:the movie is amazing! , Sentiment:",
    }
]
response = pipe(messages)
response

In [None]:
# Prompt components — remove or add pieces to see their impact on the generated output.
persona = "You are an expert in Large Language models. You excel at breaking down complex papers into digestible summaries.\n"
instruction = "Summarize the key findings of the paper provided.\n"
context = "Your summary should extract the most crucial points that can help researchers quickly understand the most vital information of the paper.\n"
data_format = "Create a bullet-point summary that outlines the method. Follow this up with a concise paragraph that encapsulates the main results.\n"
audience = "The summary is designed for busy researchers that  quickly need to grasp the newest trends in Large Language Models.\n"
tone = "The tone should be professional and clear.\n"
text = "transfer Learning for Identifying Land Use and Land Cover from Satellite Imagery"
# To summarize a real document, `text` could instead be read from a file, e.g.:
#     with open(filename, "r") as f:
#         text = f.read()
data = f"Text to summarize: {text}"

# The full prompt: every component appears exactly once.
# (Previously `instruction` was concatenated twice, duplicating it in the prompt.)
query = persona + instruction + context + data_format + audience + tone + data

# Send the prompt as a user chat message, like the other cells in this notebook,
# so the pipeline formats it with the model's chat template instead of treating
# it as raw text to continue.
summary_messages = [{"role": "user", "content": query}]
response = pipe(summary_messages)
print(response[0]["generated_text"])

**We can also provide examples of what the output should look like. Below, a single example shows the model how to respond to a made-up word:**

In [None]:
# One-shot prompt: a worked example (user question + assistant answer) followed
# by the real question. Each chat turn must be its own dict — putting all turns
# in a single dict literal repeats the "role"/"content" keys, so only the last
# pair survives and the example is silently lost.
one_shot_prompt = [
    {
        "role": "user",
        "content": "'abhimanyu' is the name of the saviour of the universe. It might sound made up or fictional but its actually close to be proven. An example of sentence that uses word 'abhimanyu' is:",
    },
    {
        "role": "assistant",
        "content": "abhimanyu was, is and will forever be the greatest to do it.",
    },
    {
        "role": "user",
        "content": "A 'totulolu' is a spell used on bitches to behave them. An example of sentence that uses word 'totulolu' is: ",
    },
]

In [None]:
# Generate from the one-shot prompt with explicit temperature/top_p values.
# Note: no do_sample=True is passed in this call.
generation_options = {"temperature": 1, "top_p": 1}
response = pipe(one_shot_prompt, **generation_options)
print(response[0]["generated_text"])

Chain prompting — no need to give an example here; it's pretty easy.

In [None]:
# Chain-of-thought prompt: one solved example whose answer spells out the
# reasoning, followed by the new question to solve the same way.
cot_turns = [
    ("user", "Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?"),
    ("assistant", "Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11.The answer is 11."),
    ("user", "The cafeteria had 23 apples. If they used 20 to make lunch and bought 6 more, how many apples do they have?"),
]
cot_prompt = [{"role": speaker, "content": utterance} for speaker, utterance in cot_turns]

In [None]:
# Run the chain-of-thought prompt and print the model's worked answer.
response = pipe(cot_prompt)
cot_answer = response[0]["generated_text"]
print(cot_answer)

Here, many different phrasings work, but a common and effective method is to use the phrase “Let’s think step-by-step.”

In [None]:
# Zero-shot tree-of-thought: ask the model to play several experts who reason
# one step at a time and drop out once they realise they are wrong.
tot_question = (
    "imagine 3 different running experts are answering this question. All experts will write down 1 step of their thinking, then share it with the group.\n"
    "then all experts will go on to the next step, etc. If any expert realizes they are wrong, they leave the conversation. The question is: 'Will running long miles on a low protein diet \n"
    "and no weight training, lead to heavy muscle loss in an individual?' Make sure to discuss the results."
)
zeroshot_tot = [{"role": "user", "content": tot_question}]

response = pipe(zeroshot_tot)
print(response[0]["generated_text"])

**output verification**

In [None]:
# Providing examples
# 1. Without examples: only describe the desired output and see what comes back.
zeroshot_prompt = [
    {"role": "user", "content": "create a character profile for an RPG game in JSON format"}
]
response = pipe(zeroshot_prompt)
zero_shot_text = response[0]["generated_text"]
print(zero_shot_text)

In [None]:
# With one example: the prompt itself spells out the exact layout to follow.
one_shot_template = """Create a short character profile for an
RPG game. Make sure to only use this format:
{
"description": "A SHORT DESCRIPTION",
"name": "THE CHARACTER'S NAME",
"armor": "ONE PIECE OF ARMOR",
"weapon": "ONE OR MORE WEAPONS"
}
"""
one_shot_prompt = [dict(role="user", content=one_shot_template)]

# Generate and print the model's attempt at the requested format.
outputs = pipe(one_shot_prompt)
print(outputs[0]["generated_text"])

Although it worked here, this still won't guarantee proper formatting every time, so we move on to grammar-constrained sampling.

**Either we validate the output and retry until it is correct, OR we validate at the time of token selection itself.**


In [None]:
# ---------------- Free RAM before continuing (e.g. restart the kernel; clearing cell outputs alone does not unload the model) ----------------

In [None]:
from llama_cpp.llama import Llama  #used here to get json grammer

# Load the GGUF build of Phi-3-mini through llama-cpp-python.
llm = Llama.from_pretrained(
    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
    filename="*fp16.gguf",
    n_gpu_layers=-1,  # offload all layers to the GPU backend; this is how device placement is done here
    n_ctx=2048,
    verbose=False,  # was misspelled "verborse", so the verbose flag was never actually set
)
# NOTE: a Llama object is not a torch module and has no `.to()` method; the
# former `llm.to(device)` call raised AttributeError and has been removed.

# Ask for a chat completion constrained to a JSON object, then pull out the
# text content of the first choice.
chat_result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Create a warrior for an RPG in JSON format"}],
    response_format={"type": "json_object"},
    temperature=0,
)
output = chat_result["choices"][0]["message"]["content"]

{"timestamp":"2025-07-24T11:46:15.478817Z","level":"WARN","fields":{"message":"Reqwest(reqwest::Error { kind: Request, source: hyper_util::client::legacy::Error(SendRequest, hyper::Error(IncompleteMessage)) }). Retrying..."},"filename":"/Users/runner/work/xet-core/xet-core/cas_client/src/http_client.rs","line_number":242}
{"timestamp":"2025-07-24T11:46:15.478902Z","level":"WARN","fields":{"message":"Retry attempt #0. Sleeping 1.156946832s before the next attempt"},"filename":"/Users/runner/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/reqwest-retry-0.7.0/src/middleware.rs","line_number":171}


In [None]:
import json
# Parse the model's reply as JSON, then re-serialise it with 4-space
# indentation for readable display.
parsed_reply = json.loads(output)
json_output = json.dumps(parsed_reply, indent=4)
print(json_output)