# 🎓 Master Thesis: Prompt Engineering with Gemma-2B
This notebook demonstrates Prompt Engineering using the Mistral-7B-Instruct model on a dataset of beginner-level Python Q&A pairs.

We'll test zero-shot and few-shot prompting strategies and store the results for evaluation.

In [10]:
# ✅ Install Required Libraries
!pip install -q transformers accelerate datasets huggingface_hub

In [None]:
# ✅ Login to HuggingFace (insert your token below)
from huggingface_hub import login
login('access token here')  # Replace with your actual token inside quotes

In [12]:
# ✅ Import Libraries
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch, json
from pathlib import Path

In [13]:

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

MODEL_ID = "google/gemma-2b-it"  # keep the instruct variant

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)

# Deterministic decoding config (prevents random guessing)
GEN_KW = dict(
    max_new_tokens=256,
    do_sample=False,  # <- crucial: disables sampling (temperature/top_p ignored)
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [39]:
GEN_KW = dict(
    max_new_tokens=512,           # give enough room to finish answers
    do_sample=False,              # deterministic
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

def build_prompt(user_question: str, context: str = "") -> str:
    rules = (
        "You are a careful assistant. Follow STRICTLY:\n"
        "1) Only answer using the provided context (if any).\n"
        "2) If the answer is not in the context or you are uncertain, reply EXACTLY with: Sorry I do not have that information\n"
        "3) Do not add any explanation, punctuation, or extra words when refusing.\n"
        "4) When you do know the answer, explain it in detail with at least 3 sentences and examples if possible.\n"
        "5) Do not rephrase the refusal.\n"
    )

    if context.strip():
        content = rules + f"\nUse ONLY this context to answer:\n---\n{context}\n---\n\nQuestion: {user_question}"
    else:
        content = rules + f"\nQuestion: {user_question}"

    messages = [{"role": "user", "content": content}]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [42]:
def ask_model(question_text: str, context: str = "") -> str:
    prompt = build_prompt(question_text, context)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, **GEN_KW)

    # Decode without prompt-length slicing
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Try to find where the answer starts by splitting on the question
    if question_text in generated_text:
        answer = generated_text.split(question_text, 1)[-1].strip()
    else:
        answer = generated_text.strip()

    # If it's exactly the refusal phrase, keep it clean
    if answer.lower().startswith("sorry i do not have that information"):
        return "Sorry I do not have that information"

    return answer


In [45]:
question = "1.What’s Actually New in Python 3.13"
answer = ask_model(question)
print(answer)


model
Sorry I do not have that information. I am unable to provide a detailed explanation of the new features in Python 3.13 from the context.
