In [1]:
# 🧹 Clean install specific working versions
!pip install -i https://pypi.org/simple/ bitsandbytes==0.41.1
!pip install -q --upgrade transformers accelerate
!pip install -q gradio


Looking in indexes: https://pypi.org/simple/


In [5]:
!pip install -q transformers accelerate gradio

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

# Hugging Face Hub checkpoint shared by the tokenizer and the model.
model_id = "microsoft/phi-2"

# Tokenizer matching the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Weights are loaded in half precision (fp16), without bitsandbytes quantization;
# device placement is left to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

def ask_mentor(prompt, max_new_tokens=300):
    """Ask the language model a coding question and return only its answer.

    The question is wrapped in a fixed "coding mentor" instruction, sampled with
    temperature 0.7 / top-p 0.95, and streamed to stdout via ``TextStreamer``
    while it is being generated.

    Args:
        prompt: The user's question, inserted verbatim into the instruction text.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The generated answer with surrounding whitespace stripped, or a
        fallback message when the model produced no text.
    """
    system_prompt = f"You are a helpful coding mentor. Answer clearly and with code examples if needed.\n\nQuestion: {prompt}\nAnswer:"

    inputs = tokenizer(system_prompt, return_tensors="pt").to(model.device)
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generated_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        do_sample=True,
        top_p=0.95,
        streamer=streamer
    )

    # For a causal LM, generate() returns the prompt tokens followed by the new
    # tokens. Decoding the whole sequence would echo the instruction and the
    # question back to the user (and would make the empty-answer fallback below
    # unreachable), so drop the prompt before decoding.
    prompt_length = inputs["input_ids"].shape[-1]
    answer_ids = generated_ids[0][prompt_length:]

    output = tokenizer.decode(answer_ids, skip_special_tokens=True).strip()
    return output if output else "⚠️ Sorry, I didn't get that. Try rephrasing."


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
import gradio as gr

def chat(user_input, history):
    """Answer one user message and record the exchange in the chat history.

    Args:
        user_input: Text the user submitted.
        history: List of ``(user_text, bot_text)`` tuples from earlier turns,
            or ``None``/empty for a fresh conversation. The list is extended
            in place.

    Returns:
        ``(history, history)`` — the same list twice, one for each output the
        caller binds it to.
    """
    history = history or []

    # A blank submission would otherwise run a full generation on an empty
    # question; leave the transcript untouched instead.
    if not user_input or not user_input.strip():
        return history, history

    user_turn = "🧑 " + user_input
    try:
        bot_turn = "🤖 " + ask_mentor(user_input)
    except Exception as e:
        # Show the failure in the transcript rather than failing the callback.
        bot_turn = f"❌ Error: {str(e)}"

    # The turn is appended once, fully formed: this function returns a single
    # value (it is not a generator), so an interim "generating" placeholder
    # could never be rendered.
    history.append((user_turn, bot_turn))
    return history, history

# Dark-styled Blocks app: a title, the chat transcript, a question box, and the
# stored conversation history.
with gr.Blocks(
    theme=gr.themes.Base(),
    css=".gradio-container {background-color: #111827; color: white;}",
) as demo:
    gr.Markdown("<h1 style='color:white;'>💻 Kenneth's Coding Mentor</h1>")

    chatbot = gr.Chatbot(label="🧠 Assistant", height=400, elem_id="chatbot")
    msg = gr.Textbox(
        label="Type your coding question here...",
        placeholder="e.g. Explain list comprehension in Python",
    )
    state = gr.State([])

    def user_submit(user_input, history):
        """Pass the submitted question and the stored history on to chat()."""
        return chat(user_input, history)

    # Two listeners on the textbox's submit event: the first produces the reply
    # and refreshes both the transcript and the stored history, the second
    # empties the textbox.
    msg.submit(fn=user_submit, inputs=[msg, state], outputs=[chatbot, state])
    msg.submit(fn=lambda: "", inputs=None, outputs=msg)

# share=True asks Gradio for a public share URL in addition to the local server.
demo.launch(share=True)


  chatbot = gr.Chatbot(height=400, label="🧠 Assistant", elem_id="chatbot")


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7ee0e005cbcb6f897e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


