In [None]:
from pathlib import Path
from typing import Union, List, Dict

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "eurecom-ds/Phi-3-mini-4k-socratic",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="cpu",  # "cuda",
)

tokenizer = AutoTokenizer.from_pretrained(
    "eurecom-ds/Phi-3-mini-4k-socratic", trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token  # Most LLMs don't have a default pad token.

In [None]:
def load_template(template_path: Union[str, Path] = "templates/inference.txt") -> str:
    """Load the chat template from inference.txt."""
    with open(template_path, "r", encoding="utf-8") as f:
        return f.read().strip()


def format_prompt(prompt: str, template: str) -> str:
    """Format one or more prompts using a pre-loaded loaded chat template."""
    return template.replace("{input}", prompt).strip()


def chat(user_input: Union[str, List], template: str) -> Union[str, List]:
    """Generates a response from the model using the template."""
    if isinstance(user_input, str):
        user_input = [user_input]
    prompts = [format_prompt(prompt, template) for prompt in user_input]
    model_inputs = tokenizer(prompts, return_tensors="pt", padding=True)

    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            do_sample=True,
            temperature=0.8,
            top_p=0.95,
            length_penalty=1.0,
            repetition_penalty=1.0,
            max_new_tokens=100,
            pad_token_id=tokenizer.eos_token_id,
        )

    full_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    generated_texts = [
        full_output[len(prompt) :].strip()  # Only extract the LLM's answer.
        for prompt, full_output in zip(prompts, full_outputs)
    ]
    if len(generated_texts) == 1:
        return generated_texts[0]
    return generated_texts

In [None]:
template = load_template()
prompts = [
    """Student: What is the meaning of life?
Teacher: That's a deep question! What do you think makes life meaningful?'
Student: I'm not sure. Maybe happiness?
""",
    """Student: What inspired the Monroe Doctrine?
Teacher: That's a complex geopolitical question! What can you tell me about American foreign policy before James Monroe's presidency??'
Student: I think perhaps something to do with active foreign influence?
""",
    "Student: Professor, why did Einstein say that God does not play dice?",
]
response = chat(prompts, template)

In [None]:
response

In [86]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch, urllib
from pathlib import Path
from typing import Union, List, Dict
from transformers import Conversation

In [87]:
model = AutoModelForCausalLM.from_pretrained(
    "eurecom-ds/Phi-3-mini-4k-socratic",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="cpu",
)

tokenizer = AutoTokenizer.from_pretrained(
    "eurecom-ds/Phi-3-mini-4k-socratic", trust_remote_code=True
)


`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [115]:
TEMPLATE = load_template()
CONVERSATION = Conversation()

In [116]:
def chat(user_input):
    content = TEMPLATE.format(input=user_input)
    CONVERSATION.add_message({"role": "user", "content": content})
    formatted = tokenizer.apply_chat_template(CONVERSATION.messages, tokenize=False, add_generation_prompt=True)
    encoded_inputs = tokenizer(
        [formatted],
        return_tensors="pt",
    )
    generate_kwargs = dict(encoded_inputs, max_new_tokens=250)
    output = model.generate(**generate_kwargs)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # Trim excessive input echoes
    if formatted in response:
        response = response[len(formatted):].strip()
    # response = tokenizer.decode(output[0], skip_prompt=True, skip_special_tokens=True)[
    #     len(content) + 1 :
    # ]
    CONVERSATION.add_message({"role": "assistant", "content": response})
    return response

In [117]:
user_input = "Student: Professor, why did Einstein say that God does not play dice?"
chat(user_input)

"You are a Socratic tutor. Use the following principles in responding to students:\n\n- Ask thought-provoking, open-ended questions that challenge students' preconceptions and encourage them to engage in deeper reflection and critical thinking.\n- Facilitate open and respectful dialogue among students, creating an environment where diverse viewpoints are valued and students feel comfortable sharing their ideas.\n- Actively listen to students' responses, paying careful attention to their underlying thought processes and making a genuine effort to understand their perspectives.\n- Guide students in their exploration of topics by encouraging them to discover answers independently, rather than providing direct answers, to enhance their reasoning and analytical skills.\n- Promote critical thinking by encouraging students to question assumptions, evaluate evidence, and consider alternative viewpoints in order to arrive at well-reasoned conclusions.\n- Demonstrate humility by acknowledging yo

In [112]:
CONVERSATION

Conversation id: ef2828e6-8673-4836-aff7-4aaeb40cf890
user: You are a Socratic tutor. Use the following principles in responding to students:

- Ask thought-provoking, open-ended questions that challenge students' preconceptions and encourage them to engage in deeper reflection and critical thinking.
- Facilitate open and respectful dialogue among students, creating an environment where diverse viewpoints are valued and students feel comfortable sharing their ideas.
- Actively listen to students' responses, paying careful attention to their underlying thought processes and making a genuine effort to understand their perspectives.
- Guide students in their exploration of topics by encouraging them to discover answers independently, rather than providing direct answers, to enhance their reasoning and analytical skills.
- Promote critical thinking by encouraging students to question assumptions, evaluate evidence, and consider alternative viewpoints in order to arrive at well-reasoned con

In [113]:
user_input = "Student: Did Einstein's statement have to do with his disbelief in the inherent randomness embedded within quantum theory?"
chat(user_input)

"'s perspective on determinism and quantum mechanics might influence our understanding of the universe? You are a Socratic tutor. Use the following principles in responding to students:\n\n- Ask thought-provoking, open-ended questions that challenge students' preconceptions and encourage them to engage in deeper reflection and critical thinking.\n- Facilitate open and respectful dialogue among students, creating an environment where diverse viewpoints are valued and students feel comfortable sharing their ideas.\n- Actively listen to students' responses, paying careful attention to their underlying thought processes and making a genuine effort to understand their perspectives.\n- Guide students in their exploration of topics by encouraging them to discover answers independently, rather than providing direct answers, to enhance their reasoning and analytical skills.\n- Promote critical thinking by encouraging students to question assumptions, evaluate evidence, and consider alternative 

In [114]:
CONVERSATION.messages

[{'role': 'user',
  'content': "You are a Socratic tutor. Use the following principles in responding to students:\n\n- Ask thought-provoking, open-ended questions that challenge students' preconceptions and encourage them to engage in deeper reflection and critical thinking.\n- Facilitate open and respectful dialogue among students, creating an environment where diverse viewpoints are valued and students feel comfortable sharing their ideas.\n- Actively listen to students' responses, paying careful attention to their underlying thought processes and making a genuine effort to understand their perspectives.\n- Guide students in their exploration of topics by encouraging them to discover answers independently, rather than providing direct answers, to enhance their reasoning and analytical skills.\n- Promote critical thinking by encouraging students to question assumptions, evaluate evidence, and consider alternative viewpoints in order to arrive at well-reasoned conclusions.\n- Demonstra