# LLM Model

In [1]:
from google.colab import userdata

# Read the Hugging Face access token stored under the name 'HF_TOKEN' in Colab's `userdata` store.
hf_key = userdata.get('HF_TOKEN')

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Seed torch's global random number generator with a fixed value.
torch.random.manual_seed(0)

# Load the Phi-3.5-mini-instruct causal LM, placing it on the "cuda" device and
# authenticating the download with hf_key.
# NOTE(review): trust_remote_code=True runs Python files fetched from the model
# repository; the download log of this cell suggests pinning a revision.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
    token=hf_key
)
# Tokenizer from the same model repository as the model above.
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct", token=hf_key)

# Text-generation pipeline wrapping the model and tokenizer; used by run_phi_agent.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Keyword arguments forwarded to every `pipe(...)` call (see run_phi_agent).
# NOTE(review): with do_sample=False the temperature value is presumably unused
# and may only trigger a warning -- confirm against the transformers generation docs.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}

config.json:   0%|          | 0.00/3.45k [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/195 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.98k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Device set to use cuda


# Tools

In [3]:
def extend_search_new(text, span):
    """
    Grow a candidate match so that it covers one brace-balanced object.

    The caller passes the span of a non-greedy ``{...}`` regex match, which
    stops at the first ``}`` and therefore cuts nested objects short. This
    function rescans from the start of the span, tracking the real nesting
    depth, and returns the text up to the brace that closes the opening one.

    Braces inside double-quoted string literals (including escaped quotes)
    are ignored so that values such as ``"}"`` do not disturb the depth count.

    Parameters:
        text (str): The full text that was searched.
        span (tuple[int, int]): ``(start, end)`` indices of the candidate match.

    Returns:
        str: The balanced substring starting at ``start``; if no balancing
        brace is found, the original ``text[start:end]`` slice.
    """
    start, end = span
    depth = 0
    in_string = False
    escaped = False

    for i in range(start, len(text)):
        ch = text[i]

        if in_string:
            # Inside a string literal only the closing quote matters.
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            # <= 0 also covers a span that did not begin with '{'.
            if depth <= 0:
                return text[start:i + 1]

    # Unbalanced input: fall back to the span exactly as it was matched.
    return text[start:end]

In [4]:
import re
import json

def extract_json(text_response):
    """
    Extract every top-level JSON object embedded in a block of free text.

    The text is scanned for ``{`` characters and the standard-library JSON
    decoder is asked to parse an object starting at each one. A successful
    parse consumes the whole object (however deeply nested), so inner objects
    are not reported a second time; a failed parse simply moves on to the
    next ``{``.

    Parameters:
        text_response (str): Text that may contain JSON objects, e.g. raw
            LLM output mixing prose and JSON.

    Returns:
        list | None: The decoded objects in order of appearance, or ``None``
        when the input is not a string or contains no valid JSON object.
    """
    if not isinstance(text_response, str):
        return None

    decoder = json.JSONDecoder()
    json_objects = []

    position = text_response.find('{')
    while position != -1:
        try:
            json_obj, stop = decoder.raw_decode(text_response, position)
        except json.JSONDecodeError:
            # Not valid JSON from this brace; try the next candidate.
            position = text_response.find('{', position + 1)
            continue

        json_objects.append(json_obj)
        # Resume after the decoded object so nested objects are not re-emitted.
        position = text_response.find('{', stop)

    return json_objects if json_objects else None

In [5]:
def _is_unset(value):
    """Return True when a filter value means "do not filter on this field"."""
    if value is None:
        return True
    return isinstance(value, str) and value.strip().lower() in ("", "unknown")


def _lowered(value):
    """Return a case-insensitive comparison key for a value ('' for None)."""
    return "" if value is None else str(value).strip().lower()


def _coerce_won(won):
    """Normalise the `won` filter; textual booleans become real booleans."""
    if _is_unset(won):
        return None
    if isinstance(won, str):
        flag = won.strip().lower()
        if flag in ("true", "yes", "1"):
            return True
        if flag in ("false", "no", "0"):
            return False
        # Unrecognised text: treat as "no filter" rather than matching nothing.
        return None
    return won


def get_oscars_data(
    category=None,
    year=None,
    nominee=None,
    movie=None,
    won=None
):
    """
    Get the Oscars data for specific information.

    Every filter is optional. A filter set to ``None``, ``""`` or
    ``"unknown"`` is ignored. Text comparisons are case-insensitive and the
    year is compared as a string, so ``2023`` and ``"2023"`` are equivalent.
    Because the arguments usually come from model-generated JSON, `won` also
    accepts textual booleans such as ``"true"`` / ``"false"``.

    Parameters:
        category (str): The category of the award (e.g., "Best Actress").
        year (str or int): The year of the award.
        nominee (str): The name of the nominee.
        movie (str): The title of the movie.
        won (bool): Whether to filter by winners only (True or False).

    Returns:
        list: A list of matching entries from the Oscars data.

    Raises:
        OSError: If the data file cannot be opened.
        json.JSONDecodeError: If the data file is not valid JSON.
    """
    file_path = '/content/oscar-nominations.json'

    # Explicit UTF-8 so that names with non-ASCII characters load the same
    # way regardless of the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        oscars_data = json.load(file)

    # Normalise the filters once instead of on every entry.
    wanted_category = None if _is_unset(category) else _lowered(category)
    wanted_year = None if _is_unset(year) else str(year).strip()
    wanted_nominee = None if _is_unset(nominee) else _lowered(nominee)
    wanted_movie = None if _is_unset(movie) else _lowered(movie)
    wanted_won = _coerce_won(won)

    results = []
    for entry in oscars_data:
        if wanted_category is not None and _lowered(entry.get("category")) != wanted_category:
            continue

        if wanted_year is not None and str(entry.get("year", "")).strip() != wanted_year:
            continue

        if wanted_nominee is not None:
            # `or []` tolerates entries whose "nominees" field is null.
            names = [_lowered(name) for name in (entry.get("nominees") or [])]
            if wanted_nominee not in names:
                continue

        if wanted_movie is not None:
            titles = [_lowered(film.get("title")) for film in (entry.get("movies") or [])]
            if wanted_movie not in titles:
                continue

        if wanted_won is not None and entry.get("won", False) != wanted_won:
            continue

        results.append(entry)

    return results


# Agent prompts

In [6]:
# System prompt for the "query_generator" role: classifies a question as
# Oscar-related or not and, if it is, asks the model for JSON query parameters.
# The few-shot examples must be valid JSON because the model imitates them and
# its output is parsed with extract_json.
query_generator_prompt = """
Use Thought to understand the question you have been asked.

First, determine if the question is related to OSCAR AWARDS (ACADEMY AWARDS). A question is related to Oscars ONLY if it explicitly asks about nominees, winners, or categories of the Oscars (e.g., "Who won the Oscar for Best Actor in 2023?" or "Was Emma Stone nominated for an Oscar?"). If the question is about general information about an actor, actress, movie, or anything unrelated to the Oscars, classify it as "not_about_oscars."

PARAMETERS:
    category (str): The category of the award (e.g., "Best Actress").
    year (str or int): The year of the award.
    nominee (str): The name of the nominees (include only if the question involves a specific nominee (e.g. "Was Emma Stone nominated for an Oscar in 2017?")).
    movie (str): The title of the movie.
    won (bool): Whether to filter by winners only (True or False).

### Important Rules:
1. **Oscar-Related Classification**:
   - A question is about Oscars only if it involves nominees, winners, or categories of the Oscars.
   - Do NOT classify a question as Oscar-related if it asks for general information about an actor, actress, or movie without mentioning their Oscar connection (e.g., "Can you tell me about Emma Stone?").
   - If the question is unrelated to Oscars, respond with `"related_to_oscars": false`.

2. **Query Parameter Rules**:
   - If the specific nominee was not mentioned in the current question, "Who was nominated for...," If it does not contain any name, DO NOT INCLUDE the `nominee` parameter in the query, as the JSON file inherently CONTAINS ONLY NOMINEES. ONLY INCLUDE `category` and `year` parameters.
   - If the question involves a SPECIFIC NOMINEE (e.g., "Was Emma Stone nominated for an Oscar in 2017?"), then include 'nominee' parameter with name of actor/movie.
   - Always generate the query based on available information and context.

If the question is about Oscars, generate query parameters that can meet these requirements:

Consider the context of the previous question if available. Use that context to fill in any missing details in the current question. If there is no relevant context, create the query based only on the current question.

If the question is NOT about Oscars, respond with the classification `"related_to_oscars": false`.

Your output should always be in JSON format. Examples:

Example 1:

Context: {"category": "Best Actor", "year": "2023", "won": true}
Question: What about Best Actress?
Thought: I should use the year 2023 from the context and make a query for Best Actress.
Query:
{
  "related_to_oscars": true,
  "query": {
    "category": "Best Actress",
    "year": "2023",
    "won": true
  }
}

Example 2:

Question: Who won the Oscar's for Best Actress in 2023?
Thought: I should make a query for this question directly.
Query:
{
  "related_to_oscars": true,
  "query": {
    "category": "Best Actress",
    "year": "2023",
    "won": true
  }
}

Example 3:

Question: What's the capital of France?
Answer:
{
  "related_to_oscars": false
}

Example 4:

Question: Can you tell me something about Emma Stone?
Answer:
{
  "related_to_oscars": false
}

Example 5:

Question: Was Emma Stone nominated for an Oscar in 2017?
Thought: I should classify this as related to Oscars because it explicitly asks about Oscar nominations, and also contains specific actor/actress name about nomination so i will set 'nominee' parameter.
Query:
{
  "related_to_oscars": true,
  "query": {
    "nominee": "Emma Stone",
    "year": "2017"
  }
}

Example 6:

Question: Was movie Oppenheimer nominated for an Oscar in 2023?
Thought: I should classify this as related to Oscars because it explicitly asks about Oscar nominations, and also contains specific movie name about nomination so I will set 'nominee' parameter.
Query:
{
  "related_to_oscars": true,
  "query": {
    "nominee": "Oppenheimer",
    "movie": "Oppenheimer",
    "year": "2023"
  }
}

Example 7:

Question: Who was nominated for an Oscar for Best Picture in 2023?
Thought: I should classify this as related to Oscars because it explicitly asks about Oscar nominations, and it doesn't contain specific name so I won't put 'nominee' parameter in query, I will put just 'category' and 'year'.
Query:
{
  "related_to_oscars": true,
  "query": {
    "category": "Best Picture",
    "year": "2023"
  }
}

Example 8:

Question: What is the runtime of the movie La La Land?
Answer:
{
  "related_to_oscars": false
}


"""

# System prompt for the "oscar_fetching" role: instructs the model to answer
# with a JSON action naming get_oscars_data and its parameters, which
# run_phi_agent then parses and executes.
oscar_fetching_prompt = """
You run in a loop of Thought, Action.
At the end of the loop you output an Answer.

Use Thought to understand the input you have been given.
Use Action to run one of the actions available to you.

Your available actions are:

get_oscars_data:
e.g. get_oscars_data(category="Best Actress", year="2023", nominee="Emma Stone", movie="Poor Things", won=true)

Returns the fetched data about Oscar's;

Example session:

Question: {
  "category"="Best Actress",
  "year"="2023",
  "won"=true
}


Thought: I should use tool get_oscars_data and give it this input to get answer.
Action:
{
  "function_name": "get_oscars_data",
  "function_parms": {
    "category":"Best Actress",
    "year":"2023",
    "won":true
  }
}

And then you ONLY NEED TO OUTPUT ACTION AND NOTHING ELSE:

Answer:
{
  "function_name": "get_oscars_data",
  "function_parms": {
    "category":"Best Actress",
    "year":"2023",
    "won":true
  }
}

"""

# System prompt for the "text_generator" role in run_phi_agent: turns the
# user's question plus the fetched Oscars data into a single user-facing answer.
text_generator_prompt = """
Use Thought to understand the input you have been given.
You are given the following context and query. Your task is to generate a coherent and informative response based on the query and the context. Do not mention any data fetching or underlying processes. Simply provide a direct and clear answer to the user's question.

You will get JSON input that has data about Oscar's Awards. Use that and Context data, if needed, to make coherent sentence.
If the data is missing or no results are found, please generate a response explaining the absence of data or provide a suitable answer. For example, if the query was about a nomination, the response could be something like "No, [name] wasn't nominated in [year]" or a similar phrase that fits the situation.

The answer should be informative and context-aware. The answer SHOULD BE USER-FRIENDLY and informative WITHOUT referencing any processes or data fetching.
Output should only be that sentence you generated.
"""

# System prompt for the "general_answers" role in run_phi_agent: used for
# questions the query generator classified as not related to the Oscars.
general_answers_prompt = """
You are an AI assistant designed to answer general knowledge questions clearly and concisely. Provide accurate and well-formatted responses.
"""


# Main

In [7]:
# Module-level conversation state shared between run_phi_agent and interactive_chat.
context = {
    "last_query": None,  # previous user question; embedded into prompts by run_phi_agent
    "last_parameters": {},  # first JSON object parsed from the latest query-generator output
    "last_response": None  # latest answer produced by the text generator
}

In [8]:
def run_phi_agent(role, query=None, data=None):
    """
    Executes a task for a specific role using Phi-3.5.

    The role selects the system prompt and the shape of the user message. The
    model output is then scanned for JSON:

    * For the 'query_generator' role, if the first JSON object has
      ``"related_to_oscars": false`` the question is handed back for
      general answering.
    * If the first JSON object has a ``"function_name"`` key, the named action
      is executed with ``"function_parms"`` as keyword arguments and its
      result replaces the generated text.

    Args:
        role (str): The agent's role ('query_generator', 'oscar_fetching',
            'text_generator', 'general_answers').
        query (str): The user's query for the task.
        data (str): The fetched data to phrase as an answer (used for
            'text_generator').

    Returns:
        dict | str: ``{"general_question": query}`` when the query generator
        classifies the question as unrelated to the Oscars;
        ``"Action_Response: <result>"`` when an action was executed;
        otherwise the raw text generated by the model.

    Raises:
        ValueError: If `role` is not one of the supported roles.
        Exception: If the model requests an unknown action, or the action
            itself fails (the original error is chained as the cause).
    """
    if role == "query_generator":
        messages = [
            {"role": "system", "content": query_generator_prompt},
            {"role": "user", "content": f"Previous query: {context['last_query']} Current query: {query}"},
        ]
    elif role == "oscar_fetching":
        messages = [
            {"role": "system", "content": oscar_fetching_prompt},
            {"role": "user", "content": query},
        ]
    elif role == "text_generator":
        messages = [
            {"role": "system", "content": text_generator_prompt},
            {"role": "user", "content": f"Previous query: {context['last_query']} Current query: {query} Fetched data: {data}"},
        ]
    elif role == "general_answers":
        messages = [
            {"role": "system", "content": general_answers_prompt},
            {"role": "user", "content": query},
        ]
    else:
        raise ValueError(f"Invalid role: {role}")

    response = pipe(messages, **generation_args)
    generated_text = response[0]["generated_text"]

    # Actions the model is allowed to request by name.
    available_actions = {
        "get_oscars_data": get_oscars_data
    }

    print("GENERATED TEXT " + generated_text)

    json_function = extract_json(generated_text)

    if role == "query_generator" and json_function:
        if json_function[0].get("related_to_oscars") is False:
            return {"general_question": query}

    if json_function is not None and "function_name" in json_function[0]:
        function_name = json_function[0]['function_name']
        # The model may omit the parameter object; treat that as "no filters"
        # instead of failing with a bare KeyError.
        function_parms = json_function[0].get('function_parms') or {}

        if function_name not in available_actions:
            raise Exception(f"Unknown action: {function_name}: {function_parms}")

        print(f" -- running {function_name} {function_parms}")

        action_function = available_actions[function_name]

        try:
            result = action_function(**function_parms)
        except Exception as e:
            # Chain the original error so its traceback is not lost.
            raise Exception(f"Error calling the action function: {e}") from e

        generated_text = f"Action_Response: {result}"

    return generated_text

In [9]:
def interactive_chat():
    """
    Run a console chat loop that answers questions until the user types 'exit'.

    For each question the query generator is run first. Questions it
    classifies as unrelated to the Oscars are answered directly by the
    general-answers agent. Otherwise the generated query is handed to the
    fetching agent and the fetched data to the text generator, and the shared
    `context` is updated with the latest query, parameters and response.

    Any error raised while handling a question is printed and the loop
    continues with the next question.
    """
    print("Type your question or 'exit' to quit.")
    while True:
        user_query = input("User: ")
        if user_query.strip().lower() == "exit":
            print("Goodbye!")
            break

        try:
            task_instructions = run_phi_agent("query_generator", query=user_query)

            # Only a dict signals the "general question" path; on a plain
            # string, `in` would be a substring test and indexing would fail.
            if isinstance(task_instructions, dict) and "general_question" in task_instructions:
                general_answer = run_phi_agent("general_answers", query=task_instructions["general_question"])
                print("\n--- Results ---")
                print("Answer:", general_answer)
                print("\n----------------")
                continue

            # Validate before touching the shared context so that a failed
            # turn does not leave it half-updated.
            parsed_parameters = extract_json(task_instructions)
            if not parsed_parameters:
                raise ValueError("Could not parse query parameters from the model output.")

            context['last_query'] = user_query
            context['last_parameters'] = parsed_parameters[0]

            fetched_data = run_phi_agent("oscar_fetching", query=task_instructions)
            print(f"FETCHED DATA {fetched_data}")
            generated_text = run_phi_agent("text_generator", query=user_query, data=fetched_data)

            context['last_response'] = generated_text

            print("\n--- Results ---")
            print(f"Answer: {generated_text}")
            print(f"Query: {task_instructions}")
            print(f"Search Results: {fetched_data}")
            print("\n----------------")
        except Exception as e:
            print(f"Error: {str(e)}")


In [12]:
# Start the console chat loop; it keeps prompting via input() until the user types 'exit'.
interactive_chat()

Type your question or 'exit' to quit.
User: Who won the Oscar for Best Actress in 2023?
GENERATED TEXT  Answer:
{
  "related_to_oscars": true,
  "query": {
    "category": "Best Actress",
    "year": "2023",
    "won": true
  }
}

Explanation:
The current query is asking about the winner of the Oscar for Best Actress in the year 2023. This question is directly related to the Oscars as it involves a specific category (Best Actress) and a specific year (2023). The 'won' parameter is set to true to filter the query for winners only. Therefore, the query is classified as related to Oscars.
GENERATED TEXT  {
  "function_name": "get_oscars_data",
  "function_parms": {
    "category":"Best Actress",
    "year":"2023",
    "won":true
  }
}

Answer:
{
  "function_name": "get_oscars_data",
  "function_parms": {
    "category":"Best Actress",
    "year":"2023",
    "won":true
  }
}

Explanation:
Based on the provided query, the appropriate action is to use the "get_oscars_data" function to fetch 