In [1]:
from Models.models import LLMModel
from langchain_core.messages import (HumanMessage)
from langchain.prompts import SystemMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from pydantic import BaseModel, Field, validator
import json

## Create a simple callable model

In [2]:


# Plain chat model: no output schema is supplied.
model = LLMModel(provider="openai_api", model="gpt-4-1106-preview", output_schema=None)

# The model is callable in two ways: with a list of LangChain core messages,
# or with a dictionary that holds that list under the `messages` key.

# 1) Bare list of messages.
response = model([HumanMessage(content="Who is the president of Nigeria")])
print(response)

print("--------------------------------")

# 2) Dictionary input.
response = model(dict(messages=[HumanMessage(content="Who is the president of Nigeria")]))
print(response)


As of my knowledge cutoff in 2023, the President of Nigeria is Muhammadu Buhari. He has been in office since May 29, 2015. However, if you are asking this question at a later date, the officeholder might have changed, particularly after elections or due to other political developments. It is advisable to check the latest information from a reliable source for the current president.
--------------------------------
As of my knowledge cutoff date in early 2023, the President of Nigeria is Muhammadu Buhari. He has been serving as President since May 29, 2015, after winning the general election. However, given that Nigeria holds presidential elections every four years, and the next election is due in 2023, there may be a new President if you are asking this question after the elections have taken place. Please verify with the most recent sources to get the current President of Nigeria.


### Adding Output Schema and prompt template


In [3]:
class Joke(BaseModel):
    """Joke model"""

    # Both fields are required: the Ellipsis default marks them as mandatory.
    # NOTE(review): the docstring and descriptions presumably end up in the
    # JSON schema given to the LLM, so their wording is kept verbatim - confirm.
    setup: str = Field(default=..., description="setup for the joke")
    punchline: str = Field(default=..., description="punchline for the joke")


# Prompt made of a single system message; `{topic}` is filled from the input dict.
prompt_template = ChatPromptTemplate.from_messages(
    [("system", "Given this topic={topic}, generate a joke.")]
)

model1 = LLMModel(
    provider="fireworks_api",
    model="llama-v3-70b-instruct",
    # Passing the pydantic class allows the output to be validated;
    # Joke.model_json_schema() can be passed instead.
    output_schema=Joke,
    # Return a JSON object based on the output schema.
    try_to_parse=True,
    # If the response cannot be parsed, retry up to 2 times. With
    # retry_with_history=True the failed response is added to the chat as feedback.
    config=dict(retry=2, retry_with_history=True),
    prompt_template=prompt_template,
)

inputs = dict(topic="dad jokes")
response = model1(inputs)

print(response)
print(type(response))

Validating output schema.....
{'setup': 'Why did the scarecrow win an award?', 'punchline': 'Because he was outstanding in his field!'}
<class 'dict'>


## Adding Extra Model configuration (Parameters and Model Kwargs)

You can add parameters such as:

1. **retry:** if `try_to_parse` is set to true, the module will retry when parsing fails; this value determines how many times it will retry
2. **retry_with_history:** if true, the previous AI response will be added to the chat while retrying to improve the response



In [4]:
from Models.schemas import AspectorEvaluatorInputSchema, FeedbackISC, FeedbackBasic,  QualityAspect, AspectorRole

# Assistant with a personality prompt, parsed output and extra model settings.
model = LLMModel(
    provider="openai_api",
    model="gpt-4-1106-preview",
    output_schema=FeedbackBasic,
    # Extra personality configuration goes into the prompt template.
    prompt_template=ChatPromptTemplate.from_messages(
        [("system", "You are a funny virtual assistant")]
    ),
    try_to_parse=True,
    config=dict(
        retry=2,
        retry_with_history=True,
        params=dict(temperature=0.9),  # extra model parameters go here
        # model_kwargs={},  # extra kwargs can be added here
    ),
)

# Three consecutive turns sent to the same model instance.
questions = (
    "What do you call a dog with 3 legs?",
    "What about  a dog with 2 legs?",
    "1 leg?",
)
for turn, question in enumerate(questions):
    if turn > 0:
        # Separator between turns (not printed before the first one).
        print("--------------------------------")
    messages = [HumanMessage(content=question)]
    response = model(messages)
    print(response)

Validating output schema.....
{'response': "It doesn't matter what you call him, he's still going to be your loyal, adorable tripod!"}
--------------------------------
Validating output schema.....
{'response': "Call him whatever you like, but don't be surprised if he doesn't come running. He's probably too busy being a little bipawd hero!"}
--------------------------------
Validating output schema.....
{'response': "A one-legged dog? That sounds like a rough situation, but let's call him 'Uno' – because he's one of a kind and still 'number one' in your heart!"}


## CHAT HISTORY AND TOKEN COUNT
1. use `model.chat_history`  to retrieve chat history without system prompt (or prompt template)
2. use `model.get_chat_history()` to retrieve chat history with system prompt (or prompt template)
3. use `model.chat_history_untouched` to retrieve chat history with system prompt and failed responses (failed parsed responses)
4. use `model.get_total_tokens()` to retrieve both the token counts and the content that was used to compute them

In [5]:
# Chat history without the system prompt (or prompt template).
model.chat_history

[HumanMessage(content='What do you call a dog with 3 legs?'),
 AIMessage(content='```json\n{\n  "response": "It doesn\'t matter what you call him, he\'s still going to be your loyal, adorable tripod!"\n}\n```', response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 186, 'total_tokens': 218}, 'model_name': 'gpt-4-1106-preview', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-c04ce758-129d-44fb-91c9-4bcd5b4c5684-0'),
 HumanMessage(content='What about  a dog with 2 legs?'),
 AIMessage(content='```json\n{\n  "response": "Call him whatever you like, but don\'t be surprised if he doesn\'t come running. He\'s probably too busy being a little bipawd hero!"\n}\n```', response_metadata={'token_usage': {'completion_tokens': 42, 'prompt_tokens': 236, 'total_tokens': 278}, 'model_name': 'gpt-4-1106-preview', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-e3f78c32-1b56-47cc-8df2-4d63e19e5eb2-0'),
 HumanMessage(con

In [6]:
# Chat history with the system prompt (or prompt template) included.
model.get_chat_history()

[SystemMessage(content='\nYou are a funny virtual assistant\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"response": {"description": "The response string returned by the LLM", "title": "Response", "type": "string"}}, "required": ["response"]}\n```'),
 HumanMessage(content='What do you call a dog with 3 legs?'),
 AIMessage(content='```json\n{\n  "response": "It doesn\'t matter what you call him, he\'s still going to be your loyal, adorable tripod!"\n}\n```', response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 186, 'total_tokens': 218}, 'mod

In [7]:
# Chat history that also keeps failed (unparsable) responses.
model.chat_history_untouched

[HumanMessage(content='What do you call a dog with 3 legs?'),
 AIMessage(content='```json\n{\n  "response": "It doesn\'t matter what you call him, he\'s still going to be your loyal, adorable tripod!"\n}\n```', response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 186, 'total_tokens': 218}, 'model_name': 'gpt-4-1106-preview', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-c04ce758-129d-44fb-91c9-4bcd5b4c5684-0'),
 HumanMessage(content='What about  a dog with 2 legs?'),
 AIMessage(content='```json\n{\n  "response": "Call him whatever you like, but don\'t be surprised if he doesn\'t come running. He\'s probably too busy being a little bipawd hero!"\n}\n```', response_metadata={'token_usage': {'completion_tokens': 42, 'prompt_tokens': 236, 'total_tokens': 278}, 'model_name': 'gpt-4-1106-preview', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-e3f78c32-1b56-47cc-8df2-4d63e19e5eb2-0'),
 HumanMessage(con

In [8]:
# Get the token counts together with the content the counts were computed from.
tokens, tokens_string = model.get_total_tokens()

DEBUG: ...API token count


In [9]:
# Token counts, keyed by "in" and "out".
tokens

{'in': 711, 'out': 123}

In [10]:
# Content counted towards the "in" tokens (one entry per call).
tokens_string["in"]

['\nYou are a funny virtual assistant\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"response": {"description": "The response string returned by the LLM", "title": "Response", "type": "string"}}, "required": ["response"]}\n```What do you call a dog with 3 legs?',
 '\nYou are a funny virtual assistant\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\

In [11]:
# Content counted towards the "out" tokens (the raw model responses).
tokens_string["out"]

['```json\n{\n  "response": "It doesn\'t matter what you call him, he\'s still going to be your loyal, adorable tripod!"\n}\n```',
 '```json\n{\n  "response": "Call him whatever you like, but don\'t be surprised if he doesn\'t come running. He\'s probably too busy being a little bipawd hero!"\n}\n```',
 '```json\n{\n  "response": "A one-legged dog? That sounds like a rough situation, but let\'s call him \'Uno\' – because he\'s one of a kind and still \'number one\' in your heart!"\n}\n```']

###  Turn off chat History

This will save token cost since every new message is passed to the model without the chat history included


In [12]:
# Same OpenAI model, but with the chat history switched off.
model = LLMModel(provider="openai_api", model="gpt-4-1106-preview", use_history=False)

# Each turn is sent on its own; the last one asks about the first message.
questions = (
    "What do you call a dog with 3 legs?",
    "What about  a dog with 2 legs?",
    "What was my first message?",
)
for turn, question in enumerate(questions):
    if turn > 0:
        # Separator between turns (not printed before the first one).
        print("--------------------------------")
    messages = [HumanMessage(content=question)]
    response = model(messages)
    print(response)




Validating output schema.....
{'response': "A dog with three legs can be called a 'tripod,' which is a term sometimes used affectionately to describe animals with three limbs. However, it's important to treat all animals with respect, regardless of their physical differences."}
--------------------------------
Validating output schema.....
{'response': 'A dog with two legs would have significant challenges in mobility and would require special care. They may need a custom wheelchair or harness to help them move around, and they would also need a safe environment to prevent injuries. Veterinary care, physical therapy, and a loving, supportive home are important for such a special needs dog.'}
--------------------------------
Validating output schema.....
{'response': 'What was my first message?'}


In [13]:
model.chat_history  # You will get an empty history here

[]

## JUDGE USE CASE

## Using as an Evaluator (JUDGE USE CASE)

The Models.schemas module comes with some pre-defined pydantic schemas:
1. **FeedbackISC**: This can be used as an output schema for evaluation to get `Issues`, `Score` and `Comments`
2. **FeedbackBasic**: This can be used as an output schema to get a dictionary with response (Default value when `try_to_parse` is set to true)
3. **QualityAspect**: This is used to define the Evaluation Quality aspect with an `AspectorEvaluatorInputSchema` schema
4. **AspectorRole**: This is used to specify if the evaluator should judge only the `USER`, `ASSISTANT` or both (`USER_AND_ASSISTANT`) etc

In [14]:
from langchain.prompts import HumanMessagePromptTemplate

In [15]:
# Sample conversation record used as the evaluator input below.
# It holds a status flag, a metadata string and a list of cells; each cell
# carries its position (`cell_pos`), the speaker (`role`), the text (`content`)
# and the cell kind (`type`: 'markdown' or 'code').
conversation = {'status': 'OK',
 'metadata': {'metadata': '# Metadata\n\n**Python Topics** - algorithms > by_topic > probability\n\n**Type** - query\n\n**Target Number of Turns (User + Assistant)** - 2\n'},
 'conversation': [{'cell_pos': 1,
   'role': 'User',
   'content': 'Could you show me a Python function to calculate the probability of rolling a sum of 8 with two six-sided dice?',
   'type': 'markdown'},
  {'cell_pos': 2,
   'role': 'Assistant',
   'content': 'Certainly! When you roll two six-sided dice, there are 36 possible outcomes (as there are 6 possible outcomes for each die). To determine the probability of obtaining a sum of 8, you must identify the favorable outcomes (those resulting in a sum of 8) and then divide this count by the total number of possible outcomes.\n\nHere is a python function for it:',
   'type': 'markdown'},
  {'cell_pos': 3,
   'role': 'Assistant',
   'content': 'def probability_of_rolling_sum_eight():\n    favorable_outcomes = 0\n    total_outcomes = 36 # There are 6 * 6 = 36 total outcomes\n\n    for dice_1 in range(1, 7):\n        dice_2 = 8 - dice_1\n\n        # Check if the value is valid\n        if 1 <= dice_2 <= 6:\n            favorable_outcomes += 1\n\n    return favorable_outcomes / total_outcomes\n\n# Call the function and print the result\nprobability_of_rolling_sum_eight = probability_of_rolling_sum_eight()\n\nprint(f"The probability of rolling a sum of 8 with two six-sided dice is: {probability_of_rolling_sum_eight}")',
   'type': 'code'},
  {'cell_pos': 4,
   'role': 'Assistant',
   'content': 'The time complexity of the `probability_of_rolling_sum_eight` function is O(N) since it undergoes a constant number of iterations (6 iterations, corresponding to each possible value of dice_1 ranging from 1 to 6).The loop entails basic arithmetic operations and conditional checks, all with constant time complexity.\n\nAs a result, the time complexity remains constant and is not influenced by the input size, irrespective of the number of sides on the dice. The function is specifically tailored for six-sided dice, ensuring a consistent and limited number of iterations.',
   'type': 'markdown'},
  {'cell_pos': 5,
   'role': 'User',
   'content': 'Could you kindly provide the result with precision up to four decimal places only?',
   'type': 'markdown'},
  {'cell_pos': 6,
   'role': 'Assistant',
   'content': "Certainly! Here's the revised version of the function that will provide the outcome with accuracy up to four decimal places.",
   'type': 'markdown'},
  {'cell_pos': 7,
   'role': 'Assistant',
   'content': 'def probability_of_rolling_sum_eight():\n    favorable_outcomes = 0\n    total_outcomes = 36 # There are 6 * 6 = 36 total outcomes\n\n    for die1 in range(1, 7):\n        die2 = 8 - die1\n\n        # Check if the value is valid\n        if 1 <= die2 <= 6:\n            favorable_outcomes += 1\n\n    return favorable_outcomes / total_outcomes\n\n# Call the function and print the result\nprobability_of_rolling_sum_eight = probability_of_rolling_sum_eight()\n\nprint(f"The probability of rolling a sum of 8 with two six-sided dice is: {probability_of_rolling_sum_eight: .4f}")',
   'type': 'code'}]}

# Evaluator prompt: a system message that sets the persona and a human message
# that carries the rubric, the task and the `{conversation}` placeholder.
# The (role, template) tuple form is used for both messages.
# NOTE(review): the rubric text only mentions scores 1-2, "<2" and 5; scores
# 3-4 are never described. The prompt wording is left unchanged here.
chat_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """Name: AI Assistant Perfector.
Profile: You are an expert in perfecting AI Assistants' response content based on the user's expertise level.""",
        ),
        (
            "human",
            """
Given the following conversation between Human User and AI Assistant, find issues following the rules described below and rate the total conversation.
Single significant instance of deviation from the rules - score 1 or 2. More issues score<2. No issues=5.

Qualities we care about. Focus on them and only find issues that are directly related to them:
```
You must assume the user just started to learn about the question that is asked, so the 
reply should cover all the points that the user might be new to, and assume the 
user has basic knowledge about the prerequisites. 

This helps us keep the explanation clean, and makes it useful to the user rather 
than throwing all information about the topic to the user.

It is important to identify the query intent to gauge the user knowledge level as well as 
the code complexity to provide the most useful explanation.
```

The task:
```
Please, detect all mismatches between user's expertise level shown and the replies of the Assistant.
If User expertise level is unknown - asumme they are a beginner in that question.
Mismatches might include but not limited to:
    - too much explanation for an expert
    - too little explanation for a beginner
    - Assistant assumes the user is not the beginner in the question asked be it an algo or a technology or something else.

Assume basic knowledge of Python programming by the user and so no need to explain basic things unless asked to.
For example, if the question is about an algorithm in python, assume understnding of Python but a beginner level in algorithms UNLESS USER SHOWS OR STATES A HIGHER OR LOWER LEVEL OF EXPERTISE.

If no issues found, do not create any.
Correctness or accuracy is not your concern and will be handled by other evaluators. Focus only on the serving user's level of expertise in the most helpful manner.
```

Conversation:
CONVERSATION_START
{conversation}
CONVERSATION_END

Now, proceed to completing your task of finding issues and scoring the conversation.
""",
        ),
    ]
)

class UserExpertise(BaseModel):
    """User expertise level in a inquiry topic."""

    # Both fields are required (Ellipsis default). The docstring and the
    # descriptions are kept verbatim.
    # NOTE(review): presumably they are surfaced to the LLM through the JSON
    # schema - confirm before rewording.
    inquiry_topic: str = Field(default=..., description="The topic of the user's inquiry.")
    level: str = Field(
        default=...,
        description="The user's expertise level in the inquiry topic. Even if assumed. Be concise, 1 sentence max.",
    )
    
class Intent(BaseModel):
    """User intent."""

    # Position of the cell the intent refers to (required, no description).
    cell_pos: int
    # Required free-text intent; the description is kept verbatim.
    intent: str = Field(
        default=...,
        description="User intent, concise, single sentence per user reply. Avoid Assistant intent",
    )



In [16]:
from Models.schemas import NotebookWiseFeedback

In [17]:
# Evaluator whose parsed output is checked against the NotebookWiseFeedback
# pydantic class.
evaluator = LLMModel(
    name="aspect_evaluator",
    provider="openai_api",
    model="gpt-4-turbo",
    prompt_template=chat_template,
    output_schema=NotebookWiseFeedback,
    try_to_parse=True,
    # Optional retry / sampling settings, e.g.:
    # config=dict(retry=5, retry_with_history=True, params=dict(temperature=0.9)),
)

# Perform the evaluation; `conversation` fills the {conversation} placeholder.
evaluation_result = evaluator(dict(conversation=conversation))

# Last expression of the cell, so the result is displayed.
evaluation_result



Validating output schema.....


{'user_intents': [{'cell_pos': 1,
   'intent': 'User asks for a Python function to calculate the probability of a specific dice roll sum.'},
  {'cell_pos': 5,
   'intent': 'User requests the result of the function to be displayed with precision up to four decimal places.'}],
 'user_expertise_level': {'inquiry_topic': 'Probability calculation using Python',
  'level': 'Beginner'},
 'scratchpad': "User asks about implementing a probability function in Python, indicating familiarity with basic Python but possibly new to probability computations. The assistant's explanations should be clear, not assuming advanced knowledge of probability, but can skip basic Python syntax. The assistant provided a function, explained the possible outcomes and the calculation method clearly, and adjusted the function as per user's request for precision.",
 'issues': [{'type': 'Explanation Depth',
   'reason': "The assistant's explanation of the function's time complexity may be too advanced for a user who is

### With JSON OUTPUT Schema:
NOTE THAT THIS METHOD WON'T BE ABLE TO VALIDATE THE OUTPUT

In [18]:
# Evaluator configured with a plain JSON schema (a dict) instead of the
# pydantic class; with this form the output is parsed but cannot be validated.
evaluator = LLMModel(
    name="aspect_evaluator",
    provider="openai_api",
    model="gpt-4-turbo",
    prompt_template=chat_template,
    output_schema=NotebookWiseFeedback.model_json_schema(),
    try_to_parse=True,
    # Optional retry / sampling settings, e.g.:
    # config=dict(retry=5, retry_with_history=True, params=dict(temperature=0.9)),
)

# Perform the evaluation; `conversation` fills the {conversation} placeholder.
evaluation_result = evaluator(dict(conversation=conversation))

# Last expression of the cell, so the result is displayed.
evaluation_result

{'user_intents': [{'cell_pos': 1,
   'intent': 'User requests a Python function for calculating probability of a specific dice roll.'},
  {'cell_pos': 5,
   'intent': "User asks for precision adjustment in the function's result."}],
 'user_expertise_level': {'inquiry_topic': 'Python programming with a focus on probability calculations',
  'level': 'beginner'},
 'scratchpad': "The user asks about a specific probability calculation using Python, indicating a beginner level in this topic. The Assistant provides a detailed explanation and a function suitable for a beginner, explaining the steps clearly. The follow-up request is also handled appropriately with added precision in the result. Overall, the Assistant's responses seem well-tailored to the user's expertise level, assuming a beginner's understanding of the topic while not requiring any prior deep knowledge of probability or advanced Python features.",
 'issues': [],
 'score': 5}