# Overview

**Using LLM to assess user's answers in a way that can easily be integrated into AI app logic.**

### <font color='gray'>Loading libraries, making initial settings</font>

In [1]:
import os, sys
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv("../settings/.env", override=True)

True

In [2]:
import pydantic
import instructor
import openai
from enum import Enum
from typing import Tuple, ForwardRef, List
from pydantic import BaseModel, Field

# Enables the response_model 
instructor.patch()

In [6]:
import json
from IPython.display import display, HTML

# A function to output JSON in a readable form
def json_pretty_print(json_string):

    # Load the JSON string
    json_data = json.loads(json_string)

    # Convert the JSON data to a pretty-printed string
    pretty_json = json.dumps(json_data, indent=4)

    # Display the pretty-printed JSON string as preformatted text in HTML
    display(HTML(f'<pre>{pretty_json}</pre>'))

# Now let's see how Instructor+Pydantic can help us get predictable data structures as outputs of an LLM

Let's ask LLM to assess user's answer to a specific question.

In [8]:
# Possible assessment statuses
class Status(str, Enum):
    correct = "correct"
    incorrect = "incorrect"

# We will use this to model the output of our LLM
class QAAssessment(BaseModel):
    question: str = Field(..., description="Question")
    answer: str = Field(..., description="Answer")
    assessment_status: Status = Field(..., description="Assessment status")
    assessment_comment: str

# Let's do a 10 cycle test to assess how stable is the result
for i in range(10):
    pydtest: QAAssessment = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        temperature=0,
        response_model=QAAssessment,
        messages=[
            {"role": "system", "content": "You are an assisstant that assesses if the Human knows the colors of fruits."},
            {"role": "assistant", "content": "What is the color of an apple?"},
            {"role": "user", "content": "Blue"},
        ]
    )

    json_response = pydtest.model_dump_json()
    json_pretty_print(json_response)

**As you can see instead of just getting a text answer from AI, we are getting a data structure that we can use in our AI app logic.**

**Now, what if we want to give user another chance before giving final assessment?**

In [9]:
import random

# Now we have more possible assessment statuses
class Status(str, Enum):
    correct_preliminary = "correct_preliminary"
    incorrect_preliminary = "incorrect_preliminary"
    correct_final = "correct_final"
    incorrect_final = "incorrect_final"

class QAAssessment(BaseModel):
    question: str = Field(..., description="Question")
    answer: str = Field(..., description="Answer")
    assessment_status: Status = Field(..., description="Assessment status")
    assessment_comment: str


answers = ["Blue", "Purple", "Red"]



for i in range(10):
    print(f"\n\nRound {i+1}")

    messages = [
        {"role": "system", "content": "You are an assisstant that assesses if the Human knows the colors of fruits. If the answer is incorrect, you ask the question once again. You do this 3 times and after that you give your final assessment. If the answer is correct, you give final assessment."},
        {"role": "assistant", "content": "What is the color of an apple?"},
        {"role": "user", "content": random.choice(answers)},
    ]    
    
    final_assessment = 0

    j = 0
    while not final_assessment:
        print(f"Attempt {j+1}")
    
        pydtest: QAAssessment = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0613",
            temperature=0,
            response_model=QAAssessment,
            messages=messages
        )

        json_response = pydtest.model_dump_json()
        json_pretty_print(json_response)

        if pydtest.assessment_status == Status.incorrect_preliminary or pydtest.assessment_status == Status.correct_preliminary:
            messages.append({"role": "assistant", "content": "What is the color of an apple?"})
            messages.append({"role": "user", "content": random.choice(answers)})
        else:
            final_assessment = 1
        
        j += 1




Round 1
Attempt 1




Round 2
Attempt 1


Attempt 2


Attempt 3




Round 3
Attempt 1




Round 4
Attempt 1




Round 5
Attempt 1


Attempt 2


Attempt 3




Round 6
Attempt 1


Attempt 2


Attempt 3




Round 7
Attempt 1




Round 8
Attempt 1


Attempt 2


Attempt 3




Round 9
Attempt 1


Attempt 2




Round 10
Attempt 1


Attempt 2


Attempt 3


**Works great!**

**Now, let's make our assessment logic even more complex!**

In [10]:
import random


class Status(str, Enum):
    correct_preliminary = "correct_preliminary"
    incorrect_preliminary = "incorrect_preliminary"
    correct_final = "correct_final"
    incorrect_final = "incorrect_final"

class QAAssessment(BaseModel):
    question: str = Field(..., description="Question")
    response: str = Field(..., description="Combined answer consisting of all the responses of the Human")
    assessment_status: Status = Field(..., description="Assessment status")
    assessment_comment: str
    followup_question: str = Field(..., description="A question we ask the Human if the assessment status is not final yet")


question = "What are the main functions of Generative AI?"


for i in range(10):
    print(f"\n\nRound {i+1}")

    response = input(question)
    
    messages = [
        {
            "role": "system",
            "content": f"""
                    Act as an assisstant that assesses Human's responses to the Question against the Ideal Answer.
                    
                    The Question is: {question}
                    The Ideal Answer is:
                    ```
                    1. Content Creation
                    2. Data Augmentation
                    3. Pattern Discovery and Simulation
                    ```
                    
                    Assessment process:
                    If the response covers all the points from the Ideal Answer, your assessment is correct_final.
                    Otherwise - you ask a follow-up question.
                    If after 2 Human's reponses, the combined responses don't cover all the points from the Ideal Answer, your assessment is incorrect_final. Otherwise - correct_final.
                """
        },
        {"role": "assistant", "content": question},
        {"role": "user", "content": response},
    ]    
    
    final_assessment = 0

    j = 0
    while not final_assessment:
        print(f"Attempt {j+1}")
    
        pydtest: QAAssessment = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0613",
            temperature=0,
            response_model=QAAssessment,
            messages=messages
        )

        json_response = pydtest.model_dump_json()
        json_pretty_print(json_response)
        

        if pydtest.assessment_status == Status.incorrect_preliminary or pydtest.assessment_status == Status.correct_preliminary:
            response = input(pydtest.followup_question)
            if response == "exit":
                break

            messages.append({"role": "assistant", "content": pydtest.followup_question})
            messages.append({"role": "user", "content": response})
        else:
            final_assessment = 1
        
        j += 1

    if response == "exit":
        break




Round 1
What are the main functions of Generative AI?content
Attempt 1


Can you think of any other functions of Generative AI?idk
Attempt 2


Can you think of any other functions of Generative AI?whatever
Attempt 3




Round 2
What are the main functions of Generative AI?wood
Attempt 1


Can you please provide more information about the main functions of Generative AI?table
Attempt 2


Can you provide more information about the main functions of Generative AI?goo
Attempt 3


Can you please provide more information about the main functions of Generative AI?whatever
Attempt 4




Round 3
What are the main functions of Generative AI?                    1. Content Creation                     2. Data Augmentation                     3. Pattern Discovery and Simulation
Attempt 1




Round 4
What are the main functions of Generative AI?exit
Attempt 1


**Amazing, now we can use this as a module in survey-type LLM apps!**