In [6]:
%load_ext jupyter_black

In [60]:
import os
import instructor
from pydantic import BaseModel
from openai import OpenAI
import getpass
from typing import Iterable, Literal

In [13]:
# Prefer the OPENAI_API_KEY environment variable so the notebook can be re-run
# without an interactive prompt; otherwise ask for the key without echoing it.
_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass.getpass("Your API key: \n")

Your API key: 
 ········


In [16]:
# Plain OpenAI SDK client, authenticated with the key stored in `_API_KEY`.
open_ai = OpenAI(api_key=_API_KEY)
# Instructor wrapper (TOOLS_STRICT mode) around that client; the prompting
# functions in this notebook call `client.chat.completions.create(...)` with a
# `response_model`.
client = instructor.from_openai(open_ai, mode=instructor.Mode.TOOLS_STRICT)

## S2A (System 2 Attention)
S2A works in two steps:
* Ask to rewrite the prompt to remove any unnecessary information
* Use the rewritten prompt for the answer

In [38]:
# Implementation following the `Instructor` documentation
# https://python.useinstructor.com/prompting/zero_shot/s2a/


# Structured output for S2A step 1; passed as `response_model` in `rewrite_prompt`.
class FirstStep(BaseModel):
    relevant_context: str  # the part of the user's text that matters for the question
    user_query: str  # the question the user is actually asking


# Structured output for S2A step 2; passed as `response_model` in
# `generate_final_response`.
class SecondStep(BaseModel):
    answer: int  # integer-only answer, as in the candy-counting example


def rewrite_prompt(query):
    """Run S2A step 1: ask the model to isolate the relevant part of ``query``.

    Args:
        query: Raw user text; it is interpolated verbatim into the prompt.

    Returns:
        Whatever ``client.chat.completions.create`` returns when called with
        ``response_model=FirstStep``.
    """
    # The indentation inside the literal is part of the text sent to the model.
    extraction_request = f"""
                    Given the following text by a user, extract the part
                    that is actually relevant to their question. Please
                    include the actual question or query that the user
                    is asking.

                    Text by user:
                    {query}
                    """
    user_message = {"role": "user", "content": extraction_request}
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=FirstStep,
        messages=[user_message],
    )


def generate_final_response(rewritten_prompt, response_model=None):
    """Run S2A step 2: answer the question using only the rewritten prompt.

    Args:
        rewritten_prompt: Object exposing ``relevant_context`` and
            ``user_query`` attributes (e.g. the result of ``rewrite_prompt``).
        response_model: Optional structured-output model for the answer.
            When omitted, ``SecondStep`` (an integer answer) is used, which
            is the original behaviour; pass a different model for questions
            whose answer is not a number.

    Returns:
        The object produced by ``client.chat.completions.create`` for the
        chosen response model.
    """
    # Resolve the default at call time so the signature does not depend on
    # the order in which notebook cells were executed.
    if response_model is None:
        response_model = SecondStep

    answer_request = f"""{rewritten_prompt.relevant_context}
                    Question: {rewritten_prompt.user_query}"""
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=response_model,
        messages=[{"role": "user", "content": answer_request}],
    )

In [31]:
# Worked S2A example: a word problem containing one sentence (Max's age) that
# has nothing to do with the question being asked.
query = """Mary has 3 times as much candy as Megan.
        Mary then adds 10 more pieces of candy to her collection.
        Max is 5 years older than Mary.
        If Megan has 5 pieces of candy, how many does Mary have in total?
        """

# Step 1: Rewrite the prompt
rewritten_prompt = rewrite_prompt(query)
print(rewritten_prompt.relevant_context)

print(rewritten_prompt.user_query)
# > e.g. "how many does Mary have in total?" (the model may rephrase it)

# Step 2: Generate the final response
final_response = generate_final_response(rewritten_prompt)
print(final_response.answer)

Megan has 5 pieces of candy. Mary has 3 times as much candy as Megan, and then adds 10 more pieces of candy.
How many pieces of candy does Mary have in total?
25


In [37]:
# Longer S2A input: several paragraphs of Penn State history followed by a
# single question whose answer is a person's name.
psu_query = """As Pennsylvania's only land-grant university, Penn State 
           has a broad mission of teaching, research, and public service. 
           But that mission was not so grandly conceived in 1855, when the 
           Commonwealth chartered it as a college of agricultural science 
           to apply scientific principles to farming.

           Centre County became the site of the new college in response to a
           gift of 200 acres from gentleman farmer and ironmaster James Irvin of Bellefonte. 
           Founding President Evan Pugh drew on the scientific education he had 
           received in Europe to plan a curriculum that combined theoretical
           studies with practical applications.

           Planning for a new academic initiative in the information sciences was 
           introduced in 1997, and the School of Information Sciences and Technology was
           approved by Penn State's Board of Trustees soon after on September 11, 1998.
           
           The school opened its doors at Penn State's University Park campus on August 25, 1999,
           to 105 students and five full-time faculty members, who led 43 new courses.
           In total, 428 students enrolled in IST programs at 19 
           Penn State campuses across the state.

           Who was the first president of Penn State?
           """

In [36]:
# Step 1: Rewrite the prompt.
# Use the Penn State passage (`psu_query`); the bare name `query` still holds
# the candy word problem after a fresh Restart & Run All.
rewritten_prompt = rewrite_prompt(psu_query)
print(rewritten_prompt.relevant_context)

print(rewritten_prompt.user_query)
# > expected: the question about Penn State's first president

# Step 2: Generate the final response
# NOTE: the answer to this question is a name, while `SecondStep.answer` is
# declared as an int.
final_response = generate_final_response(rewritten_prompt)
print(final_response.answer)

As Pennsylvania's only land-grant university, Penn State has a broad mission of teaching, research, and public service. But that mission was not so grandly conceived in 1855, when the Commonwealth chartered it as a college of agricultural science to apply scientific principles to farming. Centre County became the site of the new college in response to a gift of 200 acres from gentleman farmer and ironmaster James Irvin of Bellefonte. Founding President Evan Pugh drew on the scientific education he had received in Europe to plan a curriculum that combined theoretical studies with practical applications. Planning for a new academic initiative in the information sciences was introduced in 1997, and the School of Information Sciences and Technology was approved by Penn State's Board of Trustees soon after on September 11, 1998. The school opened its doors at Penn State's University Park campus on August 25, 1999, to 105 students and five full-time faculty members, who led 43 new courses. I

# TODO: Fix the error

Hint: What's the expected type in the `response_model` for `generate_final_response`?

# Self-Ask

Steps:
* decide if follow-up questions are required
* generate the follow-up questions
* answer the follow-up questions
* answer the main query

In [40]:
# Implementation following the `Instructor` documentation
# https://python.useinstructor.com/prompting/zero_shot/self_ask/


# One follow-up question together with its answer; element type of
# `SelfAskResponse.follow_ups`.
class FollowUp(BaseModel):
    question: str  # the follow-up question
    answer: str  # the answer given for that follow-up


# Complete Self-Ask result; passed as `response_model` in `self_ask`.
class SelfAskResponse(BaseModel):
    is_followup_required: bool  # whether follow-up questions were needed
    follow_ups: list[FollowUp]  # the follow-up question/answer pairs
    final_answer: str  # answer to the original query


def self_ask(query):
    """Send ``query`` through a single Self-Ask prompt.

    The prompt asks whether follow-up questions are needed and, if so, for
    the follow-ups, their answers, and the final answer.

    Args:
        query: The question to answer; interpolated verbatim into the prompt.

    Returns:
        Whatever ``client.chat.completions.create`` returns when called with
        ``response_model=SelfAskResponse``.
    """
    # The prompt is sent as a single "system" message.
    self_ask_prompt = f"""Query: {query}
                        Are follow-up questions needed?
                        If so, generate follow-up questions, their answers, and then the final answer to the query.
                        """
    system_message = {"role": "system", "content": self_ask_prompt}
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=SelfAskResponse,
        messages=[system_message],
    )


# Self-Ask example: a question that needs an intermediate fact (the discovery
# date) before it can be answered.
query = "Who was president of the U.S. when superconductivity was discovered?"

response = self_ask(query)

# Print the follow-up flag, then each follow-up pair, then the final answer.
print(response.is_followup_required)
for follow_up in response.follow_ups:
    print(follow_up)

print(response.final_answer)

True
question='When was superconductivity discovered?' answer='Superconductivity was discovered on April 8, 1911.'
question='Who was the president of the U.S. in 1911?' answer='William Howard Taft was the president of the U.S. in 1911.'
William Howard Taft was the president of the U.S. when superconductivity was discovered.


In [47]:
# Second Self-Ask example: same call-and-print sequence, different question.
query = "What's the longest distance between two Penn State campuses?"

response = self_ask(query)

# Print the follow-up flag, then each follow-up pair, then the final answer.
print(response.is_followup_required)
for follow_up in response.follow_ups:
    print(follow_up)

print(response.final_answer)

True
question='How many Penn State campuses are there?' answer='There are 24 Penn State campuses.'
question='What are the campuses with the longest distance between them?' answer='The campuses with the longest distance between them are Penn State University Park and Penn State Berks.'
question='What is the distance between Penn State University Park and Penn State Berks?' answer='The distance between Penn State University Park and Penn State Berks is approximately 124 miles.'
The longest distance between two Penn State campuses is approximately 124 miles, specifically between Penn State University Park and Penn State Berks.


# Few-shot prompting


In [53]:
# Structured output for the few-shot sentiment example; passed as
# `response_model` in `few_shot_sentiment_analysis`.
class SentimentLabel(BaseModel):
    sentiment: str  # free-form label; the prompt examples use positive / neutral / negative


def few_shot_sentiment_analysis(query):
    """Label ``query`` using a three-example few-shot prompt.

    Args:
        query: Sentence to label; appended to the examples followed by ``:``.

    Returns:
        Whatever ``client.chat.completions.create`` returns when called with
        ``response_model=SentimentLabel``.
    """
    # examples are from https://arxiv.org/pdf/2202.12837
    few_shot_prompt = f"""Circulation revenue has increased by 5% in Finland: positive
                            Panostaja did not disclose the purchase price: neutral
                            Paying off the national debt will be extremely painful: negative
                            {query}:
                        """
    system_message = {"role": "system", "content": few_shot_prompt}
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=SentimentLabel,
        messages=[system_message],
    )


# Few-shot example: a negated sentence to label.
query = "I did not think the movie was bad"
response = few_shot_sentiment_analysis(query)
print(response.sentiment)

positive


# TODO: 3-shot prompting to retrieve the org name given its mascot

# Analogical prompting


In [65]:
# One problem with its solution; used both for the recalled problems and for
# the final answer in `AnswersToRelevantProblems`.
class RelevantProblem(BaseModel):
    problem_explanation: str  # description of the problem
    solution: str  # explanation of its solution


# Structured output for analogical prompting; passed as `response_model` in
# `analogical_prompting`.
class AnswersToRelevantProblems(BaseModel):
    relevant_problems: list[RelevantProblem]  # the recalled problems (the prompt asks for three)
    answer: RelevantProblem  # the answer slot, in the same problem/solution shape


def analogical_prompting(query: str):
    """Ask the model to recall three related problems before solving ``query``.

    Args:
        query: The problem statement, placed inside ``<problem>`` tags.

    Returns:
        Whatever ``client.chat.completions.create`` returns when called with
        ``response_model=AnswersToRelevantProblems``.
    """
    analogy_prompt = f"""
                <problem>
                {query}
                </problem>

                Relevant Problems: Recall three relevant and
                distinct problems. For each problem, describe
                it and explain the solution before solving
                the problem
                """
    request = {
        "model": "gpt-4o-mini",
        "response_model": AnswersToRelevantProblems,
        "messages": [{"role": "user", "content": analogy_prompt}],
    }
    return client.chat.completions.create(**request)


# Analogical-prompting example: print each recalled problem, then the answer,
# as indented JSON.
query = "Which country has the third largest GDP per capita in South Asia?"
response = analogical_prompting(query)
for problem in response.relevant_problems:
    print(problem.model_dump_json(indent=2))

print(response.answer.model_dump_json(indent=2))

{
  "problem_explanation": "Identifying the countries in South Asia and their respective GDP per capita figures. This involves collecting data from reliable economic sources or databases to find the relevant statistics for each country in the region.",
  "solution": "By researching the GDP per capita of countries in South Asia, we can compare the figures and determine their rankings."
}
{
  "problem_explanation": "Understanding the economic factors that influence GDP per capita, such as population size, economic development, and income distribution. This helps in contextualizing the GDP per capita figures for countries in South Asia.",
  "solution": "Examining the socio-economic context of each country allows for a better understanding of why certain countries have higher GDP per capita than others."
}
{
  "problem_explanation": "Exploring the differences in GDP per capita across different regions and how it affects the overall economic standing of countries within South Asia. This wil

# Tabular Chain Of Thought (Tab-CoT)

In [68]:
# One row of the Tab-CoT reasoning table; element type of
# `TabCoTResponse.reasoning`.
class ReasoningStep(BaseModel):
    step: int  # step number
    subquestion: str  # the sub-question addressed in this step
    procedure: str  # how the sub-question is answered
    result: str  # outcome of this step


# Structured output for Tab-CoT; passed as `response_model` in
# `generate_structured_reasoning_response`.
class TabCoTResponse(BaseModel):
    reasoning: list[ReasoningStep]  # the reasoning steps, listed before the answer
    correct_answer: int  # integer-only final answer


def generate_structured_reasoning_response(query: str, context: str):
    """Answer ``query`` from ``context`` with tabular step-by-step reasoning.

    Args:
        query: The question, placed inside ``<query>`` tags.
        context: Background text, placed inside ``<context>`` tags.

    Returns:
        Whatever ``client.chat.completions.create`` returns when called with
        ``response_model=TabCoTResponse``.
    """
    # The whole prompt travels in a single "developer"-role message.
    tab_cot_prompt = f"""
                <system>
                    <role>expert Question Answering system</role>
                    <instruction>Make sure to output your reasoning in structured reasoning steps before generating a response to the user's query.</instruction>
                </system>

                <context>
                    {context}
                </context>

                <query>
                    {query}
                </query>
                """
    developer_message = {"role": "developer", "content": tab_cot_prompt}
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=TabCoTResponse,
        messages=[developer_message],
    )


# Tab-CoT example: a small arithmetic word problem; the full structured
# response (reasoning steps plus answer) is printed as indented JSON.
query = "How many computers are now in the server room?"

context = """There were nine computers in the server room. Five more computers were installed each day, from monday to thursday."""

response = generate_structured_reasoning_response(query, context)
print(response.model_dump_json(indent=2))

{
  "reasoning": [
    {
      "step": 1,
      "subquestion": "How many computers were initially in the server room?",
      "procedure": "Identify the initial number of computers mentioned in the context.",
      "result": "There were 9 computers initially."
    },
    {
      "step": 2,
      "subquestion": "How many days were computers installed?",
      "procedure": "Count the days from Monday to Thursday.",
      "result": "Computers were installed for 4 days."
    },
    {
      "step": 3,
      "subquestion": "How many computers were added each day?",
      "procedure": "Identify the number of computers installed each day from the context.",
      "result": "5 computers were installed each day."
    },
    {
      "step": 4,
      "subquestion": "What is the total number of computers installed?",
      "procedure": "Multiply the number of days by the number of computers added each day.",
      "result": "4 days * 5 computers/day = 20 computers installed."
    },
    {
      "st

# Few-shot Contrastive Chain Of Thought

In [72]:
# Structured output for contrastive chain of thought; passed as
# `response_model` in `contrastive_chain_of_thought`.
class ChainOfThought(BaseModel):
    chain_of_thought: str  # the reasoning text
    correct_answer: str  # the final answer, as text


def contrastive_chain_of_thought(
    query: str,
    context: str,
    example_prompt: str,
    correct_examples: list[str],
    incorrect_examples: list[str],
):
    """Answer ``query`` after showing the model correct and incorrect reasoning.

    Args:
        query: The question to answer.
        context: Background text for ``query``.
        example_prompt: The sample question the example explanations refer to.
        correct_examples: Valid explanations; each is wrapped in
            ``<Explanation>`` tags.
        incorrect_examples: Flawed explanations; each is wrapped in
            ``<WrongExplanation>`` tags.

    Returns:
        Whatever ``client.chat.completions.create`` returns when called with
        ``response_model=ChainOfThought``.
    """
    # One tagged explanation per line; correct ones are listed first.
    good_block = "\n".join(
        f"<Explanation>{text}</Explanation>" for text in correct_examples
    )
    bad_block = "\n".join(
        f"<WrongExplanation>{text}</WrongExplanation>" for text in incorrect_examples
    )

    contrastive_prompt = f"""
                <prompt>
                <role>system</role>
                <context>
                You are an expert question answering AI System.

                You are about to be given some examples of incorrect
                and correct reasoning for a question. You will then
                be asked to correctly reason through another question
                to generate a valid response.
                </context>

                <question>{example_prompt}</question>

                <Explanations>
                    {good_block}
                    {bad_block}
                </Explanations>
                <context>{context}</context>
                <question>{query}</question>

                </prompt>
            """
    system_message = {"role": "system", "content": contrastive_prompt}
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=ChainOfThought,
        messages=[system_message],
    )


# Inputs for the question we actually want answered.
context = """
James writes a 3-page letter to 2
different friends twice a week.
"""
query = "How many pages does James write in a year?"

# A separate sample question that the example explanations below refer to.
sample_question = """
James has 30 teeth. His dentist drills 4
of them and caps 7 more teeth than he drills.

What percentage of James' teeth does the dentist fix?
"""

# Deliberately flawed explanations for the sample question.
incorrect_examples = [
    """James has 30 teeth. The dentist drills and caps some
    teeth. Since drills are normally used on cars and not
    teeth, it's safe to say none of the teeth were actually
    fixed.""",
    """The dentist drills 4 teeth and caps 11 of them, which
    means that he fixes 15 teeth. So we take 15 and multiply
    it by the number of petals on a daisy, and the result is
    30%, which is the percentage of teeth he fixes.""",
]

# A valid explanation for the sample question.
correct_examples = [
    """The dentist drills 4 teeth, so there are 30 - 4 = 26
    teeth left. The dentist caps 7 more teeth than he drills,
    so he caps 4 + 7 = 11 teeth. Therefore, the dentist fixes
    a total of 4 + 11 = 15 teeth. To find the percentage of
    teeth the dentist fixes, we divide the number of teeth
    fixed by the total number of teeth and multiply by 100:
    15/30 x 100 = 50%"""
]

response = contrastive_chain_of_thought(
    query=query,
    context=context,
    example_prompt=sample_question,
    correct_examples=correct_examples,
    incorrect_examples=incorrect_examples,
)

# Print the reasoning and answer as indented JSON.
print(response.model_dump_json(indent=2))

{
  "chain_of_thought": "James writes a 3-page letter to 2 friends twice a week. This means he writes a total of 3 pages * 2 friends = 6 pages per week. In a year, there are 52 weeks, so James writes 6 pages/week * 52 weeks/year = 312 pages in a year.",
  "correct_answer": "312 pages"
}
