In [1]:
import os

from dotenv import load_dotenv

# Load API keys from a .env file. The default is the author's local checkout;
# set the DOTENV_PATH environment variable to point at a different file when
# running this notebook on another machine.
load_dotenv(os.environ.get("DOTENV_PATH", "D:\\gitFolders\\python_de_learners_data\\.env"))

True

In [2]:
import openai
import os
# Hand the key to the OpenAI client. Indexing os.environ (rather than .get)
# raises KeyError immediately if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
# Function-calling schemas offered to the chat model. Every entry exposes
# exactly one required string parameter, so they are all built by one helper
# instead of four copy-pasted dict literals.


def _string_param_function(name, description, param, param_description):
    """Build a function-calling schema with a single required string parameter.

    Args:
        name: Function name the model is asked to "call".
        description: What the function does, as shown to the model.
        param: Name of the function's only parameter.
        param_description: Description of that parameter, as shown to the model.

    Returns:
        A dict with the keys "name", "description" and "parameters", where
        "parameters" is a JSON-schema object declaring `param` as a required
        string.
    """
    return {
        "name": name,
        "description": description,
        "parameters": {
            "type": "object",
            "properties": {
                param: {
                    "type": "string",
                    "description": param_description,
                }
            },
            "required": [param],
        },
    }


functions = [
    _string_param_function(
        "get_genre",
        "Generates a Genre of fiction for the next story",
        "genre",
        "the genre of fiction",
    ),
    # NOTE(review): `char_list` is declared as a string although it is
    # described as a list of names -- confirm whether an array type is wanted.
    _string_param_function(
        "gen_char_names",
        "Generates a list of 10 character names for the story.",
        "char_list",
        "this is a list of 10 character names.",
    ),
    _string_param_function(
        "get_title",
        "Generates a title for the story.",
        "title",
        "this is the title of the story based on the plot outline provided",
    ),
    _string_param_function(
        "get_author",
        "Generates an author for the story.",
        "choice",
        "this is the author whos voice will inspire the story based on the plot outline provided",
    ),
]


In [None]:
# Conversation sent to the model: a system instruction plus one user request.
chat_messages = [
    {
        "role": "system",
        "content": "you are a helpful assistant. Follow the prompt directions exactly. ",
    },
    {
        "role": "user",
        # NOTE(review): the prompt mentions "the following list of names" but
        # no list is appended (an earlier "{prompt}" suffix was commented out)
        # -- confirm the intended wording.
        "content": "develop a list of 10 character names that are different from the following list of names",
    },
]

# Force the reply through the second schema in `functions` (gen_char_names).
forced_function = {"name": functions[1]["name"]}

# NOTE(review): `functions` / `function_call` are the legacy function-calling
# parameters; check the OpenAI API reference before relying on them long term.
response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=chat_messages,
    functions=functions,
    function_call=forced_function,
    temperature=0.9,
    max_tokens=100,
    top_p=1,
    frequency_penalty=1,
    presence_penalty=1,
    n=1,
)

Define Task

Define Pipeline

Explore examples

Define Data

Define Metric

Collect zero-shot evaluations

Compile DSPy optimizer

Iterate

In [None]:
import dspy
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric

# Language-model client used by the DSPy cells below; completions are capped
# at 100 tokens.
turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=100)

# Register the client as the default LM in DSPy's global settings.
dspy.settings.configure(lm=turbo)

In [4]:
# Load the GSM8K dataset, then keep only the first 10 train and 10 dev
# examples so the optimisation and evaluation cells stay quick.
gsm8k = GSM8K()
gsm_train = gsm8k.train[:10]
gsm_dev = gsm8k.dev[:10]

100%|██████████| 7473/7473 [00:00<00:00, 8971.20it/s] 
100%|██████████| 1319/1319 [00:00<00:00, 11882.90it/s]


In [5]:
class CoT(dspy.Module):
    """DSPy module that answers a question with one chain-of-thought step."""

    def __init__(self):
        """Create the `question -> answer` chain-of-thought predictor."""
        super().__init__()
        qa_signature = "question -> answer"
        # Stored under `prog`; keep this attribute name stable for any code
        # that inspects the module's sub-predictors.
        self.prog = dspy.ChainOfThought(signature=qa_signature)

    def forward(self, question):
        """Run the predictor on `question` and return what it produces."""
        prediction = self.prog(question=question)
        return prediction

In [7]:
from rich import print

# Inspect the first dev example; `print` here is rich's print imported above.
# Presumably .items() yields the example's (field, value) pairs -- verify.
print(gsm_dev[0].items())

In [None]:
# Show only the keys of the first dev example (presumably its field names).
print(gsm_dev[0].keys())

In [6]:
# Smoke-test the un-optimised program on a hand-written question before any
# few-shot compilation.
cot = CoT()
cot(
    question="""There is more than 20 cows in the vicinity.
    Each cow can eat 1 square meter of grass in a day. Minimum 
    how many square meter of grass is required for the cows?"""
)

Prediction(
    rationale='produce the answer. We know that there are more than 20 cows, and each cow can eat 1 square meter of grass in a day. Therefore, the minimum number of square meters of grass required for the cows would be the number of cows multiplied by the amount of grass each cow can eat in a day.',
    answer='The minimum number of square meters of grass required for the cows would be more than 20 square meters.'
)

In [11]:
import inspect
# Print the source code of gsm8k_metric to see how it scores a prediction.
print(inspect.getsource(gsm8k_metric))

In [None]:
# Try the metric on a hand-made (gold, prediction) pair.
#
# Two fixes compared with the earlier version of this cell:
#   * the gold assignment ended with a trailing comma, which stored a 1-tuple
#     instead of the string;
#   * plain dicts were used. NOTE(review): gsm8k_metric appears to read
#     `gold.answer` / `pred.answer` as attributes (see the source printed in
#     the previous cell) -- confirm. dspy.Example / dspy.Prediction are used
#     here instead, assuming (per the DSPy docs) that they accept both
#     `obj.answer` and `obj["answer"]`.
gold = dspy.Example(
    answer="'produce the answer. We know that there are more than 20 cows, and each cow can eat 1 square meter of grass in a day. Therefore, the minimum number of square meters of grass required for the cows would be the number of cows multiplied by the amount of grass each cow can eat in a day."
)
pred = dspy.Prediction(
    answer="We cannot reliably find the answer with the data given"
)

gsm8k_metric(gold, pred)

In [13]:
# using teleprompt to optimize
from dspy.teleprompt import BootstrapFewShot

# Demo budget for the optimiser: at most 2 bootstrapped and 2 labeled demos.
config = {
    "max_bootstrapped_demos": 2,
    "max_labeled_demos": 2,
}

# Few-shot bootstrapping optimiser scored with the GSM8K metric.
optimizer = BootstrapFewShot(metric=gsm8k_metric, **config)


In [14]:
# Compile a fresh CoT program with the optimiser, using the small train slice
# as the training set and the dev slice as the validation set.
optimized_cot = optimizer.compile(CoT(), trainset=gsm_train, valset=gsm_dev)

 20%|██        | 2/10 [00:06<00:26,  3.32s/it]

Bootstrapped 2 full traces after 3 examples in round 0.





In [15]:
from dspy.evaluate import Evaluate

# Build one evaluator over the dev slice so that several programs can be
# scored with identical settings (4 threads, progress bar, no results table).
evaluate = Evaluate(
    devset=gsm_dev,
    metric=gsm8k_metric,
    num_threads=4,
    display_progress=True,
    display_table=0,
)

# Score the compiled program; the call's return value is the cell's output.
evaluate(optimized_cot)

Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:08<00:00,  1.16it/s]

Average Metric: 7 / 10  (70.0%)



  df = df.applymap(truncate_cell)


70.0

In [2]:
# Lists the attributes of a str *instance* (the literal 'str'), not of a
# variable named str. NOTE(review): looks like leftover scratch work --
# confirm whether this cell is still needed.
dir('str')

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'removeprefix',
 'removesuffix',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'stri

In [16]:
# Show the most recent prompts sent through `turbo` (n=2 presumably limits
# this to the last two calls) to see the few-shot demos the optimiser added.
turbo.inspect_history(n=2)





Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?
Reasoning: Let's think step by step in order to produce the answer. We know that Ella got 4 incorrect answers out of 40, so she got 36 correct answers. Half of 36 is 18, and 6 more than that is 24. Therefore, Marion got 24 correct answers out of 40.
Answer: 24

---

Question: Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.
Reasoning: Let's think step by step in order to produce the answer. We know that Stephen made 10 round trips up and down the mo