In [1]:
# Show the directory the kernel started in (the recorded output shows the notebooks folder).

import os   
print( os.getcwd() )
# Switch to the project's source root and print again to confirm the change took effect.
# NOTE(review): presumably done so the later `import lib....` statements resolve — confirm.
os.chdir( "/var/genie-in-the-box/src/" )
print( os.getcwd() )

/var/genie-in-the-box/src/ephemera/notebooks
/var/genie-in-the-box/src


In [38]:
# Sample LLM response used as the input for XML validation in this notebook.
# NOTE: the fourth <line> element contains a raw `<=` and a raw `&`. Neither is legal inside XML
# character data unless escaped (&lt; / &amp;) or wrapped in CDATA, so this document is NOT
# well-formed; the parser reports the failure at line 9 of the string (the `<=`).
xml_string = """
<response>
    <question>got the time</question>
    <thoughts>The question is asking for events that are happening today. I need to filter the dataframe by the start and end dates, and return the events that are happening today.</thoughts>
    <code>
        <line>import pandas as pd</line>
        <line>def get_events_today(df):</line>
        <line>    today = pd.Timestamp(pd.Timestamp.today())</line>
        <line>    solution = df[(df['start_date'] <= today) & (df['end_date'] >= today)]</line>
        <line>    return solution</line>
    </code>
    <returns>dataframe</returns>
    <example>solution = get_events_today(df)</example>
    <explanation>The function first gets the current date as a pandas timestamp. Then it filters the dataframe to include only the rows where the start date is less than or equal to today and the end date is greater than or equal to today. The filtered dataframe is then returned as the solution.</explanation>
    <error>None</error>
</response>"""

In [39]:
import xml.etree.ElementTree as et
import traceback
import lib.utils.util as du

def validate_xml(xml_string):
    """
    Check that a response is well-formed XML containing every expected field.

    The document must parse, its root element must have direct children named
    question, thoughts, code, returns, example, explanation and error, and the
    <code> element must contain at least one <line> child.

    On failure a banner and the offending XML are printed (plus the parser's
    traceback when the document does not parse) before the error is raised.

    :param xml_string: Raw XML text to validate.
    :return: None when the document passes every check.
    :raises ValueError: If the XML cannot be parsed or a required tag is missing.
        The exception's only argument is the human-readable reason as a string;
        the underlying failure is chained and reachable through ``__cause__``.
    """

    class MalformedXmlError(Exception):
        """Internal marker for any validation failure; callers only ever see ValueError."""

    expected_tags = ( "question", "thoughts", "code", "returns", "example", "explanation", "error" )

    def validate( xml_string ):
        try:
            root = et.fromstring( xml_string )
        except et.ParseError as e:
            # Show exactly where the parser gave up (line/column) before re-raising.
            traceback.print_exc()
            raise MalformedXmlError( "The XML is malformed." ) from e

        # Only direct children of the root count; nested occurrences are not searched.
        # `is None` is required here: an Element without children is falsy.
        for tag in expected_tags:
            if root.find( tag ) is None:
                raise MalformedXmlError( f"The XML is missing the <{tag}> tag." )

        # <code> is guaranteed to exist at this point because it is one of the expected tags.
        if not root.find( "code" ).findall( "line" ):
            raise MalformedXmlError( "The XML is missing the <line> tag inside the <code> tag." )

    try:
        validate( xml_string )
    except MalformedXmlError as e:
        du.print_banner( e, expletive=True )
        print( xml_string )
        # Pass the message (not the exception object) and chain explicitly so that
        # callers get a plain string in `args` and the root cause in `__cause__`.
        raise ValueError( str( e ) ) from e

# xml_string = """
# <response>
#  <question></question>
#  <thoughts></thoughts>
#  <code>
#     <line></line>
#     <line></line>
#     <line></line>
#  </code>
#  <returns>Object type of the variable `solution`</returns>
#  <example>One-line example of how to call your code: solution = function_name_here( arguments )</example>
#  <explanation>Explanation of how the code works</explanation>
#  <error>Description of any issues or errors that you encountered while attempting to fulfill this request</error>
# </response>
# """
# Run the validator against the sample response held in `xml_string`. With the sample as written
# this raises ValueError ("The XML is malformed."): the raw `<=` inside one of its <line> elements
# is not valid XML, and the parser stops there.
validate_xml( xml_string )


¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿
¡@#!-$?%^_¿
¡@#!-$?%^_¿ The XML is malformed.
¡@#!-$?%^_¿
¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿


<response>
    <question>got the time</question>
    <thoughts>The question is asking for events that are happening today. I need to filter the dataframe by the start and end dates, and return the events that are happening today.</thoughts>
    <code>
        <line>import pandas as pd</line>
        <line>def get_events_today(df):</line>
        <line>    today = pd.Timestamp(pd.Timestamp.today())</line>
        <line>    solution = df[(df['start_date'] <= today) & (df['end_date'] >= today)]</line>
        <line>    return solution</line>
    </code>
    <returns>dataframe</returns>
    <example>solution = get_events_today(df)</example>
    <explanation>The function first gets the curren

Traceback (most recent call last):
  File "/tmp/ipykernel_22/440539042.py", line 12, in validate
    root = et.fromstring( xml_string )
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/xml/etree/ElementTree.py", line 1338, in XML
    parser.feed(text)
xml.etree.ElementTree.ParseError: not well-formed (invalid token): line 9, column 51


ValueError: The XML is malformed.

## regex munging

In [7]:
import re

# Space-separated numbers that should be squeezed together.
numbers = "1 2 3 4 5 6 7 8 9 10"

# Drop each single space that has a digit immediately on both sides. The lookbehind and
# lookahead only inspect the neighbours, so the digits themselves are never consumed.
modified_numbers = re.compile( r'(?<=\d) (?=\d)' ).sub( '', numbers )

modified_numbers


'12345678910'

In [10]:
# Same idea for word characters. Note that `\w` matches letters, digits and underscore, and that
# the leading space survives because nothing precedes it.
letters = " a b c d e f g a b c.com"
modified_letters = re.compile( r'(?<=\w) (?=\w)' ).sub( '', letters )
modified_letters

' abcdefgabc.com'

## format output

In [6]:
from huggingface_hub import InferenceClient
import os
# Show the kernel's current working directory.
print( os.getcwd() )

# Switch to the project's source root and print again to confirm. The path is absolute, so
# re-running this cell is harmless (the recorded output shows the same directory twice).
os.chdir( "/var/genie-in-the-box/src/" )
print( os.getcwd() )

import lib.utils.util as du
import lib.utils.util_stopwatch as sw

/var/genie-in-the-box/src
/var/genie-in-the-box/src


In [50]:
# preamble ="""
# You are an expert in converting raw data into conversational English and outputting it as XML document.
# 
# The answer below is the result of a query on a pandas dataframe about events, dates, and times on my calendar.
# """
# Opening part of the rephrasing prompt: states the model's role and what the raw answer represents.
preamble ="""
You are an expert in converting raw data into concise conversational English.

The answer below is the result of a query on a pandas dataframe about events, dates, and times on my calendar.
"""

# Inputs interpolated into `instructions` below: a label for the raw answer's format, the user's
# question, and a dict whose "output" entry holds the raw answer text.
data_format = "plain text"
question = "What's today's date?"
code_response_dict ={
    "output": "Wednesday, November 29, 2023"
}

# Task part of the prompt: asks for a brief rephrasing of the raw answer and requires the reply to
# be wrapped in a one-field <response> XML document.
instructions = f"""
Rephrase the raw answer in {data_format} format below so that it briefly answers the question below, and nothing more.

Question: {question}
Raw Answer: {code_response_dict[ "output" ]}

Return your answer as a simple xml document with the following fields:
<response>
    <rephrased_answer>Your rephrased answer</rephrased_answer>
</response>
"""
# Earlier variant of the instructions without the XML output requirement, kept for reference.
# instructions = f"""
# Rephrase the raw answer in {data_format} format below so that it briefly answers the question below, and nothing more.
# 
# Question: {question}
# Raw Answer: {code_response_dict[ "output" ]}
# """

# Resolve the server URL through the project helper. `url` is not used again in this cell.
# NOTE(review): the helper's name suggests a text-generation-inference (TGI) endpoint — confirm.
url = du.get_tgi_server_url_for_this_context()



In [263]:
# PHIND_34B_v2  = "Phind/Phind-CodeLlama-34B-v2"
# #     
# # DEFAULT_MODEL = PHIND_34B_v2
# 
# def query_llm_phind( preamble, instructions, model=PHIND_34B_v2 ):
#     
#     timer = sw.Stopwatch( msg=f"Asking LLM [{model}]...".format( model ) )
#     
#     client         = InferenceClient( du.get_tgi_server_url_for_this_context() )
#     token_list     = [ ]
#     ellipsis_count = 0
#     
#     prompt = f"{preamble}{instructions}\n"
#     print( prompt )
#     
#     for token in client.text_generation(
#         prompt, max_new_tokens=1024, stream=True, stop_sequences=[ "</response>" ], temperature=0.50
#     ):
#         print( token, end="" )
#         token_list.append( token )
#         
#     # print()
#     response = "".join( token_list ).strip()
#     
#     print( f"Token list length [{len( token_list )}]" )
#     timer.print( use_millis=True, prepend_nl=True )
#         # if self.verbose:
#         #     for line in response.split( "\n" ):
#         #         print( line )
#     
#     return response

In [None]:
"""You are an expert in converting raw data into conversational English and outputting it as XML document.

The answer below is the result of a query on a pandas dataframe about events, dates, and times on my calendar.

Rephrase the raw answer in plain text format below so that it briefly answers the question below, and nothing more.

Question: whats todays date
Raw Answer: 2023-11-29

Return your answer as a simple xml document with the following fields:
<response>
    <rephrased_answer>Your rephrased answer</rephrased_answer>
</response>
"""

In [142]:
# The user's question. It is interpolated twice into the prompt below: once as the task statement
# and once inside the <question> element of the XML response template.
question = "Show me all birthdays on my calendar this week"

# Sample rows of the events dataframe rendered as XML; pasted verbatim into the prompt so the model
# can see the column names and value formats.
head = """
<?xml version='1.0' encoding='utf-8'?>
<data>
  <row>
    <start_date>2023-09-01 00:00:00</start_date>
    <end_date>2023-09-04 00:00:00</end_date>
    <start_time>00:00</start_time>
    <end_time>23:59</end_time>
    <event_type>concert</event_type>
    <recurrent>False</recurrent>
    <recurrence_interval/>
    <priority_level>none</priority_level>
    <name>Jenny</name>
    <relationship>coworker</relationship>
    <description_who_what_where>Concert of Jenny at the city center</description_who_what_where>
  </row>
  <row>
    <start_date>2023-09-01 00:00:00</start_date>
    <end_date>2023-09-01 00:00:00</end_date>
    <start_time>05:25</start_time>
    <end_time>17:22</end_time>
    <event_type>todo</event_type>
    <recurrent>False</recurrent>
    <recurrence_interval/>
    <priority_level>highest</priority_level>
    <name>Gregorio</name>
    <relationship>friend</relationship>
    <description_who_what_where>Send out invitations for the party for Gregorio</description_who_what_where>
  </row>
  <row>
    <start_date>2023-09-01 00:00:00</start_date>
    <end_date>2023-09-01 00:00:00</end_date>
    <start_time>13:27</start_time>
    <end_time>01:59</end_time>
    <event_type>appointment</event_type>
    <recurrent>False</recurrent>
    <recurrence_interval/>
    <priority_level>high</priority_level>
    <name>Leroy Ruiz</name>
    <relationship>father</relationship>
    <description_who_what_where>Appointment with Leroy Ruiz at the clinic</description_who_what_where>
  </row>
  <row>
    <start_date>2023-11-01 00:00:00</start_date>
    <end_date>2023-11-01 00:00:00</end_date>
    <start_time>00:00</start_time>
    <end_time>23:59</end_time>
    <event_type>birthday</event_type>
    <recurrent>True</recurrent>
    <recurrence_interval>3 day</recurrence_interval>
    <priority_level>low</priority_level>
    <name>Bob</name>
    <relationship>brother</relationship>
    <description_who_what_where>Bob's birthday party at their favorite bar</description_who_what_where>
  </row>
  <row>
    <start_date>2023-11-01 00:00:00</start_date>
    <end_date>2023-11-01 00:00:00</end_date>
    <start_time>00:00</start_time>
    <end_time>23:59</end_time>
    <event_type>anniversary</event_type>
    <recurrent>True</recurrent>
    <recurrence_interval>3 week</recurrence_interval>
    <priority_level>highest</priority_level>
    <name>Tom Ruiz</name>
    <relationship>brother</relationship>
    <description_who_what_where>Tom Ruiz's anniversary celebration at the park</description_who_what_where>
  </row>
  <row>
    <start_date>2023-11-01 00:00:00</start_date>
    <end_date>2023-11-04 00:00:00</end_date>
    <start_time>00:00</start_time>
    <end_time>23:59</end_time>
    <event_type>concert</event_type>
    <recurrent>False</recurrent>
    <recurrence_interval/>
    <priority_level>highest</priority_level>
    <name>John</name>
    <relationship>coworker</relationship>
    <description_who_what_where>Concert of John at the city center</description_who_what_where>
  </row>
</data>
"""

# Text of a `value_counts()` listing for the event_type column; pasted verbatim into the prompt so
# the model knows which event types exist.
event_types = """
 event_type
conference      24
concert         22
performance     21
appointment     17
anniversary     17
workout         17
birthday        16
workshop        15
todo            14
subscription    14
meeting         12
interview       11
Name: count, dtype: int64
"""

# Single-shot prompt: role, dataframe context (`head`, `event_types`), six numbered steps, hints,
# the question, and the XML template the reply must follow. The dataframe is referred to as `df`
# throughout, matching the name the prompt tells the model to use.
pandas_system_prompt = f"""
You are a cheerfully helpful assistant, with proven expertise in Python using pandas dataframes.
        
Your job is to translate human questions about calendars, dates, and events into working Python code that can be used to answer the question.
        
The name of the events dataframe is `df`.

This is the output from `print(df.head().to_xml())`, in XML format:

{head}

This is the output from `print(df.event_type.value_counts())`:

{event_types}

BEFORE you generate the python code needed to answer the question below, I want you to:

1) Question: Ask yourself if you understand the question that I am asking you. Pay attention to the details!

2) Think: Before you do anything, think out loud about what I am asking you to do, including what are the steps that you will need to take to solve this problem. Be critical of your thought process!

3) Code: Generate an XML document containing the Python code that you used to arrive at your answer. The code must be complete, syntactically correct, and capable of running to completion. The last line of your code must be `return solution`.

4) Return: Report on the object type of the variable `solution` in your last line of code. Use one word to represent the object type.

5) Example: Create a one line example of how to call your code.

6) Explain: Explain how your code works, including any assumptions that you made.

Hint: An event that I have today may have started before today and may end tomorrow or next week, so be careful how you filter on dates.
Hint: When filtering by dates, use `pd.Timestamp( day )` to convert a Python datetime object into a Pandas `datetime64[ns]` value.
Hint: If your solution variable is a dataframe, it should include all columns in the dataframe.
Hint: If you cannot answer the question, explain why in the `error` field
Hint: Allow for the possibility that your query may return no results.

Question: {question}

Format: You must return your response as a syntactically correct XML document containing the following fields:

<?xml version="1.0" encoding="UTF-8"?>
<response>
    <question>{question}</question>
    <thoughts>Your thoughts</thoughts>
    <code>
        <line>import foo</line>
        <line>def function_name_here( ... ):</line>
        <line>    return solution</line>
    </code>
    <returns>Object type of the variable `solution`</returns>
    <example>One-line example of how to call your code: solution = function_name_here( arguments )</example>
    <explanation>Explanation of how the code works</explanation>
    <error>Description of any issues or errors that you encountered while attempting to fulfill this request</error>
</response>
"""

In [254]:
# import lib.utils.util_pandas as dup
# import pandas as pd
# 
# path_to_df = du.get_project_root() + "/src/conf/long-term-memory/events.csv"
# df = pd.read_csv( path_to_df )
# df = dup.cast_to_datetime( df )

In [255]:
# def get_df_metadata( df ):
#     
#     head = df.head( 3 ).to_xml( index=False )
#     head = head + df.tail( 3 ).to_xml( index=False )
#     head = head.replace( "root>", "events>" ).replace( "<?xml version='1.0' encoding='utf-8'?>", "" )
#     
#     event_value_counts = df.event_type.value_counts()
#     
#     return head, event_value_counts
#     
# get_df_metadata( df )

In [256]:
# def initialize_prompt_components( df, question ):
#         
#     head, event_value_counts = get_df_metadata( df )
#     
#     step_1 = f"""
#     You are a cheerfully and helpful assistant, with proven expertise using Python to query pandas dataframes.
#             
#     Your job is to translate human questions about calendars, dates, and events into a self-contained Python functions that can be used to answer the question now and reused in the future.
#             
#     About the Pandas dataframe: The name of the events dataframe is `df` and is already loaded in memory ready to be queried.
#     About the Python libraries that you will use: You may only use these libraries: datetime and Pandas. The Pandas library has been imported as `pd`, and the datetime library has been imported as `datetime`.
#     
#     Here are some hints to keep in mind and guide you as you craft your solution:
#     Start and end dates: An event that I have today may have started before today and may end tomorrow or next week, so be careful how you filter on dates.
#     Filtering: When filtering by dates, use `pd.Timestamp( day )` to convert a Python datetime object into a Pandas `datetime64[ns]` value.
#     Return values: You should always return a dataframe, and it must always include all columns in the dataframe, and never a subset.
#     
#     This is the ouput from `print(df.head().to_xml())`, in XML format:
#     
#     {head}
#     
#     This is the output from `print(self.df.event_type.value_counts())`:
#     
#     {event_value_counts}
#     
#     Given the context I have provided above, I want you to write a Python function to answer the following question:
#     
#     Question: `{question}`
#     
#     In order to successfully write a function that answers the question above, you must follow my instructions step by step. As you complete each step I will recount your progress on the previous steps and provide you with the next step's instructions.
#     
#     Step one) Think: think out loud about what you are being asked, including what are the steps that you will need to take in your code to solve this problem. Be critical of your thought process! And make sure to consider what you will call the entry point to your python solution, such as `def get_events_for_today( df )`, or `def get_events_for_tomorrow( df )`, or `def get_events_for_this_week( df )` or even `def get_birthday_for( df, name )`.
#     """
#     xml_formatting_instructions_step_1 = """
#     You must respond to the step one directive using the following XML format:
#     <response>
#         <thoughts>Your thoughts</thoughts>
#     </response>
#     
#     Begin!
#     """
#     
#     step_2 = """
#     In response to the instructions that you received for step one you replied:
#     
#     {response}
#     
#     Step two) Code: Now that you have thought about how you are going to solve the problem, it's time to generate the Python code that you will use to arrive at your answer. The code must be complete, syntactically correct, and capable of running to completion. The last line of your function code must be `return solution`.  Remember: You must never return a subset of a dataframe's columns.
#     """
#     xml_formatting_instructions_step_2 = """
#     You must respond to the step 2 directive using the following XML format:
#     <response>
#         <code>
#             <line>def function_name_here( df, arg1, arg2 ):</line>
#             <line>    ...</line>
#             <line>    ...</line>
#             <line>    return solution</line>
#         </code>
#     </response>
#     
#     Begin!
#     """
#     
#     step_3 = """
#     In response to the instructions that you received for step two, you replied:
#     
#     {response}
#     
#     Now that you have generated the code, you will need to perform the following three steps:
#     
#     Step three) Return: Report on the object type of the variable `solution` returned in your last line of code. Use one word to represent the object type.
#     
#     Step four) Example: Create a one line example of how to call your code.
#     
#     Step five) Explain: Explain how your code works, including any assumptions that you have made.
#     """
#     xml_formatting_instructions_step_3 = """
#     You must respond to the directives in steps three, four and five using the following XML format:
#     
#     <response>
#         <returns>Object type of the variable `solution`</returns>
#         <example>One-line example of how to call your code: solution = function_name_here( arguments )</example>
#         <explanation>Explanation of how the code works</explanation>
#     </response>
#     
#     Begin!
#     """
#     
#     step_4 = """
#     In response to the instructions that you received for step three, you replied:
#     
#     {response}
#     
#     Congratulations! We're finished 😀
#     
#     """
#     
#     prompt_components = {    
#                               "steps" : [ step_1, step_2, step_3, step_4 ],
#                           "responses" : [ ],
#                  "response_tag_names" : [ [ "thoughts" ], [ "code" ], [ "returns", "example", "explanation" ] ],
#                       "response_dict" : OrderedDict(),
#                     "running_history" : "",
#         "xml_formatting_instructions" : [ 
#             xml_formatting_instructions_step_1, xml_formatting_instructions_step_2, xml_formatting_instructions_step_3 
#         ]
#     }   
#     
#     return prompt_components

In [233]:
# def _get_code( xml_string, debug=False ):
#     
#     # if debug:
#     #     du.print_banner( "get_code called..." )
#     #     print( f"xml_string [{xml_string}]" )
# 
#     skip_list = [ "import pandas", "import datetime" ]
#     
#     # Matches all text between the opening and closing line tags, including the white space after the opening line tag
#     pattern   = re.compile( r"<line>(.*?)</line>" )
#     code      = du.get_value_by_xml_tag_name( xml_string, "code" )
#     code_list = []
# 
#     for line in code.split( "\n" ):
# 
#         match = pattern.search( line )
#         
#         for skip in skip_list:
#             if skip in line:
#                 if debug: print( f"[SKIPPING '{skip}']" )
#                 match = None
#                 break
#                 
#         if match:
#             line = match.group( 1 )
#             code_list.append( line )
#             if debug: print( line )
#         else:
#             code_list.append( "" )
#             if debug: print( "[]" )
# 
#     return code_list

In [234]:
import re
# Sample LLM reply containing only a <code> section, one <line> element per source line (including
# an empty <line></line> for a blank line).
# NOTE(review): looks like a fixture for the `_get_code` helper that is commented out in this
# notebook — confirm. The generated function returns just two columns of the dataframe.
foo = """
<response>
    <code>
        <line>def get_birthdays_this_week( df ):</line>
        <line>    import datetime</line>
        <line>    today = datetime.datetime.now()</line>
        <line>    start_date = today - datetime.timedelta(days=today.weekday())</line>
        <line>    end_date = start_date + datetime.timedelta(days=7)</line>
        <line></line>
        <line>    birthdays = df[(df['event_type'] == 'birthday') & (df['start_date'].between(start_date, end_date))]</line>
        <line>    solution = birthdays[['name', 'description_who_what_where']]</line>
        <line>    return solution</line>
    </code>
</response>
"""
# _ = _get_code( foo, debug=False )

In [235]:
# def update_response_dictionary( step, response, response_dict, tag_names, debug=True ):
#     
#     if debug: print( f"update_response_dictionary called with step [{step}]..." )
#     
#     # Parse response and update response dictionary
#     xml_tags_for_step_n = tag_names[ step ]
#     for xml_tag in xml_tags_for_step_n:
#         
#         if debug: print( f"Looking for xml_tag [{xml_tag}]" )
#         
#         if xml_tag == "code":
#             # the get_code method expects enclosing tags
#             xml_string = "<code>" + du.get_value_by_xml_tag_name( response, xml_tag ) + "</code>"
#             response_dict[ xml_tag ] = _get_code( xml_string, debug=debug )
#         else:
#             response_dict[ xml_tag ] = du.get_value_by_xml_tag_name( response, xml_tag ).strip()
#         
#     return response_dict

In [243]:
# def run_prompt():
# 
#     question                    = "What birthdays do I have on my calendar this week?"
#     
#     prompt_components           = initialize_prompt_components( df, question )
#     
#     steps                       = prompt_components[ "steps" ]
#     xml_formatting_instructions = prompt_components[ "xml_formatting_instructions" ]
#     response_tag_names          = prompt_components[ "response_tag_names" ]
#     responses                   = prompt_components[ "responses" ]
#     response_dict               = prompt_components[ "response_dict" ]
#     running_history             = prompt_components[ "running_history" ]
#     
#     for step in range( len( steps ) ):
#         
#         if step == 0:
#             # the first step doesn't have any previous responses to incorporate into it
#             running_history = steps[ step ]
#         else:
#             # incorporate the previous response into the current step, append it to the running history    
#             running_history = running_history + steps[ step ].format( response=responses[ step - 1 ] )
#             
#         # we're not going to execute the last step, it's been added just to keep the running history current
#         if step != len( steps ) - 1:
#             
#             response = query_llm_phind( running_history, xml_formatting_instructions[ step ] )
#             responses.append( response )
#             
#             # Incrementally update the contents of the response dictionary according to the results of the XML-esque parsing 
#             response_dict = update_response_dictionary( step, response, response_dict, response_tag_names, debug=False )
# 
#     prompt_components[ "running_history" ] = running_history
#     
#     return prompt_components

# prompt_components = run_prompt()
# du.print_banner( "DONE! response_dict[ 'code' ]" )
# du.print_list( response_dict[ "code" ] )

Asking LLM [Phind/Phind-CodeLlama-34B-v2]...

    You are a cheerfully and helpful assistant, with proven expertise using Python to query pandas dataframes.
            
    Your job is to translate human questions about calendars, dates, and events into a self-contained Python functions that can be used to answer the question now and reused in the future.
            
    About the Pandas dataframe: The name of the events dataframe is `df` and is already loaded in memory ready to be queried.
    About the Python libraries that you will use: You may only use these libraries: datetime and Pandas. The Pandas library has been imported as `pd`, and the datetime library has been imported as `datetime`.
    
    Here are some hints to keep in mind and guide you as you craft your solution:
    Start and end dates: An event that I have today may have started before today and may end tomorrow or next week, so be careful how you filter on dates.
    Filtering: When filtering by dates, use `pd.Ti

In [4]:
from lib.agents.agent import Agent
from lib.agents.agent_calendaring import DataQueryingAgent
import re

In [None]:
class CalendaringAgentIterative( DataQueryingAgent ):
    """
    Data-querying agent that leads an LLM through a multi-step ("iterative") prompt instead of one large prompt.
    
    The conversation is split into steps (think -> code -> return type/example/explanation). After each step the
    model's reply is folded into a running history that is sent back with the next step's instructions, and the
    XML-esque tags found in the reply are collected into a single response dictionary.
    
    NOTE(review): `self.df` is read in `__init__` right after `super().__init__()`; it is presumably loaded by
    the parent class from `path_to_df` -- confirm against DataQueryingAgent.
    """
    
    PHIND_34B_v2  = "Phind/Phind-CodeLlama-34B-v2"
    
    def __init__( self, path_to_df, question="", default_model=Agent.PHIND_34B_v2, push_counter=-1, debug=False, verbose=False ):
        """
        Builds the agent and pre-computes the prompt components for `question`.
        
        All arguments are forwarded unchanged to the parent constructor; `question` is additionally kept on this
        instance and interpolated into the first prompt step.
        """
        
        super().__init__( path_to_df, question=question, default_model=default_model, push_counter=push_counter, debug=debug, verbose=verbose )
        
        self.token_count       = 0
        self.question          = question
        self.prompt_components = self._initialize_prompt_components( self.df, self.question )
        
    def _initialize_prompt_components( self, df, question ):
        """
        Assembles the step templates and bookkeeping containers used by `run_prompt`.
        
        :param df: Events dataframe; its head/tail rows and `event_type` counts are embedded in the first step.
        :param question: The human question, interpolated into the first step.
        :return: Dictionary with these keys:
            "steps"                       - four prompt templates; every template after the first contains a
                                            `{response}` placeholder that is filled with the previous reply.
            "responses"                   - list that collects the raw replies (starts empty).
            "response_tag_names"          - per-step list of the XML tag names expected in that step's reply.
            "running_history"             - accumulated prompt text (starts empty).
            "xml_formatting_instructions" - per-step formatting instructions appended to the prompt.
        """
        
        head, event_value_counts = self._get_df_metadata( df )
        
        # The only Python libraries that you may use: You must only use the datetime and Pandas libraries, which have been imported in the following manner: `import pandas as pd` and`import datetime as dt`.
        # Step 1 is an f-string: the dataframe metadata and the question are baked in right here.
        step_1 = f"""
        You are a cheerfully and helpful assistant, with proven expertise using Python to query pandas dataframes.
                
        Your job is to translate human questions about calendars, dates, and events into a self-contained Python functions that can be used to answer the question now and reused in the future.
                
        About the Pandas dataframe: The name of the events dataframe is `df` and is already loaded in memory ready to be queried.
        
        Here are some hints to keep in mind and guide you as you craft your solution:
        Start and end dates: An event that I have today may have started before today and may end tomorrow or next week, so be careful how you filter on dates.
        Filtering: When filtering by dates, use `pd.Timestamp( day )` to convert a Python datetime object into a Pandas `datetime64[ns]` value.
        Return values: You should always return a dataframe, and it must always include all columns in the dataframe, and never a subset.
        
        This is the ouput from `print(df.head().to_xml())`, in XML format:
        {head}
        
        This is the output from `print(self.df.event_type.value_counts())`:
        
        {event_value_counts}
        
        Given the context I have provided above, I want you to write a Python function to answer the following question:
        
        Question: `{question}`
        
        In order to successfully write a function that answers the question above, you must follow my instructions step by step. As you complete each step I will recount your progress on the previous steps and provide you with the next step's instructions.
        
        Step one) Think: think out loud about what you are being asked, including what are the steps that you will need to take in your code to solve this problem. Be critical of your thought process! And make sure to consider what you will call the entry point to your python solution, such as `def get_events_for_today( df )`, or `def get_events_for_tomorrow( df )`, or `def get_events_for_this_week( df )` or even `def get_birthday_for( df, name )`.
        """
        xml_formatting_instructions_step_1 = """
        You must respond to the step one directive using the following XML format:
        <response>
            <thoughts>Your thoughts</thoughts>
        </response>
        
        Begin!
        """
        
        # Steps 2-4 are plain (non-f) strings: `{response}` is filled in later by `run_prompt` via str.format().
        step_2 = """
        In response to the instructions that you received for step one you replied:
        
        {response}
        
        Step two) Code: Now that you have thought about how you are going to solve the problem, it's time to generate the Python code that you will use to arrive at your answer. The code must be complete, syntactically correct, and capable of running to completion. The last line of your function code must be `return solution`.  Remember: You must never return a subset of a dataframe's columns. 
        """
        xml_formatting_instructions_step_2 = """
        You must respond to the step 2 directive using the following XML format:
        <response>
            <code>
                <line>def function_name_here( df, arg1, arg2 ):</line>
                <line>    ...</line>
                <line>    ...</line>
                <line>    return solution</line>
            </code>
        </response>
        
        Begin!
        """
        
        step_3 = """
        In response to the instructions that you received for step two, you replied:
        
        {response}
        
        Now that you have generated the code, you will need to perform the following three steps:
        
        Step three) Return: Report on the object type of the variable `solution` returned in your last line of code. Use one word to represent the object type.
        
        Step four) Example: Create a one line example of how to call your code.
        
        Step five) Explain: Explain how your code works, including any assumptions that you have made.
        """
        xml_formatting_instructions_step_3 = """
        You must respond to the directives in steps three, four and five using the following XML format:
        
        <response>
            <returns>Object type of the variable `solution`</returns>
            <example>One-line example of how to call your code: solution = function_name_here( arguments )</example>
            <explanation>Explanation of how the code works</explanation>
        </response>
        
        Begin!
        """
        
        # The closing step is never sent to the model; `run_prompt` only uses it to record the last reply in the history.
        step_4 = """
        In response to the instructions that you received for step three, you replied:
        
        {response}
        
        Congratulations! We're finished 😀
        
        """
        
        prompt_components = {
                                  "steps" : [ step_1, step_2, step_3, step_4 ],
                              "responses" : [ ],
                     "response_tag_names" : [ [ "thoughts" ], [ "code" ], [ "returns", "example", "explanation" ] ],
                        "running_history" : "",
            "xml_formatting_instructions" : [
                xml_formatting_instructions_step_1, xml_formatting_instructions_step_2, xml_formatting_instructions_step_3
            ]
        }
        
        return prompt_components
    
    def _get_df_metadata( self, df ):
        """
        Produces the dataframe context that is embedded in the first prompt step.
        
        :param df: Events dataframe; must expose an `event_type` column.
        :return: Tuple `( head, event_value_counts )` where `head` is the XML rendering of the first three rows
            followed by the XML rendering of the last three rows, and `event_value_counts` is the result of
            `df.event_type.value_counts()`.
        """
    
        head = df.head( 3 ).to_xml( index=False )
        head = head + df.tail( 3 ).to_xml( index=False )
        # Rename the default <data> root element to <events> and drop the XML declaration emitted by to_xml()
        head = head.replace( "data>", "events>" ).replace( "<?xml version='1.0' encoding='utf-8'?>", "" )
        
        event_value_counts = df.event_type.value_counts()
        
        return head, event_value_counts
    
    def run_prompt( self ):
        """
        Executes the multi-step prompt against the LLM and gathers the parsed replies.
        
        Every step except the last is sent to the model together with its formatting instructions. The last step
        is only formatted into the running history so that the history also contains the final reply.
        
        Side effects: resets and updates `self.token_count`, replaces `self.prompt_components[ "responses" ]` and
        `self.prompt_components[ "running_history" ]`, sets `self.prompt_response_dict`, and prints timing output.
        
        :return: Dictionary keyed by XML tag name ("thoughts", "code", "returns", "example", "explanation").
        """
    
        self.token_count            = 0
        timer                       = sw.Stopwatch( msg=f"Running iterative prompt with {len( self.prompt_components[ 'steps' ] )} steps..." )
        prompt_response_dict        = {}
        
        steps                       = self.prompt_components[ "steps" ]
        xml_formatting_instructions = self.prompt_components[ "xml_formatting_instructions" ]
        response_tag_names          = self.prompt_components[ "response_tag_names" ]
        running_history             = self.prompt_components[ "running_history" ]
        
        # Begin each run with an empty list: replies are looked up by position below, so replies left over from a
        # previous call would otherwise be spliced into this run's history.
        responses                   = [ ]
        self.prompt_components[ "responses" ] = responses
        
        last_step = len( steps ) - 1
        
        for step, step_template in enumerate( steps ):
            
            if step == 0:
                # the first step doesn't have any previous responses to incorporate into it
                running_history = step_template
            else:
                # incorporate the previous response into the current step, append it to the running history
                running_history = running_history + step_template.format( response=responses[ step - 1 ] )
                
            # we're not going to execute the last step, it's been added just to keep the running history current
            if step != last_step:
                
                response = self._query_llm_phind( running_history, xml_formatting_instructions[ step ] )
                responses.append( response )
                
                # Incrementally update the contents of the response dictionary according to the results of the XML-esque parsing
                prompt_response_dict = self._update_response_dictionary( step, response, prompt_response_dict, response_tag_names, debug=False )
    
        self.prompt_components[ "running_history" ] = running_history
        self.prompt_response_dict = prompt_response_dict
        
        timer.print( "Done!", use_millis=True, prepend_nl=False )
        # Guard the rate calculation: a zero elapsed time must not turn a finished run into a ZeroDivisionError
        elapsed_seconds   = timer.get_delta_ms() / 1000.0
        tokens_per_second = self.token_count / elapsed_seconds if elapsed_seconds > 0 else 0.0
        print( f"Tokens per second [{round( tokens_per_second, 1 )}]" )
        
        return self.prompt_response_dict

    def _query_llm_phind( self, preamble, instructions, model=PHIND_34B_v2, temperature=0.50, max_new_tokens=1024, debug=False ):
        """
        Streams a completion for `preamble + instructions` from the text-generation server and returns it.
        
        The prompt and every streamed token are echoed to stdout; the number of streamed tokens is added to
        `self.token_count`.
        
        :param preamble: Running history built so far.
        :param instructions: Formatting instructions for the current step.
        :param model: Model name; only used in the stopwatch message, it is not passed to the client.
        :param temperature: Sampling temperature forwarded to the client.
        :param max_new_tokens: Generation limit forwarded to the client.
        :param debug: Currently unused.
        :return: The concatenated, whitespace-stripped response text.
        """
    
        timer = sw.Stopwatch( msg=f"Asking LLM [{model}]..." )
        
        client     = InferenceClient( du.get_tgi_server_url_for_this_context() )
        token_list = [ ]
        
        prompt = f"{preamble}{instructions}\n"
        print( prompt )
        
        # Generation is asked to stop at the closing response tag, so each reply holds at most one <response> block
        for token in client.text_generation(
            prompt, max_new_tokens=max_new_tokens, stream=True, stop_sequences=[ "</response>" ], temperature=temperature
        ):
            print( token, end="" )
            token_list.append( token )
            
        response         = "".join( token_list ).strip()
        self.token_count = self.token_count + len( token_list )
        
        print()
        print( f"Response tokens [{len( token_list )}]" )
        timer.print( "Done!", use_millis=True, prepend_nl=True )
        
        return response
    
    def _update_response_dictionary( self, step, response, prompt_response_dict, tag_names, debug=True ):
        """
        Copies the tag values expected for `step` out of `response` into `prompt_response_dict`.
        
        :param step: Zero-based step index; selects the entry of `tag_names` to look for.
        :param response: Raw reply text for this step.
        :param prompt_response_dict: Dictionary to update (mutated in place and also returned).
        :param tag_names: Per-step lists of XML tag names.
        :param debug: When true, prints progress messages.
        :return: The updated `prompt_response_dict`. The "code" entry is a list of source lines; every other
            entry is the stripped text extracted by `du.get_value_by_xml_tag_name`.
        """
    
        if debug: print( f"update_response_dictionary called with step [{step}]..." )
        
        # Parse response and update response dictionary
        xml_tags_for_step_n = tag_names[ step ]
        
        for xml_tag in xml_tags_for_step_n:
            
            if debug: print( f"Looking for xml_tag [{xml_tag}]" )
            
            if xml_tag == "code":
                # the get_code method expects enclosing tags
                xml_string = "<code>" + du.get_value_by_xml_tag_name( response, xml_tag ) + "</code>"
                prompt_response_dict[ xml_tag ] = self._get_code( xml_string, debug=debug )
            else:
                prompt_response_dict[ xml_tag ] = du.get_value_by_xml_tag_name( response, xml_tag ).strip()

        return prompt_response_dict
    
    def _get_code( self, xml_string, debug=False ):
        """
        Turns a `<code>` block made of `<line>...</line>` entries into a list of Python source lines.
        
        Each physical line of the code block yields exactly one list entry: the decoded text between the line
        tags, or an empty string when the line carries no `<line>` element or matches an entry of the skip list.
        
        :param xml_string: Text containing the enclosing `<code>` tags.
        :param debug: When true, echoes every extracted line.
        :return: List of source-code lines with the predefined XML entities decoded.
        """
    
        # if debug:
        #     du.print_banner( "get_code called..." )
        #     print( f"xml_string [{xml_string}]" )
    
        skip_list = []#[ "import pandas", "import datetime" ]
        
        # Matches all text between the opening and closing line tags, including the white space after the opening line tag
        pattern   = re.compile( r"<line>(.*?)</line>" )
        code      = du.get_value_by_xml_tag_name( xml_string, "code" )
        code_list = []
    
        for line in code.split( "\n" ):
    
            match = pattern.search( line )
            
            for skip in skip_list:
                if skip in line:
                    if debug: print( f"[SKIPPING '{skip}']" )
                    match = None
                    break
                    
            if match:
                line = match.group( 1 )
                # Decode all five predefined XML entities. The ampersand goes last so that an escaped entity such
                # as `&amp;lt;` becomes the literal text `&lt;` rather than being decoded twice.
                line = line.replace( "&gt;", ">" ).replace( "&lt;", "<" )
                line = line.replace( "&quot;", '"' ).replace( "&apos;", "'" )
                line = line.replace( "&amp;", "&" )
                code_list.append( line )
                if debug: print( line )
            else:
                code_list.append( "" )
                if debug: print( "[]" )
    
        return code_list
    
# Smoke test for CalendaringAgentIterative: ask one question, run the multi-step prompt, then execute the
# generated code and print whatever `run_code()` returns.
if __name__ == "__main__":
    
    # NOTE(review): the path is presumably resolved relative to the project root by the parent agent -- confirm.
    path_to_df    = "/src/conf/long-term-memory/events.csv"
    # Alternative question kept for manual experiments:
    # question      = "What birthdays do I have on my calendar this week?"
    question      = "What's today's date?"
    agent         = CalendaringAgentIterative( path_to_df, question=question, debug=True, verbose=False )
    # Runs every prompt step against the LLM; the result is keyed by XML tag name (e.g. "code", "example").
    prompt_response_dict = agent.run_prompt()
    
    # agent.print_code()
    
    # `run_code` is not defined in CalendaringAgentIterative itself; presumably inherited from the parent agent.
    code_response    = agent.run_code()
    du.print_banner( "code_response:", prepend_nl=False )
    print( code_response )
    # Disabled: formatting of the raw code output and a dump of the parsed prompt responses.
    # formatted_output = agent.format_output()
    # 
    # du.print_banner( question, prepend_nl=False )
    # for line in formatted_output.split( "\n" ):
    #     print( line )
    #     
    # # code_response_dict = prompt_response_dict[ "code" ]
    # 
    # du.print_banner( "Done! prompt_response_dict:", prepend_nl=True )
    # print( prompt_response_dict )
    