In [1]:
%load_ext autoreload

In [2]:
%autoreload

import sys
import os
import json

# os.environ[ "LANGCHAIN_WANDB_TRACING" ] = "true"
# # wandb documentation to configure wandb using env variables
# # https://docs.wandb.ai/guides/track/advanced/environment-variables
# # here we are configuring the wandb project name
# os.environ[ "WANDB_PROJECT" ] = "deepily-dataframe-agent"

io_path = "/var/genie-in-the-box/src/lib"
if path not in sys.path:
    sys.path.append( path )
else:
    print( f"[{path}] already in sys.path" )

print( sys.path )

import util as du

path = "/var/genie-in-the-box/src"
du.add_to_path( path )

import util_pandas      as up
import util_stopwatch   as sw
import util_code_runner as ucr

import pandas as pd

debug = True
verbose = True


['/Applications/PyCharm.app/Contents/plugins/python/helpers-pro/jupyter_debug', '/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev', '/var/genie-in-the-box/src/notebooks', '/usr/local/lib/python310.zip', '/usr/local/lib/python3.10', '/usr/local/lib/python3.10/lib-dynload', '', '/usr/local/lib/python3.10/site-packages', '/var/genie-in-the-box/src/lib']
Added [/var/genie-in-the-box/src] to sys.path


ModuleNotFoundError: No module named 'util_langchain'

In [8]:
df = pd.read_csv( du.get_project_root() + "/src/conf/long-term-memory/events.csv" )
df = up.cast_to_datetime( df, debug=debug )


------------------------------------------------------------------------------------------------------------------------
- df.dtypes:
------------------------------------------------------------------------------------------------------------------------
start_date                    datetime64[ns]
end_date                      datetime64[ns]
start_time                            object
end_time                              object
event_type                            object
recurrent                               bool
recurrence_interval                   object
priority_level                        object
name                                  object
relationship                          object
description_who_what_where            object
dtype: object


In [9]:
import openai

In [10]:
GPT_4   = "gpt-4-0613"
GPT_3_5 = "gpt-3.5-turbo-0613"

def ask_chat_gpt_text( preamble, query, model=GPT_4, debug=False ):

    openai.api_key = os.getenv( "FALSE_POSITIVE_API_KEY" )
    
    timer = sw.Stopwatch( msg=f"Asking ChatGPT [{model}]...".format( model ) )
    
    response = openai.ChatCompletion.create(
        model=model,
        messages=[ 
            { "role": "system", "content": preamble },
            { "role": "user",   "content": query } 
        ],
        temperature=0,
        max_tokens=2400,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0
    )
    timer.print( use_millis=True )
    
    if debug: print( json.dumps( response, indent=4 ) )
    
    return response[ "choices" ][ 0 ][ "message" ][ "content" ].strip()

preamble = "You are chatGPT, a helpful chatbot"
question = "What's your name?"

ask_chat_gpt_text( preamble, question, model=GPT_4 )

Asking ChatGPT [gpt-4-0613]...
Asking ChatGPT [gpt-4-0613]... in 1,670 ms


'My name is ChatGPT.'

In [18]:
%autoreload

import os
import json

import lib.util           as du
import lib.util_pandas    as dup
import lib.util_stopwatch as sw
import genie_client       as gc

import pandas as pd
import tiktoken

class CalendaringAgent:
    
    def __init__( self, path_to_df, debug=False, verbose=False ):
        
        self.debug                = debug
        self.verbose              = verbose
        self.path_to_df           = du.get_project_root() + path_to_df
        self.df                   = pd.read_csv( self.path_to_df )
        self.df                   = dup.cast_to_datetime( self.df )
    
        self.pandas_system_prompt = self._get_pandas_system_prompt()
        
        self.response_dict        = None 
        self.question             = None
        
    def get_token_count( self, to_be_tokenized, model=GPT_4 ):
        
        encoding   = tiktoken.encoding_for_model( model )
        num_tokens = len( encoding.encode( to_be_tokenized ) )
        
        return num_tokens
        
    def _get_pandas_system_prompt( self ):
        
        csv = self.df.head( 3 ).to_csv( header=True, index=False)
        csv = csv + self.df.tail( 3 ).to_csv( header=False, index=False )
        
        pandas_system_prompt = f"""
        You are an expert software engineer working with a pandas dataframe in Python containing calendaring and events information. The name of the dataframe is `df`.

        This is the ouput from `print(df.head().to_csv())`, in CSV format:

        {csv}
        
        This is the output from `print(self.df.event_type.value_counts())`:

        {self.df.event_type.value_counts()}

        As you generate the python code needed to answer the events and calendaring question asked of you below, I want you to:

        1) Question: Ask yourself if you understand the question that I am asking you.  Pay attention to the details!
        2) Think: Before you do anything, think out loud about what I'm asking you to do, including what are the steps that you will need to take to solve this problem. Be critical of your thought process!
        3) Code: Generate a verbatim list of code that you used to arrive at your answer, one line of code per item on the list. The code must be complete, syntactically correct, and capable of runnning to completion. The last line of your code must be the variable `solution`, which represents the answer. Make sure that any filtering you perform matches the question asked of you by the user!
        4) Return: Report on the object type of the variable `solution` in your last line of code. Use one word to represent the object type.
        5) Explain: Briefly and succinctly explain your code in plain English.
        
        Format: return your response as a JSON object in the following fields:
        {{
            "question": "The question, verbatim and without modification, that your code attempts to answer",
            "thoughts": "Your thoughts",
            "code": [],
            "return": "Object type of the variable `solution`",
            "explanation": "A brief explanation of your code",
        }}

        Hint: An event that I have today may have started before today and may end tomorrow or next week, so be careful how you filter on dates.
        Hint: When filtering by dates, use `pd.Timestamp( day )` to convert a Python datetime object into a Pandas `datetime64[ns]` value.
        Hint: If your solution variable is a dataframe, it should include all columns in the dataframe.
        
        Wait until you're presented with the question to begin.
        """
        if self.debug: print( pandas_system_prompt )
        
        count = self.get_token_count( pandas_system_prompt )
        print( f"Token count for pandas_system_prompt: [{count}]" )
        
        return pandas_system_prompt
    
    def run_prompt( self, question ):
        
        # timer = sw.Stopwatch( "Running pandas prompt..." )
        self.question      = question
        self.response      = ask_chat_gpt_text( self.pandas_system_prompt, self.question, model=GPT_4 )
        self.response_dict = json.loads( self.response )
        # timer.print( "Done!" )
        
        if self.debug: print( json.dumps( self.response_dict, indent=4 ) )
        
        return self.response_dict
        
    def run_code( self ):
        
        # Let's copy it until we get the code munging right
        response_dict_copy = self.response_dict[ "code" ][ : ]
        
        self.code_response = ucr.assemble_and_run_solution(
            response_dict_copy, path="/src/conf/long-term-memory/events.csv",
            solution_code_returns=response_dict_copy[ "return" ], debug=False
        )
        if self.debug and self.verbose:
            du.print_banner( "Code output", prepend_nl=True )
            for line in self.code_response[ "output" ].split( "\n" ):
                print( line )
                
        return self.code_response     
    
    def format_output( self ):
        
        preamble     = self.get_formatting_preamble()
        instructions = self.get_formatting_instructions()
        
        self.formatted_output = ask_chat_gpt_text( preamble, instructions, model=GPT_3_5, debug=True )
        
        return self.formatted_output

    def get_formatting_preamble( self ):
        
        rows        = self.code_response[ "output" ].split( "\n" )
        row_count   = len( rows )
        
        lines       = []
        line_number = 1
        
        for row in rows:
            
            lines.append( f"{line_number}) {row}" )
            line_number += 1
            
        lines = "\n".join( lines )
        
        preamble = f"""
        You are an expert in converting raw data into conversational English.
        
        The following {row_count} rows of JSONL formatted data are the output from a query on a pandas dataframe about events on my calendar. 
        
        JSONL output:
        
        {lines}
        """
        return preamble
    
    def get_formatting_instructions( self ):
        
        instructions = f"""
        Reformat and rephrase the JSONL data that I just showed you in conversational English so that it answers this question: `{self.question}`
        
        Use this format: "You have a two hour lunch date with your friend Bob at noon today at Burgerland.
        
        Each line of the output that you create should contain one event.
        There is no need to discuss priority of events.
        If the query returned zero rows of JSONL data, then say "You have no EVENT_TYPE events today/tomorrow/this week, etc."
        """
        return instructions

In [12]:
! pip install tiktoken

[0m

In [19]:
agent         = CalendaringAgent( path_to_df="/src/conf/long-term-memory/events.csv", debug=True )

# response_dict = agent.run_prompt( "What todo items do I have on my calendar for this week?" )
# print( response_dict )


        You are an expert software engineer working with a pandas dataframe in Python containing calendaring and events information. The name of the dataframe is `df`.

        This is the ouput from `print(df.head().to_csv())`, in CSV format:

        start_date,end_date,start_time,end_time,event_type,recurrent,recurrence_interval,priority_level,name,relationship,description_who_what_where
2023-08-01,2023-08-04,00:00,23:59,Concert,False,,none,Jenny,coworker,Concert of Jenny at the city center
2023-08-01,2023-08-01,05:25,17:22,TODO,False,,highest,Gregorio,friend,Send out invitations for the party for Gregorio
2023-08-01,2023-08-01,13:27,01:59,Appointment,False,,high,Leroy Ruiz,father,Appointment with Leroy Ruiz at the clinic
2023-10-01,2023-10-01,00:00,23:59,Birthday,True,3 day,low,Bob,brother,Bob's birthday party at their favorite bar
2023-10-01,2023-10-01,00:00,23:59,Anniversary,True,3 week,highest,Tom Ruiz,brother,Tom Ruiz's anniversary celebration at the park
2023-10-01,2023-10-04

In [25]:
type( agent ) == CalendaringAgent 

True

In [29]:
"Calendar"[ :11 ]

'Calendar'

In [41]:
%autoreload
code_response = agent.run_code()
code_response[ "output" ]

'{"start_date":1695513600000,"end_date":1696032000000,"start_time":"12:01","end_time":"04:55","event_type":"TODO","recurrent":false,"recurrence_interval":null,"priority_level":"medium","name":"Juan","relationship":"neighbor","description_who_what_where":"Prepare presentation for next week\'s meeting for Juan (neighbor)"}\n{"start_date":1695772800000,"end_date":1695772800000,"start_time":"18:55","end_time":"05:58","event_type":"TODO","recurrent":false,"recurrence_interval":null,"priority_level":"low","name":"Pablo","relationship":"friend","description_who_what_where":"Prepare presentation for next week\'s meeting for Pablo"}\n{"start_date":1695945600000,"end_date":1696464000000,"start_time":"16:58","end_time":"13:49","event_type":"TODO","recurrent":false,"recurrence_interval":null,"priority_level":"medium","name":"Juan","relationship":"neighbor","description_who_what_where":"Renew gym membership for Juan (neighbor)"}'

In [42]:
%autoreload
for line in agent.get_formatting_preamble().split( "\n" ):
    print( line )


        You are an expert in converting raw data into conversational English.
        
        The following 3 rows of JSONL formatted data are the output from a query on a pandas dataframe about events on my calendar. 
        
        JSONL output:
        
        1) {"start_date":1695513600000,"end_date":1696032000000,"start_time":"12:01","end_time":"04:55","event_type":"TODO","recurrent":false,"recurrence_interval":null,"priority_level":"medium","name":"Juan","relationship":"neighbor","description_who_what_where":"Prepare presentation for next week's meeting for Juan (neighbor)"}
2) {"start_date":1695772800000,"end_date":1695772800000,"start_time":"18:55","end_time":"05:58","event_type":"TODO","recurrent":false,"recurrence_interval":null,"priority_level":"low","name":"Pablo","relationship":"friend","description_who_what_where":"Prepare presentation for next week's meeting for Pablo"}
3) {"start_date":1695945600000,"end_date":1696464000000,"start_time":"16:58","end_time":"13:49","

In [43]:
for line in agent.get_formatting_instructions().split( "\n" ):
    print( line )


        Reformat and rephrase the JSONL data that I just showed you in conversational English so that it answers this question: `What todo items do I have on my calendar for this week?`
        
        Use this format: "You have a two hour lunch date with your friend Bob at noon today at Burgerland.
        
        Each line of the output that you create should contain one event.
        There is no need to discuss priority of events.
        If the query returned zero rows of JSONL data, then say "You have no EVENT_TYPE events today/tomorrow/this week, etc."
        


In [44]:
agent.format_output()

Asking ChatGPT [gpt-3.5-turbo-0613]...
Asking ChatGPT [gpt-3.5-turbo-0613]... in 6,304 ms


"You have a medium priority task to prepare a presentation for next week's meeting with your neighbor Juan. This task is scheduled from 12:01 PM to 4:55 PM today.\n\nYou have a low priority task to prepare a presentation for next week's meeting with your friend Pablo. This task is scheduled from 6:55 PM today to 5:58 AM tomorrow.\n\nYou have a medium priority task to renew your gym membership for your neighbor Juan. This task is scheduled from 4:58 PM today to 1:49 PM on the day after tomorrow."

In [45]:
for line in agent.formatted_output.split( "\n" ): 
    print( line )

You have a medium priority task to prepare a presentation for next week's meeting with your neighbor Juan. This task is scheduled from 12:01 PM to 4:55 PM today.

You have a low priority task to prepare a presentation for next week's meeting with your friend Pablo. This task is scheduled from 6:55 PM today to 5:58 AM tomorrow.

You have a medium priority task to renew your gym membership for your neighbor Juan. This task is scheduled from 4:58 PM today to 1:49 PM on the day after tomorrow.


In [4]:
path = "/var/genie-in-the-box/src"
if path not in sys.path:
    sys.path.append( path )
    
sys.path

['/Applications/PyCharm.app/Contents/plugins/python/helpers-pro/jupyter_debug',
 '/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev',
 '/var/genie-in-the-box/src/notebooks',
 '/usr/local/lib/python310.zip',
 '/usr/local/lib/python3.10',
 '/usr/local/lib/python3.10/lib-dynload',
 '',
 '/usr/local/lib/python3.10/site-packages',
 '/var/genie-in-the-box/src']

In [6]:
import solution_snapshot_mgr as ssm
import lib.util              as du

path_to_snapshots = du.get_project_root() + "/src/conf/long-term-memory/solutions/"
snapshot_mgr = ssm.SolutionSnapshotManager( path_to_snapshots, debug=False )
# print( snapshot_mgr )

snapshot = snapshot_mgr.get_snapshots_by_question( "What concerts do I have this week?" )[ 0 ][ 1 ]

similar_snapshots = snapshot_mgr.get_snapshots_by_code_similarity( snapshot.question, snapshot.code, snapshot.code_embedding, threshold=90.0, limit=-1 )


------------------------------------------------------------------------------------------------------------------------
- Found [32] synonymous questions
------------------------------------------------------------------------------------------------------------------------

Synonymous question [when is juans birthday] for snapshot.question [when is juans birthday]
Synonymous question [when was juan born] for snapshot.question [when is juans birthday]
Synonymous question [what day is today] for snapshot.question [what day is today]
Synonymous question [whats todays date] for snapshot.question [what day is today]
Synonymous question [whats todays day and date] for snapshot.question [what day is today]
Synonymous question [what is the day and date for today] for snapshot.question [what day is today]
Synonymous question [whats the date] for snapshot.question [what day is today]
Synonymous question [say what day is today] for snapshot.question [what day is today]
Synonymous question [hey

In [21]:
snippets = []
i = 1
for snapshot in similar_snapshots:    
    snippet = "\n".join( snapshot[ 1 ].code ) 
    snippet = f"Snippet {i}: \n\n{snippet}"
    print( snippet, end="\n\n" )
    snippets.append( snippet )
    i += 1
    
snippets_string = "\n\n".join( snippets )    

Snippet 1: 

df['start_date'] = pd.to_datetime(df['start_date'])
df['end_date'] = pd.to_datetime(df['end_date'])
now = pd.Timestamp.now()
start_of_week = now - pd.to_timedelta(now.dayofweek, unit='d')
end_of_week = start_of_week + pd.to_timedelta(6, unit='d')
solution = df[(df['event_type'] == 'Concert') & (df['start_date'] <= end_of_week) & (df['end_date'] >= start_of_week)]
print( solution.to_json( orient='records', lines=True ) )

Snippet 2: 

import pandas as pd
from datetime import datetime, timedelta
start_of_week = pd.Timestamp(datetime.now().date() - timedelta(days=datetime.now().weekday()))
end_of_week = start_of_week + pd.DateOffset(days=6)
birthday_mask = df['event_type'] == 'Birthday'
date_mask = (df['start_date'] <= end_of_week) & (df['end_date'] >= start_of_week)
solution = df[birthday_mask & date_mask]
print( solution.to_json( orient='records', lines=True ) )


In [37]:
system_message = f"""
I'm going to show you {len( snippets )} Python code snippets that are similar. How would you coalesce or refactor them so that you only need to run one code snippet for all {len( snippets )} scenarios?

As you generate the python code needed to answer this question, I want you to:

1) Think: Before you do anything, think out loud about what I'm asking you to do, including the steps that you will need to take to solve this problem. Be critical of your thought process!
2) Code: Generate a verbatim list of code that you used to arrive at your answer, one line of code per item on the list. The code must be complete, syntactically correct, and capable of running to completion. 
3) Return: Report on the object type returned by your last line of code. Use one word to represent the object type.
4) Test: Generate a verbatim list of code needed to test your code solution
5) Explain: Briefly and succinctly explain your code in plain English.

Format: return your response as a JSON object in the following fields:
{{
    "thoughts": "Your thoughts",
    "code": [],
    "returns": "Object type of the variable `solution`",
    "tests": []
    "explanation": "A brief explanation of your code",
    "error": "Verbatim stack trace or description of issues encountered while attempting to carry out this task."
}}"""
user_message = f"""
{snippets_string}

Begin!
"""

for line in system_message.split( "\n" ):
    print( line )
    
for line in user_message.split( "\n" ):
    print( line )


I'm going to show you 2 Python code snippets that are similar. How would you coalesce or refactor them so that you only need to run one code snippet for all 2 scenarios?

As you generate the python code needed to answer this question, I want you to:

1) Think: Before you do anything, think out loud about what I'm asking you to do, including the steps that you will need to take to solve this problem. Be critical of your thought process!
2) Code: Generate a verbatim list of code that you used to arrive at your answer, one line of code per item on the list. The code must be complete, syntactically correct, and capable of running to completion. 
3) Return: Report on the object type returned by your last line of code. Use one word to represent the object type.
4) Test: Generate a verbatim list of code needed to test your code solution
5) Explain: Briefly and succinctly explain your code in plain English.

Format: return your response as a JSON object in the following fields:
{
    "thought

In [38]:
import os 
import json
import openai
import lib.util_stopwatch as sw

GPT_4   = "gpt-4-0613"
GPT_3_5 = "gpt-3.5-turbo-0613"

def query_gpt( preamble, query, model=GPT_4, debug=False ):
    
    openai.api_key = os.getenv( "FALSE_POSITIVE_API_KEY" )
    
    if debug:
        timer = sw.Stopwatch( msg=f"Asking ChatGPT [{model}]...".format( model ) )
    
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            { "role": "system", "content": preamble },
            { "role": "user", "content": query }
        ],
        temperature=0,
        max_tokens=2000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0
    )
    if debug:
        timer.print( use_millis=True )
        print( json.dumps( response, indent=4 ) )
    
    return response[ "choices" ][ 0 ][ "message" ][ "content" ].strip()

response = query_gpt( system_message, user_message, debug=True )
response

Asking ChatGPT [gpt-4-0613]...
Asking ChatGPT [gpt-4-0613]... in 32,907 ms

{
    "id": "chatcmpl-85mITKkUJcx85SLh26k9341VuC4ht",
    "object": "chat.completion",
    "created": 1696387985,
    "model": "gpt-4-0613",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "{\n    \"thoughts\": \"The two snippets are doing similar operations but with different event types. The first snippet is filtering the dataframe for 'Concert' events that are happening within the current week. The second snippet is doing the same but for 'Birthday' events. The main difference is in how they calculate the start and end of the week. The first snippet uses pandas' to_timedelta function while the second uses datetime's timedelta. I can refactor the code by creating a function that takes the event type as a parameter and uses one method to calculate the start and end of the week.\",\n    \"code\": [\n        \"import pand

'{\n    "thoughts": "The two snippets are doing similar operations but with different event types. The first snippet is filtering the dataframe for \'Concert\' events that are happening within the current week. The second snippet is doing the same but for \'Birthday\' events. The main difference is in how they calculate the start and end of the week. The first snippet uses pandas\' to_timedelta function while the second uses datetime\'s timedelta. I can refactor the code by creating a function that takes the event type as a parameter and uses one method to calculate the start and end of the week.",\n    "code": [\n        "import pandas as pd",\n        "from datetime import datetime, timedelta",\n        "def filter_events(df, event_type):",\n        "    now = pd.Timestamp.now()",\n        "    start_of_week = now - pd.to_timedelta(now.dayofweek, unit=\'d\')",\n        "    end_of_week = start_of_week + pd.to_timedelta(6, unit=\'d\')",\n        "    event_mask = df[\'event_type\'] ==

In [39]:
response_dict = json.loads( response )
response_dict


{'thoughts': "The two snippets are doing similar operations but with different event types. The first snippet is filtering the dataframe for 'Concert' events that are happening within the current week. The second snippet is doing the same but for 'Birthday' events. The main difference is in how they calculate the start and end of the week. The first snippet uses pandas' to_timedelta function while the second uses datetime's timedelta. I can refactor the code by creating a function that takes the event type as a parameter and uses one method to calculate the start and end of the week.",
 'code': ['import pandas as pd',
  'from datetime import datetime, timedelta',
  'def filter_events(df, event_type):',
  '    now = pd.Timestamp.now()',
  "    start_of_week = now - pd.to_timedelta(now.dayofweek, unit='d')",
  "    end_of_week = start_of_week + pd.to_timedelta(6, unit='d')",
  "    event_mask = df['event_type'] == event_type",
  "    date_mask = (df['start_date'] <= end_of_week) & (df['e

In [40]:
for line in response_dict[ "code" ]: print( line )

import pandas as pd
from datetime import datetime, timedelta
def filter_events(df, event_type):
    now = pd.Timestamp.now()
    start_of_week = now - pd.to_timedelta(now.dayofweek, unit='d')
    end_of_week = start_of_week + pd.to_timedelta(6, unit='d')
    event_mask = df['event_type'] == event_type
    date_mask = (df['start_date'] <= end_of_week) & (df['end_date'] >= start_of_week)
    solution = df[event_mask & date_mask]
    return solution.to_json(orient='records', lines=True)


In [42]:
import pandas as pd
from datetime import datetime, timedelta
def filter_events(df, event_type):
    now = pd.Timestamp.now()
    start_of_week = now - pd.to_timedelta(now.dayofweek, unit='d')
    end_of_week = start_of_week + pd.to_timedelta(6, unit='d')
    event_mask = df['event_type'] == event_type
    date_mask = (df['start_date'] <= end_of_week) & (df['end_date'] >= start_of_week)
    solution = df[event_mask & date_mask]
    return solution.to_json(orient='records', lines=True)

In [43]:
for line in response_dict[ "tests" ]: print( line )

df = pd.DataFrame({'event_type': ['Concert', 'Birthday', 'Concert', 'Birthday'], 'start_date': ['2022-01-03', '2022-01-04', '2022-01-05', '2022-01-06'], 'end_date': ['2022-01-07', '2022-01-08', '2022-01-09', '2022-01-10']})
df['start_date'] = pd.to_datetime(df['start_date'])
df['end_date'] = pd.to_datetime(df['end_date'])
print(filter_events(df, 'Concert'))
print(filter_events(df, 'Birthday'))


In [53]:
df = pd.DataFrame({'event_type': ['Concert', 'Birthday', 'Concert', 'Birthday'], 'start_date': ['2023-10-01', '2023-10-02', '2023-10-03', '2023-10-03'], 'end_date': ['2023-10-07', '2023-10-08', '2023-10-09', '2023-10-10']})
df['start_date'] = pd.to_datetime(df['start_date'])
df['end_date'] = pd.to_datetime(df['end_date'])
print(filter_events(df, 'Concert'))
print(filter_events(df, 'Birthday'))

{"event_type":"Concert","start_date":1696118400000,"end_date":1696636800000}
{"event_type":"Concert","start_date":1696291200000,"end_date":1696809600000}

{"event_type":"Birthday","start_date":1696204800000,"end_date":1696723200000}
{"event_type":"Birthday","start_date":1696291200000,"end_date":1696896000000}


In [54]:
df

Unnamed: 0,event_type,start_date,end_date
0,Concert,2023-10-01,2023-10-07
1,Birthday,2023-10-02,2023-10-08
2,Concert,2023-10-03,2023-10-09
3,Birthday,2023-10-03,2023-10-10


In [55]:
for line in filter_events(df, 'Birthday').split( "\n" ): print( line )

{"event_type":"Birthday","start_date":1696204800000,"end_date":1696723200000}
{"event_type":"Birthday","start_date":1696291200000,"end_date":1696896000000}


In [4]:
from solution_snapshot_mgr import SolutionSnapshotManager
from agent_refactoring     import RefactoringAgent

pwd [/var/genie-in-the-box/src/notebooks]

/Applications/PyCharm.app/Contents/plugins/python/helpers-pro/jupyter_debug
/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev
/usr/local/lib/python3.10
/usr/local/lib/python3.10/lib-dynload
/usr/local/lib/python3.10/site-packages
/usr/local/lib/python310.zip
/var/genie-in-the-box/src
/var/genie-in-the-box/src/lib
/var/genie-in-the-box/src/notebooks


In [113]:
%autoreload

path_to_snapshots = du.get_project_root() + "/src/conf/long-term-memory/solutions/"
snapshot_mgr      = SolutionSnapshotManager( path_to_snapshots, debug=False )
exemplar_snapshot = snapshot_mgr.get_snapshots_by_question( "What concerts do I have this week?" )[ 0 ][ 1 ]
similar_snapshots = snapshot_mgr.get_snapshots_by_code_similarity( exemplar_snapshot, threshold=90.0 )

agent         = RefactoringAgent( similar_snapshots=similar_snapshots, path_to_solutions="/src/conf/long-term-memory/solutions", debug=True, verbose=True )
response_dict = agent.run_prompt()


------------------------------------------------------------------------------------------------------------------------
- Found [32] synonymous questions
------------------------------------------------------------------------------------------------------------------------

Synonymous question [when is juans birthday] for snapshot.question [when is juans birthday]
Synonymous question [when was juan born] for snapshot.question [when is juans birthday]
Synonymous question [what day is today] for snapshot.question [what day is today]
Synonymous question [whats todays date] for snapshot.question [what day is today]
Synonymous question [whats todays day and date] for snapshot.question [what day is today]
Synonymous question [what is the day and date for today] for snapshot.question [what day is today]
Synonymous question [whats the date] for snapshot.question [what day is today]
Synonymous question [say what day is today] for snapshot.question [what day is today]
Synonymous question [hey

In [117]:
%autoreload
agent.update_code( debug=True )

Writing file [/var/genie-in-the-box/src/lib/autogen/util_calendaring_4.py]... Done!
Writing file [/var/genie-in-the-box/io/lib/autogen/util_calendaring_4.py]... Done!

File     count: [4]
File file_name: [util_calendaring_4.py]
File repo_path: [/var/genie-in-the-box/src/lib/autogen/util_calendaring_4.py]
File   io_path: [/var/genie-in-the-box/io/lib/autogen/util_calendaring_4.py]
File    abbrev: [uc4]
File    import: [import lib.autogen.util_calendaring_4 as uc4]

------------------------------------------------------------------------------------------------------------------------
- BEFORE updating `what concerts do i have this week`...
------------------------------------------------------------------------------------------------------------------------

import lib.autogen.util_calendaring_2 as uc2
print(uc2.get_events_by_week_and_type(df, 'Concert'))


Generating embedding for [import lib.autogen.util_calendaring_2 as uc2 print(uc2.get_event...]...
Generating embedding for [import

In [None]:
%autoreload
agent.update_examples( debug=True )

### Write the freshly minted function to both the repo and the Io directories, and create all the meditator necessary to execute the code

In [74]:
import os

def write_code_to_unique_files( lines, agent_src_root, agent_lib_chunk, file_name_prefix, suffix=".py" ):
    
    # Get the list of files in the agent_lib_path directory
    files = os.listdir( agent_src_root + agent_lib_chunk )
    
    # Count the number of files with the name {file_name_prefix}{}{suffix}"
    count = sum( 1 for file in files if file.startswith( file_name_prefix ) and file.endswith( suffix ) )
    
    # Format the file name with the count
    file_name = f"{file_name_prefix}{count}{suffix}"
    util_name = f"{file_name_prefix}{count}" 
    
    # Write the file to the repo path
    repo_path = os.path.join( agent_src_root, agent_lib_chunk, file_name )
    print( f"Writing file [{repo_path}]... ", end="" )
    du.write_lines_to_file( repo_path, lines )
    # Set the permissions of the file to be world-readable and writable
    os.chmod( repo_path, 0o666 )
    print( "Done!" )
    
    # Write the file to the io/execution path
    io_path = f"{du.get_project_root()}/io/{agent_lib_chunk}{file_name}"
    print( f"Writing file [{io_path}]... ", end="" )
    du.write_lines_to_file( io_path, lines )
    # Set the permissions of the file to be world-readable and writable
    os.chmod( io_path, 0o666 )
    print( "Done!", end="\n\n" )
    
    # Build import 'as' string:
    non_digits   = "util_calendaring_2".split( "_" )[ :-1 ]
    first_digits = "".join( [ item[ 0 ] for item in non_digits ] )
    abbrev       = f"{first_digits}{count}"
    as_chunk     = f"as {abbrev}"
    import_str   = f"import {agent_lib_chunk.replace( '/', '.' )}{util_name} {as_chunk}"
    
    results = { 
        "file_name": file_name,
        "repo_path": repo_path,
          "io_path": io_path,
            "count": count,
           "abbrev": abbrev,
           "import": import_str
    }

    return results

# Usage example
agent_src_root  = du.get_project_root() + "/src/" 
agent_lib_chunk = "lib/autogen/"
foo = [ "import os", "import json" ]

code_write_metadata = write_code_to_unique_files( response_dict[ "code" ], agent_src_root, agent_lib_chunk, "util_calendaring_", suffix=".py" )
# 
print( f"File     count: [{code_write_metadata[ 'count' ]}]" )
print( f"File file_name: [{code_write_metadata[ 'file_name' ]}]" )
print( f"File repo_path: [{code_write_metadata[ 'repo_path' ]}]" )
print( f"File   io_path: [{code_write_metadata[ 'io_path' ]}]" )
print( f"File    abbrev: [{code_write_metadata[ 'abbrev' ]}]" )
print( f"File    import: [{code_write_metadata[ 'import' ]}]" )

Writing file [/var/genie-in-the-box/src/lib/autogen/util_calendaring_4.py]... Done!
Writing file [/var/genie-in-the-box/io/lib/autogen/util_calendaring_4.py]... Done!

File     count: [4]
File file_name: [util_calendaring_4.py]
File repo_path: [/var/genie-in-the-box/src/lib/autogen/util_calendaring_4.py]
File   io_path: [/var/genie-in-the-box/io/lib/autogen/util_calendaring_4.py]
File    abbrev: [uc4]
File    import: [import lib.autogen.util_calendaring_4 as uc4]


## Update the examples dictionary so that it contains a list of source code needed to execute the freshly minted function

In [80]:
def update_example_code( refactoring_response_dict, code_metadata, debug=False ):
    
    function_name = refactoring_response_dict[ "function_name" ]
    
    # Update the examples dictionary so that it contains a list of source code needed to execute the freshly minted function
    for key, value in refactoring_response_dict[ "examples" ].items():
        
        if debug: print( f"Before: {key}: {value}" )
        if type( value ) is not list:
            value = value.replace( function_name, f"{code_metadata[ 'abbrev' ]}.{function_name}" )
            value = [ code_metadata[ 'import' ], value ]
        else:
            print( f"No need to update [{value}]" )
        if debug: print( f" After: {key}: {value}" )
        
        refactoring_response_dict[ "examples" ][ key ] = value
        
    return refactoring_response_dict

response_dict = update_example_code( response_dict.copy(), code_write_metadata )
response_dict
    

No need to update [['import lib.autogen.util_calendaring_3 as uc3', "print(uc3.get_events_by_week_and_type(df, 'Concert').to_json(orient='records', lines=True))"]]
No need to update [['import lib.autogen.util_calendaring_3 as uc3', "print(uc3.get_events_by_week_and_type(df, 'Birthday').to_json(orient='records', lines=True))"]]


{'thoughts': 'The two code snippets are very similar. They both filter a DataFrame based on the event type and the date range of the current week. The only difference is the event type they are filtering for. Therefore, we can create a function that takes the event type as a parameter and filters the DataFrame accordingly. We will also need to handle the case where the DataFrame is empty after filtering, in which case we will return an empty DataFrame.',
 'code': ['import pandas as pd',
  'from datetime import datetime, timedelta',
  'def get_events_by_week_and_type(df, event_type):',
  "    df['start_date'] = pd.to_datetime(df['start_date'])",
  "    df['end_date'] = pd.to_datetime(df['end_date'])",
  '    now = pd.Timestamp.now()',
  "    start_of_week = now - pd.to_timedelta(now.dayofweek, unit='d')",
  "    end_of_week = start_of_week + pd.to_timedelta(6, unit='d')",
  "    event_mask = df['event_type'] == event_type",
  "    date_mask = (df['start_date'] <= end_of_week) & (df['end

In [82]:
response_dict[ "examples" ]

{'what concerts do i have this week': ['import lib.autogen.util_calendaring_3 as uc3',
  "print(uc3.get_events_by_week_and_type(df, 'Concert').to_json(orient='records', lines=True))"],
 'what birthdays do i have this week': ['import lib.autogen.util_calendaring_3 as uc3',
  "print(uc3.get_events_by_week_and_type(df, 'Birthday').to_json(orient='records', lines=True))"]}

In [101]:
import solution_snapshot as ss

In [102]:
def update_snapshot_code( snapshots, refactoring_response_dict, debug=False ):
    
    for snapshot in snapshots:
        
        if debug:
            du.print_banner( f"BEFORE updating `{snapshot[ 1 ].question}`...", prepend_nl=False )
            for line in snapshot[ 1 ].code: print( line )
            print( "\n" )
            
        # Update the code, using the question as the key
        new_code = refactoring_response_dict[ "examples" ][ snapshot[ 1 ].question ]
        snapshot[ 1 ].code           = new_code
        snapshot[ 1 ].code_embedding = ss.SolutionSnapshot.generate_embedding( " ".join( new_code ) )
        snapshot[ 1 ].write_to_file()
        
        if debug:
            du.print_banner( f" AFTER updating `{snapshot[ 1 ].question}`...", prepend_nl=False )
            for line in snapshot[ 1 ].code: print( line )
            print( "\n" )
        
    
update_snapshot_code( similar_snapshots, response_dict, debug=True )

------------------------------------------------------------------------------------------------------------------------
- BEFORE updating `what concerts do i have this week`...
------------------------------------------------------------------------------------------------------------------------

import lib.autogen.util_calendaring_3 as uc3
print(uc3.get_events_by_week_and_type(df, 'Concert').to_json(orient='records', lines=True))


Generating embedding for [import lib.autogen.util_calendaring_3 as uc3 print(uc3.get_event...]...
Generating embedding for [import lib.autogen.util_calendaring_3 as uc3 print(uc3.get_event...]... Done! in 322 ms

solution_file value provided: [what-concerts-do-i-have-this-week-0.json]...
File path: /var/genie-in-the-box/src/conf/long-term-memory/solutions/what-concerts-do-i-have-this-week-0.json
------------------------------------------------------------------------------------------------------------------------
-  AFTER updating `what concerts do i hav