In [63]:
!env

LESSOPEN=| /usr/bin/lesspipe %s
NV_LIBCUBLAS_VERSION=12.0.1.189-1
NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-0
MPLBACKEND=module://matplotlib_inline.backend_inline
HOSTNAME=c6ddd78a7aa3
SHLVL=1
LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/lib/python3.10/dist-packages/tensorrt:/usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib/
NV_LIBNCCL_PACKAGE_VERSION=2.16.2-1
HOME=/root
OPENAI_API_KEY=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
PAGER=cat
DOCKER_IMAGE_VERSION=0.1.0
LC_CTYPE=C.UTF-8
NV_LIBNPP_PACKAGE=libnpp-12-0=12.0.0.30-1
CUDA_VERSION=12.0.0
CUDA_VISIBLE_DEVICES=0,1
NV_LIBCUBLAS_PACKAGE_NAME=libcublas-12-0
FORCE_COLOR=1
NVIDIA_REQUIRE_CUDA=cuda>=12.0 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driv

In [14]:
import sys


path = "/var/genie-in-the-box/src/lib"
sys.path.append( path )

import util as du
du.add_to_path( path )

Path [/var/genie-in-the-box/src/lib] already in sys.path


In [51]:
import time

class Stopwatch:

    def __enter__( self ):

        self.start = time.time()

        return self

    def __exit__( self, *args ):

        self.end      = time.time()
        self.interval = int( ( self.end - self.start ) * 1000 )

        print( f'Done in {self.interval:,} milliseconds' )

with Stopwatch() as sw:

    time.sleep( 1 )

Done in 1,001 milliseconds


In [58]:
import os
import glob
import json
from datetime import datetime

import openai
import numpy as np

class SolutionSnapshot:

    @staticmethod
    def get_timestamp():

        now = datetime.now()
        return now.strftime( "%Y-%m-%d @ %H-%M-%S" )

    def __init__( self, problem, created_date=get_timestamp(), updated_date=get_timestamp(), solution_summary="", code="",
                  programming_language="python", language_version="3.10",
                  problem_embedding=[], solution_embedding=[ ], code_embedding=[ ],
                  solution_directory="/src/conf/long-term-memory/solutions/", solution_file=None
        ):

        self.created_date         = created_date
        self.updated_date         = updated_date
        self.problem              = problem
        self.solution_summary     = solution_summary
        self.code                 = code
        self.programming_language = programming_language
        self.language_version     = language_version
        self.solution_directory   = solution_directory
        self.solution_file        = solution_file

        # If the problem embedding is empty, generate it
        if not problem_embedding:
            self.problem_embedding = self._generate_embedding( problem )
        else:
            self.problem_embedding = problem_embedding

        self.solution_embedding   = solution_embedding
        self.code_embedding       = code_embedding


    def set_solution_summary( self, solution_summary ):

        self.solution_summary   = solution_summary
        self.solution_embedding = self._generate_embedding( solution_summary )
        self.updated_date       = self._get_timestamp()

    def set_code( self, code ):

        self.code           = code
        self.code_embedding = self._generate_embedding( code )
        self.updated_date   = self._get_timestamp()

    def _generate_embedding( self, text ):

        print( f"Generating embedding for [{text}]... ", end="" )
        with Stopwatch() as sw:
            response = openai.embeddings.create(
                input=text,
                model="text-embedding-ada-002"
            )
        return response[ "data" ][ 0 ][ "embedding" ]

    def get_problem_similarity( self, other_snapshot ):

        if not self.problem_embedding or not other_snapshot.problem_embedding:
            raise ValueError( "Both snapshots must have a problem embedding to compare." )
        return np.dot( self.problem_embedding, other_snapshot.problem_embedding )

    def get_solution_summary_similarity( self, other_snapshot ):

        if not self.solution_embedding or not other_snapshot.solution_embedding:
            raise ValueError( "Both snapshots must have a solution summary embedding to compare." )

        return np.dot( self.solution_embedding, other_snapshot.solution_embedding )

    def get_code_similarity( self, other_snapshot ):

        if not self.code_embedding or not other_snapshot.code_embedding:
            raise ValueError( "Both snapshots must have a code embedding to compare." )

        return np.dot( self.code_embedding, other_snapshot.code_embedding )

    def to_json( self ):

        return json.dumps( self.__dict__ )

    def write_to_file( self ):

        # Get the project root directory
        project_root = du.get_project_root()
        # Define the directory where the file will be saved
        directory = f"{project_root}{self.solution_directory}"

        if self.solution_file is None:

            print( "NO solution_file value provided (Must be a new object). Generating a unique file name..." )
            # Generate filename based on first 64 characters of the problem
            filename_base = self.problem[ :64 ]
            # Replace any character that is not alphanumeric or underscore with underscore
            filename_base = "".join( c if c.isalnum() else "-" for c in filename_base )
            # Get a list of all files that start with the filename base
            existing_files = glob.glob( f"{directory}{filename_base}-*.json" )
            # The count of existing files will be used to make the filename unique
            file_count = len( existing_files )
            # generate the file name
            filename = f"{filename_base}-{file_count}.json"
            self.solution_file = filename

        else:

            print( f"solution_file value provided: [{self.solution_file}]..." )

        # Generate the full file path
        file_path = f"{directory}{self.solution_file}"
        # Print the file path for debugging purposes
        print( f"File path: {file_path}" )
        # Write the JSON string to the file
        with open( file_path, "w" ) as f:
            f.write( self.to_json() )

    @classmethod
    def from_json_file( cls, filename ):

        with open( filename, 'r' ) as f:
            data = json.load( f )
        return cls( **data )

today     = SolutionSnapshot( problem="what day is today" )
# tommorrow = SolutionSnapshot( problem="what day is tomorrow" )
# blah      = SolutionSnapshot( problem="i feel so blah today" )
# color     = SolutionSnapshot( problem="what color is the sky" )
# date      = SolutionSnapshot( problem="what is today's date" )

today.write_to_file()
# snapshot.to_json()

Generating embedding for [what day is today]... Done in 214 milliseconds
NO solution_file value provided (Must be a new object). Generating a unique file name...
File path: /var/genie-in-the-box/src/conf/long-term-memory/solutions/what-day-is-today-4.json


In [59]:
foo = SolutionSnapshot.from_json_file( "/var/genie-in-the-box/src/conf/long-term-memory/solutions/what-day-is-today-4.json" )
foo

<__main__.SolutionSnapshot at 0x7f7ed9ef9b10>

In [61]:
bar = SolutionSnapshot.from_json_file( "/var/genie-in-the-box/src/conf/long-term-memory/solutions/what-day-is-today-4.json" )
bar

FileNotFoundError: [Errno 2] No such file or directory: '/var/genie-in-the-box/src/conf/long-term-memory/solutions/what-day-is-today-4.json'

In [60]:
foo.write_to_file()

solution_file value provided: [what-day-is-today-4.json]...
File path: /var/genie-in-the-box/src/conf/long-term-memory/solutions/what-day-is-today-4.json


In [34]:
snapshots = [ today, tommorrow, blah, color, date ]

for snapshot in snapshots:

    score = today.get_problem_similarity( snapshot )
    print( f"Score: [{score}] for [{snapshot.problem}] == [{today.problem}]" )

Score: [1.0000000721124376] for [what day is today] == [what day is today]
Score: [0.9080752829638264] for [what day is tomorrow] == [what day is today]
Score: [0.7907710414102591] for [i feel so blah today] == [what day is today]
Score: [0.7562339112211474] for [what color is the sky] == [what day is today]
Score: [0.9328160982290687] for [what is today's date] == [what day is today]


In [62]:
foo = None
if foo:
    print( "foo is True" )
else:
    print( "foo is False" )

foo is False
