## Validate XML

In [18]:
import os

# Where is the kernel running right now?
print( os.getcwd() )

# Move to the source directory, then print again to confirm the switch
os.chdir( "/var/genie-in-the-box/src/" )
print( os.getcwd() )

/var/genie-in-the-box/src/ephemera/notebooks
/var/genie-in-the-box/src


In [19]:
import xml.etree.ElementTree as et
import lib.utils.util as du

def validate_xml(xml_string):
    """
    Check that `xml_string` is a well-formed response document with the expected structure.

    The document must parse, its root element must directly contain each of the tags
    <question>, <thoughts>, <code>, <returns>, <example>, <explanation> and <error>,
    and <code> must directly contain at least one <line> child.

    On failure the problem is reported through `du.print_banner`, the offending XML is
    printed, and a ValueError is raised.

    :param xml_string: XML text to validate.
    :return: None when the document passes every check.
    :raises ValueError: when the XML cannot be parsed or a required tag is missing. The
        message (args[0]) is a plain string describing the first problem found, and the
        underlying error is available through `__cause__`.
    """

    class MalformedXmlError(Exception):
        """Internal marker for any structural problem found while validating."""

    def validate( xml_string ):
        try:
            root = et.fromstring( xml_string )
        except et.ParseError as parse_error:
            # Chain explicitly so the parser's line/column detail stays reachable via __cause__.
            raise MalformedXmlError("The XML is malformed.") from parse_error

        expected_tags = ["question", "thoughts", "code", "returns", "example", "explanation", "error"]
        for tag in expected_tags:
            if root.find(tag) is None:
                raise MalformedXmlError(f"The XML is missing the <{tag}> tag.")

        # <code> is known to exist at this point: it is one of the expected tags checked above.
        if root.find("code").find("line") is None:
            raise MalformedXmlError("The XML is missing the <line> tag inside the <code> tag.")

    try:
        validate( xml_string )
    except MalformedXmlError as e:
        du.print_banner( e, expletive=True )
        print( xml_string )
        # Hand callers the message text rather than the exception object, and keep the chain.
        raise ValueError( str( e ) ) from e

# Sample response document used to exercise validate_xml.
# NOTE(review): the first <line> is closed with </lin>, so this document is not well-formed;
# presumably that is deliberate, to demonstrate the failure path — confirm.
xml_string = """
<response>
 <question></question>
 <thoughts></thoughts>
 <code>
    <line></lin>
    <line></line>
    <line></line>
 </code>
 <returns>Object type of the variable `solution`</returns>
 <example>One-line example of how to call your code: solution = function_name_here( arguments )</example>
 <explanation>Explanation of how the code works</explanation>
 <error>Description of any issues or errors that you encountered while attempting to fulfill this request</error>
</response>
"""

# With the mismatched tag above this call raises ValueError after printing a banner and the XML.
validate_xml( xml_string )


¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿
¡@#!-$?%^_¿
¡@#!-$?%^_¿ The XML is malformed.
¡@#!-$?%^_¿
¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿¡@#!-$?%^_¿


<response>
 <question></question>
 <thoughts></thoughts>
 <code>
    <line></lin>
    <line></line>
    <line></line>
 </code>
 <returns>Object type of the variable `solution`</returns>
 <example>One-line example of how to call your code: solution = function_name_here( arguments )</example>
 <explanation>Explanation of how the code works</explanation>
 <error>Description of any issues or errors that you encountered while attempting to fulfill this request</error>
</response>


ValueError: The XML is malformed.

In [7]:
import re

# Sample input: digits separated by single spaces
numbers = "1 2 3 4 5 6 7 8 9 10"

# Delete each single space that has a digit immediately on both sides
modified_numbers = re.compile( r'(?<=\d) (?=\d)' ).sub( '', numbers )

modified_numbers


'12345678910'

In [10]:
letters = " a b c d e f g a b c.com"
# Same approach with `\w`: delete each single space flanked by word characters on both sides
modified_letters = re.compile( r'(?<=\w) (?=\w)' ).sub( '', letters )
modified_letters

' abcdefgabc.com'

## format output

In [9]:
# Setup for the cells below: inference client, working directory, project helpers.
from huggingface_hub import InferenceClient
import os
# Print the current working directory
print( os.getcwd() )

# Switch to the source directory and print it again to confirm
os.chdir( "/var/genie-in-the-box/src/" )
print( os.getcwd() )

# NOTE(review): these imports come after the chdir, presumably so the `lib` package resolves
# from the new working directory — confirm before reordering this cell.
import lib.utils.util as du
import lib.utils.util_stopwatch as sw

/var/genie-in-the-box/src
/var/genie-in-the-box/src


In [50]:
# Alternative preamble (disabled) that also asks for XML output:
# preamble ="""
# You are an expert in converting raw data into conversational English and outputting it as XML document.
#
# The answer below is the result of a query on a pandas dataframe about events, dates, and times on my calendar.
# """
# Active preamble: the opening part of the prompt sent to the LLM.
preamble ="""
You are an expert in converting raw data into concise conversational English.

The answer below is the result of a query on a pandas dataframe about events, dates, and times on my calendar.
"""

# Sample values interpolated into `instructions` below.
data_format = "plain text"
question = "What's today's date?"
code_response_dict ={
    "output": "Wednesday, November 29, 2023"
}

# Active instructions: ask for the rephrased answer wrapped in a small XML document.
instructions = f"""
Rephrase the raw answer in {data_format} format below so that it briefly answers the question below, and nothing more.

Question: {question}
Raw Answer: {code_response_dict[ "output" ]}

Return your answer as a simple xml document with the following fields:
<response>
    <rephrased_answer>Your rephrased answer</rephrased_answer>
</response>
"""
# Alternative instructions (disabled) without the XML output requirement:
# instructions = f"""
# Rephrase the raw answer in {data_format} format below so that it briefly answers the question below, and nothing more.
#
# Question: {question}
# Raw Answer: {code_response_dict[ "output" ]}
# """

# NOTE(review): `url` is not referenced by the other cells visible here; query_llm_phind
# calls du.get_tgi_server_url() itself — confirm whether this assignment is still needed.
url = du.get_tgi_server_url()



In [51]:
PHIND_34B_v2  = "Phind/Phind-CodeLlama-34B-v2"

DEFAULT_MODEL = PHIND_34B_v2

def query_llm_phind( preamble, instructions, model=DEFAULT_MODEL, max_new_tokens=1024, temperature=1.0, stop_sequences=None ):
    """
    Stream a completion for `preamble` + `instructions` from the TGI server and return it.

    The prompt is printed before the request, every streamed token is printed as it
    arrives, and the elapsed time and token count are printed at the end.

    :param preamble: First part of the prompt.
    :param instructions: Second part of the prompt, appended after a newline.
    :param model: Name shown in the timing message. It is not passed to the client:
        requests go to whatever server `du.get_tgi_server_url()` returns.
    :param max_new_tokens: Passed through to `client.text_generation`.
    :param temperature: Passed through to `client.text_generation`.
    :param stop_sequences: Passed through to `client.text_generation`;
        defaults to [ "</response>", "..</s>" ] when None.
    :return: The concatenated streamed tokens with surrounding whitespace stripped.
    """
    if stop_sequences is None:
        stop_sequences = [ "</response>", "..</s>" ]

    # Plain f-string: a follow-up .format() call would choke on model names containing braces.
    timer  = sw.Stopwatch( msg=f"Asking LLM [{model}]..." )
    client = InferenceClient( du.get_tgi_server_url() )

    prompt = f"{preamble}\n{instructions}\n"
    print( prompt )

    token_list = [ ]
    for token in client.text_generation(
        prompt, max_new_tokens=max_new_tokens, stream=True, stop_sequences=stop_sequences, temperature=temperature
    ):
        # Echo tokens as they stream in so progress is visible.
        print( token, end="" )
        token_list.append( token )

    response = "".join( token_list ).strip()

    timer.print( use_millis=True, prepend_nl=True )
    print( f"Token list length [{len( token_list )}]" )

    return response

In [52]:
# Send the preamble and instructions to the LLM; the prompt and streamed tokens are printed as a side effect.
response = query_llm_phind( preamble, instructions )

Asking LLM [Phind/Phind-CodeLlama-34B-v2]...

You are an expert in converting raw data into concise conversational English.

The answer below is the result of a query on a pandas dataframe about events, dates, and times on my calendar.


Rephrase the raw answer in plain text format below so that it briefly answers the question below, and nothing more.

Question: What's today's date?
Raw Answer: Wednesday, November 29, 2023

Return your answer as a simple xml document with the following fields:
<response>
    <rephrased_answer>Your rephrased answer</rephrased_answer>
</response>


<response>
    <rephrased_answer>Today's date is November 29, 2023.</rephrased_answer>
</response>
Asking LLM [Phind/Phind-CodeLlama-34B-v2]... in 1,260 ms

Token list length [40]


In [46]:
# Extract the <rephrased_answer> value from the LLM response.
# NOTE(review): default_value=response presumably makes the raw response the fallback
# when the tag is not found — confirm against du.get_value_by_xml_tag_name.
rephrased_answer = du.get_value_by_xml_tag_name( response, "rephrased_answer", default_value=response )
rephrased_answer

"Today's date is November 29, 2023."

In [None]:
# Bare string expression holding a complete prompt; it is not assigned to any variable.
"""You are an expert in converting raw data into conversational English and outputting it as XML document.

The answer below is the result of a query on a pandas dataframe about events, dates, and times on my calendar.

Rephrase the raw answer in plain text format below so that it briefly answers the question below, and nothing more.

Question: whats todays date
Raw Answer: 2023-11-29

Return your answer as a simple xml document with the following fields:
<response>
    <rephrased_answer>Your rephrased answer</rephrased_answer>
</response>
"""