In [1]:
from generator import *

In [2]:
# Chat model shared by the chains in this notebook; temperature is pinned to 0.
llm = ChatOpenAI(
    temperature=0,
    model="gpt-3.5-turbo",
)
# Checklist object built from the repository's CSV file (path is relative to
# the notebook's working directory).
checklist = Checklist(
    '../../checklist/checklist.csv',
    checklist_format=ChecklistFormat.CSV,
)

In [3]:
# Build the test generator from the chat model and the checklist defined in the previous cell.
generator = TestGenerator(llm, checklist=checklist)
# Disabled experiment: overriding `generator.prompt` with a custom template that
# asks for empty test functions. Because it is commented out, `generator.prompt`
# is not reassigned in this cell.
# generator.prompt = PromptTemplate(
#     template="You are an expert Machine Learning Engineer.\n"
#              "Please generate empty test functions with numpy-format docstring based corresponding requirement of given checklist.\n"
#              #"{format_instructions}\n" # FIXME: define python function format
#              "Here is the checklist as a list of JSON objects:\n```{checklist}```\n",
#     description="Test Specification Generation for Machine Learning Project",
#     input_variables=["checklist"],
# )

In [4]:
# Run spec generation. The return value is stored in `result`, but the line
# below reads from `generator.spec` instead.
result = generator.generate_spec()
# Print the value stored under 'Function' for the second spec entry (index 1).
print(generator.spec[1]['Function'])

def test_data_in_expected_format():
    """Verifies that the data to be ingested matches the format expected by processing algorithms (like pd.DataFrame for CSVs or np.array for images) and adheres to the expected schema."""
    pass


'{"ID": "2.1", "Title": "Ensure Data File Loads as Expected", "Requirement": "Ensure that data-loading functions correctly load files when they exist and match the expected format, handle non-existent files appropriately, and return the expected results."}'

In [31]:
# Print the format instructions produced by the current `parser`.
# NOTE(review): no earlier cell in file order assigns `parser`; presumably it
# comes from out-of-order execution or from the `generator` star import --
# confirm this cell survives Restart & Run All.
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"id": {"description": "The corresponding `ID` of the checklist item provided", "title": "Id", "type": "string"}, "title": {"description": "The corresponding `Title` of the checklist item provided", "title": "Title", "type": "string"}, "Function": {"description": "A test function with the docstring of numpy format", "title": "Function", "type": "string"}}, "required": ["id", "title", "Function"]}
```


In [44]:
from typing import Callable, Any
from langchain.output_parsers import PydanticOutputParser

# Schema for a single generated test specification: the checklist item's ID and
# Title plus the generated function as a string.
class TestSpec(BaseModel):
    ID: str = Field(
        description="The corresponding `ID` of the checklist item provided",
    )
    Title: str = Field(
        description="The corresponding `Title` of the checklist item provided",
    )
    # FIXME: define python function format
    Function: str = Field(
        description="A Python test function with the docstring of numpy format",
    )

# Top-level wrapper model: a single `results` field holding a list of TestSpec entries.
class TestSpecs(BaseModel):
    results: List[TestSpec]

# JSON output parser configured with the TestSpecs model.
parser = JsonOutputParser(pydantic_object=TestSpecs)

# Prompt requesting test functions for the checklist. The parser's format
# instructions are bound up front, so `checklist` is the only runtime input.
# TODO (previously disabled lines): optionally add the code under analysis as a
# `{context}` input variable.
prompt = PromptTemplate(
    input_variables=["checklist"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    description="Test Specification Generation for Machine Learning Project",
    template=(
        "You are an expert Machine Learning Engineer.\n"
        "Please generate Python test functions based on corresponding requirement of given checklist, with docstring of numpy format.\n"
        "{format_instructions}\n"
        "Here is the checklist as a list of JSON objects:\n```{checklist}```\n"
    ),
)

# Pipeline: prompt -> chat model -> JSON parsing, invoked with `generator.test_items`.
chain = prompt | llm | parser
response = chain.invoke({"checklist": generator.test_items})

In [45]:
# Print the 'Function' string of the first entry in the response's 'results' list.
print(response['results'][0]['Function'])

def test_data_file_load():
    """
    Ensure that data-loading functions correctly load files when they exist and match the expected format, handle non-existent files appropriately, and return the expected results.
    """
    # Test implementation here


In [33]:
# Second example model, this time including a compound (list-typed) field.
class Actor(BaseModel):
    name: str = Field(
        description="name of an actor",
    )
    film_names: List[str] = Field(
        description="list of names of films they starred in",
    )


# Query text for the disabled example chain below; no active line in this cell uses it.
actor_query = "Generate the filmography for a random actor."

# NOTE: this rebinds the notebook-level name `parser` to a PydanticOutputParser for Actor.
parser = PydanticOutputParser(pydantic_object=Actor)

# Disabled: full prompt -> llm -> parser round trip for the Actor example.
# prompt = PromptTemplate(
#     template="Answer the user query.\n{format_instructions}\n{query}\n",
#     input_variables=["query"],
#     partial_variables={"format_instructions": parser.get_format_instructions()},
# )

# chain = prompt | llm | parser

# chain.invoke({"query": actor_query})
# Only the generated format instructions are printed for inspection.
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"description": "name of an actor", "title": "Name", "type": "string"}, "film_names": {"description": "list of names of films they starred in", "items": {"type": "string"}, "title": "Film Names", "type": "array"}}, "required": ["name", "film_names"]}
```


In [19]:
# Stage two: ask the model to fill in the test functions listed in `generator.spec`.

# Schema for one filled-in test function.
class TestGeneration(BaseModel):
    ID: str = Field(description="The corresponding `ID` of the checklist item provided")
    Title: str = Field(description="The corresponding `Title` of the checklist item provided")
    Function: str = Field(description="A Python test function")

# Top-level wrapper: a `results` list of TestGeneration entries.
class GenResult(BaseModel):
    results: List[TestGeneration]

# Build the parser BEFORE the prompt so the format instructions sent to the
# model describe the schema declared in this cell. Previously the prompt took
# its instructions from `generator.parser`, so the GenResult schema declared
# here never reached the model.
parser = JsonOutputParser(pydantic_object=GenResult)

# TODO (previously disabled lines): optionally add the code under analysis as a
# `{context}` input variable.
prompt = PromptTemplate(
    template="You are an expert Machine Learning Engineer.\n"
             "Please generate example Python test functions for machine learning projects.\n"
             "{format_instructions}\n"
             "Here is a list of JSON objects, in which the 'Function's are the functions to be filled:\n```{functions}```\n",
    description="Filling Test Functions for Machine Learning Project",
    input_variables=["functions"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipeline: prompt -> the generator's chat model -> JSON parsing.
chain = prompt | generator.llm | parser

# The specs are serialised with json.dumps so the prompt embeds valid JSON
# rather than a Python repr.
response = chain.invoke({"functions": json.dumps(generator.spec)})
response

{'results': [{'ID': '2.1',
   'Title': 'Ensure Data File Loads as Expected',
   'Function': 'def test_data_file_loads_as_expected():\n    """Tests the data-loading function to ensure files load correctly and return expected results\n\n    Parameters:\n    ----------\n    Returns:\n    ----------\n    """\n    pass'},
  {'ID': '3.2',
   'Title': 'Data in the Expected Format',
   'Function': 'def test_data_in_expected_format():\n    """Verifies that the data matches the expected format for processing algorithms\n\n    Parameters:\n    ----------\n    Returns:\n    ----------\n    """\n    pass'},
  {'ID': '3.5',
   'Title': 'Check for Duplicate Records in Data',
   'Function': 'def test_check_duplicate_records_in_data():\n    """Checks for duplicate records in the dataset and ensures there are none\n\n    Parameters:\n    ----------\n    Returns:\n    ----------\n    """\n    pass'},
  {'ID': '4.2',
   'Title': 'Verify Data Split Proportion',
   'Function': 'def test_verify_data_split_pr

In [20]:
# Print the 'Function' string of the first entry in the response's 'results' list.
print(response['results'][0]['Function'])

def test_data_file_loads_as_expected():
    """Tests the data-loading function to ensure files load correctly and return expected results

    Parameters:
    ----------
    Returns:
    ----------
    """
    pass


In [7]:
# Print the 'Function' string of the first entry in `func['results']`.
# NOTE(review): `func` is not assigned anywhere in the visible notebook; it
# presumably comes from a deleted or out-of-order cell -- confirm or remove.
print(func['results'][0]['Function'])

def test_data_file_loads_as_expected():
    """Tests the data-loading function to ensure files load correctly and return expected results

    Parameters:
    ----------
    Returns:
    ----------
    """
    pass


In [6]:
# Invoke the generator's own chain with `generator.test_items` as the `checklist` input.
response = generator.chain.invoke({"checklist": generator.test_items})

# Keep only the list stored under the 'results' key.
result = response['results']

In [7]:
# Print the 'Function' string of the first entry in `result`.
print(result[0]['Function'])

def test_data_file_loads_as_expected():
    """
    Ensure that data-loading functions correctly load files when they exist and match the expected format, handle non-existent files appropriately, and return the expected results.
    """
    pass


In [6]:
# Print the format instructions produced by the generator's own parser.
print(generator.parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"TestSpecGeneration": {"properties": {"ID": {"description": "The corresponding `ID` of the checklist item provided", "title": "Id", "type": "string"}, "Title": {"description": "The corresponding `Title` of the checklist item provided", "title": "Title", "type": "string"}, "Function": {"description": "A test function with the docstring of numpy format", "title": "Function", "type": "string"}}, "required": ["ID", "Title", "Function"], "title": "TestSpecGeneration", "type": "object"}}, "properties": {"results": {"items": {"$ref": "#/$d

In [4]:
from typing import Callable

In [3]:
# Scratch check: a bare expression that echoes the builtin `callable` function
# (lowercase; distinct from `typing.Callable`). It assigns nothing.
callable

<function callable(obj, /)>