## LangCST: Chain-Lang for LibCST Transformers

The goal of this project is to build a LangChain pipeline in which GPT-3 generates code and unit tests from a Parsel template, and then to run imaginary unit tests on the generated code.

In [1]:
import os
# Hard-coding a key in a notebook is insecure and not recommended: export
# OPENAI_API_KEY from the command line before starting the notebook instead.
# setdefault keeps a key that is already configured in the environment and only
# falls back to the placeholder below when none is set.
os.environ.setdefault("OPENAI_API_KEY", "your key here")

In [2]:
from langchain.llms import OpenAI
# Shared LLM handle used by every prompt cell below; temperature is fixed at 0.
# NOTE(review): max_tokens=-1 presumably asks the wrapper to size the completion
# automatically -- confirm against the langchain OpenAI wrapper documentation.
llm = OpenAI(temperature=0,max_tokens= -1)

In [3]:
# Source text of a LibCST transformer, stored as a string so it can be pasted
# into prompts as example code (the text is never executed here). The embedded
# class rebuilds each `while` node that is_modified() reports as untouched with
# the test `True` and records the change in context.scratch under the node's
# start position.
InfiniteWhile =  '''class InfiniteWhileTransformer(ContextAwareTransformer):
    METADATA_DEPENDENCIES = (PositionProvider,)
    def __init__(self, context: CodemodContext):
        super().__init__(context)
        self.id = f"{self.__class__.__name__}-{uuid.uuid4().hex[:4]}"
    def mutate(self, tree: cst.Module,reverse: bool = False) -> cst.Module:
            self.reverse=reverse
            return self.transform_module(tree)
    def leave_While(self, original_node: cst.While, updated_node: cst.While) -> None:
        meta_pos = self.get_metadata(PositionProvider, original_node)
        #only updates nodes that are not already in the scratch
        already_modified = is_modified(original_node,meta_pos,self.context)
        print("already modified",already_modified)
        if not already_modified: 
            updated_node = cst.While(
                test=cst.Name("True"),
                body=original_node.body
            )
            self.context.scratch[meta_pos.start] = {
                "modified": True,
                "original_position": meta_pos,
                "original_node": original_node,
                "updated_node": updated_node,
                "author": self.id
            }
        return updated_node '''
    

        
# Source text of a LibCST transformer, stored as a string so it can be used as
# a few-shot example (the text is never executed here). The embedded class
# swaps the left operand of a comparison with the comparator of its first
# comparison target, keeps the operator, and records the change in
# context.scratch under the node's start position.
ComparisonSwap = '''class ComparisonSwapTransformer(ContextAwareTransformer):
    """ Swap the left and right side of a comparison, if there are moe than one comparison it swaps the first one"""    
    METADATA_DEPENDENCIES = (PositionProvider,)
    def __init__(self, context: CodemodContext):
        super().__init__(context)
        self.id = f"{self.__class__.__name__}-{uuid.uuid4().hex[:4]}"
    def mutate(self, tree: cst.Module,) -> cst.Module:
            return self.transform_module(tree)
    

    def leave_Comparison(self, original_node: cst.Comparison, updated_node: cst.Comparison) -> None:
        meta_pos = self.get_metadata(PositionProvider, original_node)
        #only updates nodes that are not already in the scratch
        already_modified = is_modified(original_node,meta_pos,self.context)
        
        comparisons = original_node.comparisons
        
        # only update the first comparison
        if not already_modified and len(comparisons) > 0:
            left = original_node.left
            lpar = original_node.lpar
            rpar = original_node.rpar
            first_comparison = comparisons[0]
            new_left = first_comparison.comparator
            first_comparison = cst.ComparisonTarget(
                operator=first_comparison.operator, 
                comparator=left
            )
            
            comparisons = list(comparisons)
            comparisons[0] = first_comparison
            comparisons = tuple(comparisons)
            updated_node = cst.Comparison(
                left=new_left,
                comparisons=comparisons,
                lpar=lpar,
                rpar=rpar
            )
            self.context.scratch[meta_pos.start] = {
                "modified": True, 
                "original_position": meta_pos,
                "original_node":original_node ,
                "updated_node":updated_node,
                "author":self.id
                } 
        return updated_node  '''



# Source text of a LibCST transformer, stored as a string; the cells below pass
# it to the prompts as the code to describe (the text is never executed here).
# The embedded class adds 1 to the value of an Index node and to the lower and
# upper bounds of a Slice node, recording each change in context.scratch.
# NOTE(review): leave_Slice reads `.value` of lower/upper/step and feeds it to
# int(), so it appears to assume literal integer bounds; it also passes the raw
# `step.value` (not a node) back into with_changes -- confirm both are intended.
OffByOneIndex = '''class OffByOneIndexTransformer(ContextAwareTransformer):
    """ Uses the leave_Index method to add 1 to the index value"""
    METADATA_DEPENDENCIES = (PositionProvider,)
    def __init__(self, context: CodemodContext):
        super().__init__(context)
        self.id = f"{self.__class__.__name__}-{uuid.uuid4().hex[:4]}"
    def mutate(self, tree: cst.Module,) -> cst.Module:
            return self.transform_module(tree)
    def leave_Index(self, original_node: cst.Index, updated_node: cst.Index) -> None:
        meta_pos = self.get_metadata(PositionProvider, original_node)
        #only updates nodes that are not already in the scratch
        already_modified = is_modified(original_node,meta_pos,self.context)
        if not already_modified:
            # print("adding to scratch",meta_pos.start, meta_pos.end)
            print(original_node.value.value, type(original_node.value.value))
            updated_node = original_node.with_changes(value=original_node.value.with_changes(value=str(int(original_node.value.value)+1)))
            self.context.scratch[meta_pos.start] = {
                "modified": True, 
                "original_position": meta_pos,
                "original_node":original_node ,
                "updated_node":updated_node,
                "author":self.id
                } 
        return updated_node    
    def leave_Slice(self, original_node: cst.Slice, updated_node: cst.Slice) -> None:
        meta_pos = self.get_metadata(PositionProvider, original_node)
        #only updates nodes that are not already in the scratch
        already_modified = is_modified(original_node,meta_pos,self.context)
        if not already_modified:
            # print("adding to scratch",meta_pos.start, meta_pos.end)
            lower = original_node.lower.value if original_node.lower is not None else None
            upper = original_node.upper.value if original_node.upper is not None else None
            step = original_node.step.value if original_node.step is not None else None
            print(type(lower), type(upper), step)
            if lower is not None:
                lower =cst.parse_expression(str(int(lower)+1))
            if upper is not None:
                upper = cst.parse_expression(str(int(upper)+1))
            updated_node = original_node.with_changes(lower=lower, upper=upper, step=step)
            self.context.scratch[meta_pos.start] = {
                "modified": True, 
                "original_position": meta_pos,
                "original_node":original_node ,
                "updated_node":updated_node,
                "author":self.id
                } 
        return updated_node '''
        
# Source text of a LibCST transformer, stored as a string (the text is never
# executed here). The embedded class replaces the value of an assignment with
# the assignment's first target and records the original and updated values and
# nodes in context.scratch.
# NOTE(review): this string is not referenced by any of the cells visible in
# this notebook excerpt -- confirm whether it is used elsewhere.
ForgettingToUpdateVariable = '''class ForgettingToUpdateVariableTransformer(ContextAwareTransformer):
        METADATA_DEPENDENCIES = (PositionProvider, )
        def __init__(
            self,
            context: CodemodContext):
            super().__init__(context)
            self.id = f"{self.__class__.__name__}-{uuid.uuid4().hex[:4]}"
        def transform_module_impl(self, tree: cst.Module) -> cst.Module:
            return tree.visit(self)
        def mutate(self, tree: cst.Module,reverse: bool = False) -> cst.Module:
            return self.transform_module(tree)
              
        def leave_Assign(self, original_node:cst.Assign, updated_node: cst.Assign) -> None:
            meta_pos = self.get_metadata(PositionProvider, original_node)
            #only updates nodes that are not already in the scratch
            already_modified = is_modified(original_node,meta_pos,self.context)
            if not already_modified: 
                # var_name = original_node.value
                # old_target = original_node.targets
                # new_target = AssignTarget(target=var_name)
                # print("pred updating with value",original_node.value)
                # print("pred updating with target",original_node.targets[0].target)

                updated_node = original_node.with_changes(value=original_node.targets[0].target)

                self.context.scratch[meta_pos.start] = {
                "modified": True, 
                "original_position": meta_pos,
                "original_value":original_node.value,
                "updated_value":original_node.targets[0].target,
                "original_node":original_node ,
                "updated_node":updated_node,
                "author":self.id
                } 
            return updated_node '''

In [4]:
# Parsel (numbered meta-code) description of the InfiniteWhile transformer.
# Each numbered line names one component the generated code must contain.
# The text is sent to the model verbatim, so its wording is part of the prompt.
while_parsel="""1 InfiniteWhileTransformer(context: CodemodContext): Replaces existing test of the while loop with True and saves modification to scratchpad
2 changes_method = True, the application of this transformer will change the behavior of the method
3 METADATA_DEPENDENCIES = PositionProvider: Identify nodes that have been modified by another transformer
4 check_for_modifications = True, the transformer uses the is_modified(original_node,meta_pos,self.context) to only modifies nodes that have not been mutated by other transformers. 
5 save_modifications = True, the transformers write to self.contex.scratchpad a dictionary with the data of the modifications.
6 leave_list = [leave_While,]
6a leave_While = the transformer will swap the test of the while loop with True """
print(while_parsel)
# Parsel description of the ComparisonSwap transformer, same layout as above.
# NOTE(review): the last two lines are both labelled "6a" -- confirm whether the
# second one was meant to carry a different label.
comparison_parsel="""1 ComparisonSwapTransformer(context: CodemodContext): Swap the left and right side of a comparison, if there are more than one comparison it swaps the first one
2 changes_method = True, the application of this transformer will change the behavior of the method
3 METADATA_DEPENDENCIES = PositionProvider: Identify nodes that have been modified by another transformer
4 check_for_modifications = True, the transformer uses the is_modified(original_node,meta_pos,self.context) to only modifies nodes that have not been mutated by other transformers.
5 save_modifications = True, the transformers write to self.contex.scratchpad a dictionary with the data of the modifications.
6 leave_list = [leave_Comparison,]
6a leave_Comparison = the transformer will swap the left and right side of a comparison, if there are more than one comparison it swaps the first one
6a only_first_comparison = True, the transformer only updates the first comparison in the comparison sequence"""



1 InfiniteWhileTransformer(context: CodemodContext): Replaces existing test of the while loop with True and saves modification to scratchpad
2 changes_method = True, the application of this transformer will change the behavior of the method
3 METADATA_DEPENDENCIES = PositionProvider: Identify nodes that have been modified by another transformer
4 check_for_modifications = True, the transformer uses the is_modified(original_node,meta_pos,self.context) to only modifies nodes that have not been mutated by other transformers. 
5 save_modifications = True, the transformers write to self.contex.scratchpad a dictionary with the data of the modifications.
6 leave_list = [leave_While,]
6a leave_While = the transformer will swap the test of the while loop with True 


First we try to go from parsel to code with no examples

In [5]:
from langchain.prompts import PromptTemplate

# Zero-shot prompt: the model only receives the parsel description, with no
# worked example. The template text is reproduced verbatim.
parsel_prompt = PromptTemplate(
    template="Meta-code is described indicating each line with a number and defining in each number a different component of the desired code, all the components must be repsected in the output python code. The following meta-code {parsel} can be transformed into the following python code:",
    input_variables=["parsel"],
)

# Render the prompt a single time and reuse the text for display and for the
# model call.
_zero_shot_text = parsel_prompt.format(parsel=comparison_parsel)

print("Running the following prompt:")
print(_zero_shot_text)
print("GPT-3 output:")
code_no_example = llm(_zero_shot_text)
print(code_no_example)

Running the following prompt:
Meta-code is described indicating each line with a number and defining in each number a different component of the desired code, all the components must be repsected in the output python code. The following meta-code 1 ComparisonSwapTransformer(context: CodemodContext): Swap the left and right side of a comparison, if there are more than one comparison it swaps the first one
2 changes_method = True, the application of this transformer will change the behavior of the method
3 METADATA_DEPENDENCIES = PositionProvider: Identify nodes that have been modified by another transformer
4 check_for_modifications = True, the transformer uses the is_modified(original_node,meta_pos,self.context) to only modifies nodes that have not been mutated by other transformers.
5 save_modifications = True, the transformers write to self.contex.scratchpad a dictionary with the data of the modifications.
6 leave_list = [leave_Comparison,]
6a leave_Comparison = the transformer will 

Here we try to go from code to parsel with one example

In [6]:
from langchain.prompts import PromptTemplate

# One-shot prompt for the code -> parsel direction: one example pair
# (example_code, example_parsel) followed by the code to describe. The template
# text is reproduced verbatim.
code_to_parsel = PromptTemplate(
    template="Meta-code is described indicating each line with a number and defining in each number a different component of the desired code, all the components must be repsected in the output python code. Here is an example of a LibCst transformer code {example_code} and the corresponding parsel {example_parsel}. Respecting the syntax and style from the example mapping from parsel to example_code The following code {code} can be transformed into the following meta-code parsel style:",
    input_variables=["example_parsel", "example_code","code"],
)

# Render the prompt a single time and reuse the text for display and for the
# model call.
_code_to_parsel_text = code_to_parsel.format(
    code=OffByOneIndex,
    example_code=InfiniteWhile,
    example_parsel=while_parsel,
)

print("Running the following prompt:")
print(_code_to_parsel_text)
print("GPT-3 output:")
predicted_parsel = llm(_code_to_parsel_text)
print(predicted_parsel)

Running the following prompt:
Meta-code is described indicating each line with a number and defining in each number a different component of the desired code, all the components must be repsected in the output python code. Here is an example of a LibCst transformer code class InfiniteWhileTransformer(ContextAwareTransformer):
    METADATA_DEPENDENCIES = (PositionProvider,)
    def __init__(self, context: CodemodContext):
        super().__init__(context)
        self.id = f"{self.__class__.__name__}-{uuid.uuid4().hex[:4]}"
    def mutate(self, tree: cst.Module,reverse: bool = False) -> cst.Module:
            self.reverse=reverse
            return self.transform_module(tree)
    def leave_While(self, original_node: cst.While, updated_node: cst.While) -> None:
        meta_pos = self.get_metadata(PositionProvider, original_node)
        #only updates nodes that are not already in the scratch
        already_modified = is_modified(original_node,meta_pos,self.context)
        print("alre

In [7]:
from langchain.prompts import PromptTemplate

# One-shot prompt for the parsel -> code direction: one example pair
# (example_code, example_parsel) followed by the parsel to turn into code.
parsel_to_code = PromptTemplate(
    input_variables=["example_parsel", "example_code","parsel"],
    template="Meta-code is described indicating each line with a number and defining in each number a different component of the desired code, all the components must be repsected in the output python code. Here is an example of a LibCst transformer code {example_code} and the corresponding parsel {example_parsel}. Respecting the syntax and style from the example mapping from parsel to example_code The following parsel style meta-code {parsel} can be transformed into the following python code:",
)

# Render once so the prompt that is printed is exactly the prompt that is sent.
_parsel_to_code_text = parsel_to_code.format(
    example_parsel=while_parsel,
    example_code=InfiniteWhile,
    parsel=predicted_parsel,
)

print("Running the following prompt:")
print(_parsel_to_code_text)
print("GPT-3 output:")
# The model output here is code, so it gets its own name. Storing it in
# `predicted_parsel` (as before) overwrote the parsel produced by the previous
# cell, and re-running this cell then fed generated code back in as "parsel".
predicted_code = llm(_parsel_to_code_text)
print(predicted_code)

Running the following prompt:
Meta-code is described indicating each line with a number and defining in each number a different component of the desired code, all the components must be repsected in the output python code. Here is an example of a LibCst transformer code class InfiniteWhileTransformer(ContextAwareTransformer):
    METADATA_DEPENDENCIES = (PositionProvider,)
    def __init__(self, context: CodemodContext):
        super().__init__(context)
        self.id = f"{self.__class__.__name__}-{uuid.uuid4().hex[:4]}"
    def mutate(self, tree: cst.Module,reverse: bool = False) -> cst.Module:
            self.reverse=reverse
            return self.transform_module(tree)
    def leave_While(self, original_node: cst.While, updated_node: cst.While) -> None:
        meta_pos = self.get_metadata(PositionProvider, original_node)
        #only updates nodes that are not already in the scratch
        already_modified = is_modified(original_node,meta_pos,self.context)
        print("alre

Here we fix the few-shot prompt template: it parsed the examples twice, which led to erroneous parsing of the script.

In [8]:
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Extra, root_validator

from langchain.prompts.base import (
    DEFAULT_FORMATTER_MAPPING,
    BasePromptTemplate,
    check_valid_template,
)
from langchain.prompts.example_selector.base import BaseExampleSelector
from langchain.prompts.prompt import PromptTemplate
class FewShotPromptTemplate(BasePromptTemplate, BaseModel):
    """Prompt template that contains few shot examples.

    ``format`` renders the prefix, every example and the suffix independently
    and then joins the finished strings. The already formatted examples are
    therefore never run through the formatter a second time.
    """

    examples: Optional[List[dict]] = None
    """Examples to format into the prompt.
    Either this or example_selector should be provided."""

    example_selector: Optional[BaseExampleSelector] = None
    """ExampleSelector to choose the examples to format into the prompt.
    Either this or examples should be provided."""

    example_prompt: PromptTemplate
    """PromptTemplate used to format an individual example."""

    suffix: str
    """A prompt template string to put after the examples."""

    input_variables: List[str]
    """A list of the names of the variables the prompt template expects."""

    example_separator: str = "\n\n"
    """String separator used to join the prefix, the examples, and suffix."""

    prefix: str = ""
    """A prompt template string to put before the examples."""

    template_format: str = "f-string"
    """The format of the prompt template. Options are: 'f-string', 'jinja2'."""

    validate_template: bool = True
    """Whether or not to try validating the template."""

    @root_validator(pre=True)
    def check_examples_and_selector(cls, values: Dict) -> Dict:
        """Check that one and only one of examples/example_selector are provided."""
        examples = values.get("examples", None)
        example_selector = values.get("example_selector", None)
        if examples and example_selector:
            raise ValueError(
                "Only one of 'examples' and 'example_selector' should be provided"
            )

        if examples is None and example_selector is None:
            raise ValueError(
                "One of 'examples' and 'example_selector' should be provided"
            )

        return values

    @root_validator()
    def template_is_valid(cls, values: Dict) -> Dict:
        """Check that prefix, suffix and input variables are consistent."""
        if values["validate_template"]:
            check_valid_template(
                values["prefix"] + values["suffix"],
                values["template_format"],
                values["input_variables"],
            )
        return values

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    def _get_examples(self, **kwargs: Any) -> List[dict]:
        """Return the static examples, or ask the selector to pick some.

        Raises:
            ValueError: If neither ``examples`` nor ``example_selector`` is set.
        """
        if self.examples is not None:
            return self.examples
        elif self.example_selector is not None:
            return self.example_selector.select_examples(kwargs)
        else:
            raise ValueError(
                "One of 'examples' and 'example_selector' should be provided"
            )

    def _format_or_raw(self, template_text: str, values: Dict[str, Any]) -> str:
        """Format ``template_text`` with ``values``; return it unchanged on failure.

        The fallback is deliberate best-effort behaviour: a prefix or suffix
        that cannot be formatted with the supplied values is used verbatim.
        Only ``Exception`` is caught, so ``KeyboardInterrupt`` and
        ``SystemExit`` still propagate.
        """
        try:
            formatter = DEFAULT_FORMATTER_MAPPING[self.template_format]
            return formatter(template_text, **values)
        except Exception:
            return template_text

    def format(self, **kwargs: Any) -> str:
        """Format the prompt with the inputs.

        Args:
            kwargs: Any arguments to be passed to the prompt template.

        Returns:
            A formatted string.

        Example:

        .. code-block:: python

            prompt.format(variable1="foo")
        """
        # Format each example with its own values.
        example_strings = [
            self.example_prompt.format(**example)
            for example in self._get_examples(**kwargs)
        ]
        # Format prefix and suffix on their own, before joining, so the
        # rendered examples are not passed through the formatter again.
        formatted_prefix = self._format_or_raw(self.prefix, kwargs)
        formatted_suffix = self._format_or_raw(self.suffix, kwargs)
        # Assemble the final prompt, skipping empty pieces.
        pieces = [formatted_prefix, *example_strings, formatted_suffix]
        return self.example_separator.join(piece for piece in pieces if piece)

    @property
    def _prompt_type(self) -> str:
        """Return the prompt type key."""
        return "few_shot"

    def dict(self, **kwargs: Any) -> Dict:
        """Return a dictionary of the prompt.

        Raises:
            ValueError: If an example selector is set.
        """
        if self.example_selector:
            raise ValueError("Saving an example selector is not currently supported")
        return super().dict(**kwargs)

Finally we use an example dictionary with 2 examples to bootstrap either generation of code from parsel or parsel from code

In [9]:
from langchain.prompts import PromptTemplate#, FewShotPromptTemplate

# Two worked examples for the few-shot prompts; each pairs a transformer's
# source text ("code") with its parsel description ("parsel").
examples = [{"name": "InfiniteWhileTransformer","code": InfiniteWhile, "parsel": while_parsel},
                {"name": "ComparisonSwapTransformer","code": ComparisonSwap,"parsel": comparison_parsel}]
# NOTE(review): `prefix` is defined here but is not passed to
# FewShotPromptTemplate in the cells visible in this notebook -- confirm
# whether it was meant to be.
prefix = "Meta-code is described indicating each line with a number and defining in each number a different component of the desired code, all the components must be repsected in the output python code. Here is a list of examples of a LibCst transformer parsel and the corresponding code "
# Suffixes for the two directions; {input} is filled with the parsel or code to convert.
suffix_parsel2code = "Respecting the syntax and style from the example mapping from parsel to example_code The following meta-code {input} can be transformed into the following python code:"
suffix_code2parsel = "Finally, respecting the syntax and style from the examples mapping from parsel to example_code the following code {input} can be transformed into the following parsel style meta-code:"

# Layout used to render one entry of `examples`.
example_prompt = PromptTemplate(
    input_variables=["name","code", "parsel"],
    template="This is the example parsel for the {name} method:\n {parsel}\n and the corresponding code \n{code}."
)




Code2Parsel

In [10]:
# Few-shot prompt for the code -> parsel direction.
prompt = FewShotPromptTemplate(
    input_variables=["input"],
    suffix=suffix_code2parsel,
    example_prompt=example_prompt,
    examples=examples,
)
# Render once; the same text is displayed and sent to the model.
_code2parsel_text = prompt.format(input=OffByOneIndex)
print(_code2parsel_text)
parsel_from_code = llm(_code2parsel_text)
print(parsel_from_code)

This is the example parsel for the InfiniteWhileTransformer method:
 1 InfiniteWhileTransformer(context: CodemodContext): Replaces existing test of the while loop with True and saves modification to scratchpad
2 changes_method = True, the application of this transformer will change the behavior of the method
3 METADATA_DEPENDENCIES = PositionProvider: Identify nodes that have been modified by another transformer
4 check_for_modifications = True, the transformer uses the is_modified(original_node,meta_pos,self.context) to only modifies nodes that have not been mutated by other transformers. 
5 save_modifications = True, the transformers write to self.contex.scratchpad a dictionary with the data of the modifications.
6 leave_list = [leave_While,]
6a leave_While = the transformer will swap the test of the while loop with True 
 and the corresponding code 
class InfiniteWhileTransformer(ContextAwareTransformer):
    METADATA_DEPENDENCIES = (PositionProvider,)
    def __init__(self, context

Parsel2Code

In [11]:
# Few-shot prompt for the parsel -> code direction.
prompt = FewShotPromptTemplate(
    examples=examples, 
    example_prompt=example_prompt, 
    suffix=suffix_parsel2code,
    input_variables=["input"], 
)
# Render the prompt once from the parsel generated in the Code2Parsel step, so
# the text that is printed is exactly the text sent to the model. Previously
# the printed prompt was rendered with OffByOneIndex (the source code) while
# the model received the prompt rendered with parsel_from_code.
_parsel2code_text = prompt.format(input=parsel_from_code)
print(_parsel2code_text)
code_from_parsel = llm(_parsel2code_text)
print(code_from_parsel)

This is the example parsel for the InfiniteWhileTransformer method:
 1 InfiniteWhileTransformer(context: CodemodContext): Replaces existing test of the while loop with True and saves modification to scratchpad
2 changes_method = True, the application of this transformer will change the behavior of the method
3 METADATA_DEPENDENCIES = PositionProvider: Identify nodes that have been modified by another transformer
4 check_for_modifications = True, the transformer uses the is_modified(original_node,meta_pos,self.context) to only modifies nodes that have not been mutated by other transformers. 
5 save_modifications = True, the transformers write to self.contex.scratchpad a dictionary with the data of the modifications.
6 leave_list = [leave_While,]
6a leave_While = the transformer will swap the test of the while loop with True 
 and the corresponding code 
class InfiniteWhileTransformer(ContextAwareTransformer):
    METADATA_DEPENDENCIES = (PositionProvider,)
    def __init__(self, context