# FastAPI & Flask
- Donor: FastAPI
- Host: Flask
- Embedding Model: sentence-transformers/all-mpnet-base-v2

Pipeline:
1. Load test data from the donor and host, and store the embeddings and metadata in the DB.
2. Given the organ to transplant from the donor program, use cosine similarity to search for relevant unit tests from the donor (retrieval).
3. For each retrieved unit test, map it to the most similar unit test in the host program, possibly using cosine similarity as well.
4. Ask the LLM to generate the transplanted code, given the feature code, the relevant unit tests from the donor, and the function mappings.

In [2]:
import torch
import faiss
from uuid import uuid4
import json

from sentence_transformers import SentenceTransformer
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document

In [3]:
# Load Data
# Upper bound on how many collected tests are read from each program's JSON
# file; passed to LoadData for both the donor and the host.
num_data = 300

def LoadData(json_file, num_data=300):
    """Load the first ``num_data`` collected tests from a JSON file.

    Args:
        json_file: Path to a JSON document whose top-level object has a
            ``"tests"`` array.
        num_data: Maximum number of entries to return, counted from the
            start of the array.

    Returns:
        The first ``num_data`` elements of the ``"tests"`` array.

    Raises:
        KeyError: If the document has no ``"tests"`` key.
    """
    # JSON text is UTF-8; being explicit avoids decoding with the platform's
    # locale encoding (e.g. cp1252 on Windows), which corrupts or rejects
    # non-ASCII source code.
    with open(json_file, encoding="utf-8") as file:
        return json.load(file)["tests"][:num_data]

# Forward slashes are accepted by Python's file APIs on Windows as well as on
# POSIX systems, so these relative paths resolve on either platform
# (backslash-separated paths only work on Windows).
donor = LoadData("../__internal__/collected_tests/collected_tests__fastapi.json", num_data)
host = LoadData("../__internal__/collected_tests/collected_tests__flask.json", num_data)
# The organ (the unit test to transplant) is the last donor test that was loaded.
organ = donor[-1]

In [11]:
# Name of the FAISS index files inside the database directory.
index_name = "index"
# Directory the FAISS index is saved to and loaded from. Forward slashes
# resolve on both Windows and POSIX; a backslash-separated path is treated as
# a single odd directory name on POSIX.
db_path = "../__internal__/faissdb"
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
# other embedding to try: all-MiniLM-L6-v2

In [16]:
class VectorDB:
    """Base class for vector-store wrappers; calling an instance is not supported."""

    def __init__(self):
        """Create the base object; no state is initialised."""

    def __call__(self):
        """Always raise ``NotImplementedError``."""
        message = "method not implemented"
        raise NotImplementedError(message)
    

class FaissDB(VectorDB):
    """FAISS-backed vector store holding test source code and its metadata."""

    def __init__(self, embedding_model_name):
        """Create an empty in-memory FAISS store.

        Args:
            embedding_model_name: Hugging Face model name passed to
                ``HuggingFaceEmbeddings``.
        """
        super().__init__()
        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
        # Size the index from the model's actual output instead of hard-coding
        # 768, so a model with a different embedding width also works.
        embedding_dim = len(self.embeddings.embed_query("hello world"))
        # NOTE(review): IndexFlatL2 ranks by Euclidean distance; this matches a
        # cosine-similarity ranking only if the embeddings are unit-normalised
        # -- confirm for the chosen model.
        self.vector = FAISS(
            # Use this instance's embedder (not a module-level global) so that
            # indexing and LoadFromLocal always use the same model.
            embedding_function=self.embeddings,
            index=faiss.IndexFlatL2(embedding_dim),
            docstore=InMemoryDocstore(),
            index_to_docstore_id={},
        )

    def StoreData(self, data, type="host"):
        """Embed ``data`` and add it to the store, tagging each entry with ``type``.

        The input dicts are not modified.

        Args:
            data: Iterable of dicts. Each must contain a ``"source_code"`` key,
                which becomes the document text; every other key is stored as
                metadata.
            type: Label stored under the ``"type"`` metadata key. The name
                shadows the builtin but is kept so existing keyword calls
                (``type="donor"``) continue to work.

        Raises:
            KeyError: If an entry has no ``"source_code"`` key.
        """
        documents = []
        for entry in data:
            # Build the metadata on a fresh dict rather than writing the label
            # into the caller's entry.
            metadata = {k: v for k, v in entry.items() if k != "source_code"}
            metadata["type"] = type
            documents.append(
                Document(page_content=entry["source_code"], metadata=metadata)
            )

        # Nothing to embed or index for empty input.
        if not documents:
            return

        uuids = [str(uuid4()) for _ in documents]
        self.vector.add_documents(documents=documents, ids=uuids)

    def SaveToLocal(self, path, index_name="index"):
        """Persist the store under directory ``path`` using ``index_name``."""
        self.vector.save_local(path, index_name)

    def LoadFromLocal(self, path, index_name="index"):
        """Replace the current store with one loaded from ``path`` and return it.

        Args:
            path: Directory previously written by ``SaveToLocal``.
            index_name: Index name that was used when saving.

        Returns:
            The loaded FAISS vector store (also kept on ``self.vector``).
        """
        # SECURITY: allow_dangerous_deserialization=True permits unpickling the
        # saved docstore. Only load index directories that you created yourself.
        self.vector = FAISS.load_local(
            path,
            self.embeddings,
            index_name,
            allow_dangerous_deserialization=True,
        )
        return self.vector

    def GetVector(self):
        """Return the underlying FAISS vector store."""
        return self.vector



In [None]:
# Build the FAISS-backed store using the configured embedding model.
db = FaissDB(embedding_model_name=embedding_model_name)

In [18]:
# Index both programs' tests, labelled so searches can be filtered by origin.
db.StoreData(host, type="host")
db.StoreData(donor, type="donor")
# Save under the same index name that is used when loading, so changing
# `index_name` cannot leave the save and load steps out of sync.
db.SaveToLocal(db_path, index_name)

In [20]:
# Reload the persisted index from disk and keep a handle to the vector store.
vector_store = db.LoadFromLocal(path=db_path, index_name=index_name)

In [21]:
# retrieval: retrieving unit tests from the donor that are most relevant to the organ
donor_retrieved = vector_store.similarity_search_with_score(
    organ["source_code"], k=5, filter={"type": "donor"}
)
print("Organ\n" + organ["source_code"])
# Join outside the f-string: reusing the enclosing quote character and using a
# backslash inside a replacement field is a SyntaxError before Python 3.12.
retrieved_sources = "\n".join(doc.page_content for doc, _score in donor_retrieved)
print(f"Results:\n{retrieved_sources}")

Organ
def test_query_repr_none():
    assert repr(Query(None)) == "Query(None)"
Results:
def test_query_repr_none():
    assert repr(Query(None)) == "Query(None)"
def test_query_repr_str():
    assert repr(Query("teststr")) == "Query(teststr)"
def test_param_repr_none():
    assert repr(Param(None)) == "Param(None)"
def test_param_repr_str():
    assert repr(Param("teststr")) == "Param(teststr)"
def test_param_repr_number():
    assert repr(Param(1)) == "Param(1)"


In [22]:
# Function Mapping: mapping the retrieved unit tests with the unit test in host
# Maps each retrieved donor test's source to a list of matching host test sources.
mapped_func = {}
for donor_doc, _donor_score in donor_retrieved:
    donor_content = donor_doc.page_content
    relevant_host = vector_store.similarity_search_with_score(
        donor_content, k=1, filter={"type": "host"}
    )
    # Extract the host sources once and reuse them for printing and the mapping.
    host_contents = [host_doc.page_content for host_doc, _host_score in relevant_host]
    # Joined outside the f-string: nested same-type quotes and backslashes in a
    # replacement field are a SyntaxError before Python 3.12.
    host_text = "\n".join(host_contents)
    print("Donor Unit Test:\n" + donor_content)
    print(f"Mapped Host Unit Test:\n{host_text}\n")
    mapped_func[donor_content] = host_contents

Donor Unit Test:
def test_query_repr_none():
    assert repr(Query(None)) == "Query(None)"
Mapped Host Unit Test:
def test_run_defaults(monkeypatch, app):
    rv = {}

    # Mocks werkzeug.serving.run_simple method
    def run_simple_mock(*args, **kwargs):
        rv["result"] = "running..."

    monkeypatch.setattr(werkzeug.serving, "run_simple", run_simple_mock)
    app.run()
    assert rv["result"] == "running..."

Donor Unit Test:
def test_query_repr_str():
    assert repr(Query("teststr")) == "Query(teststr)"
Mapped Host Unit Test:
def test_method_route(app, client, method):
    method_route = getattr(app, method)
    client_method = getattr(client, method)

    @method_route("/")
    def hello():
        return "Hello"

    assert client_method("/").data == b"Hello"

Donor Unit Test:
def test_param_repr_none():
    assert repr(Param(None)) == "Param(None)"
Mapped Host Unit Test:
def test_method_route(app, client, method):
    method_route = getattr(app, method)
    client_method 

In [23]:
# Turn the mapped dict to a string format for the prompt
# The loop names deliberately differ from the module-level `donor` and `host`
# test lists so those are not overwritten (which would break re-running the
# earlier cells). Only the first mapped host test of each pair is used.
pair = "".join(
    f"Pair {i}:\nDonor:\n{donor_code}\nHost:\n{host_codes[0]}\n\n"
    for i, (donor_code, host_codes) in enumerate(mapped_func.items(), start=1)
)

In [24]:
# Augmentation and Generation
# Build the LLM prompt: the organ's source code is interpolated first, followed
# by the formatted donor/host pairs held in `pair`.
prompt = f"""
Your task is to help me generate a transplanted unit test code from a donor program to a 
host program. Both programs are written in Python. The code to be transplanted (the 'organ') 
is as follows: \n\n{organ["source_code"]}\n

We have identified relevant donor-host pairs using cosine similarity based on the unit test 
in the organ. However, these pairs may not exactly test the same functionality.

Using these top relevant pairs as guidance, please help me write a unit test that transplants 
the functionality from the donor into the host program. Here are the identified pairs:
{pair}
"""

In [25]:
from openai import OpenAI
import os

# The API key is taken from the OPENAI_API_KEY environment variable rather
# than being written into the notebook.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Single-turn chat request: the prompt is sent as the only user message.
user_message = {"role": "user", "content": prompt}
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[user_message],
)

In [26]:
# Single quotes inside the replacement field keep this f-string valid on
# Python versions before 3.12, which reject reuse of the enclosing quote.
print(f"Organ: {organ['source_code']}\n")
# Text of the first completion choice returned by the model.
print(response.choices[0].message.content)

Organ: def test_query_repr_none():
    assert repr(Query(None)) == "Query(None)"

Given the provided unit test from the donor code and its relevant host pairs, we can create a new unit test in the host program that specifically tests the `Query` class's string representation when initialized with `None`. 

I will leverage the structure from the host program while ensuring to correctly test the functionality of the `Query` class aligned with the donor code. Here’s how the transplanted unit test could look:

```python
def test_query_repr_none(app):
    # Assuming 'Query' is a class defined somewhere in the host program
    query_instance = Query(None)
    
    # Test the string representation of the Query object
    assert repr(query_instance) == "Query(None)"
```

### Explanation:
1. **Test Function Name**: I maintained the naming convention where the function describes what it tests, similar to the original donor test.
2. **Creating an Instance**: I created an instance of a `Query` cla

In [None]:
# https://python.langchain.com/docs/integrations/vectorstores/faiss/