In [1]:
import json
import os
from typing import List
import asyncio

from continuedev.core.sdk import ContinueSDK
from continuedev.libs.index.chunkers import Chunk
from continuedev.headless import start_headless_session
from continuedev.libs.index.pipelines.main import main_retrieval_pipeline, retrieval_step
from continuedev.libs.index.hyde import code_hyde
from continuedev.libs.index.rerankers.single_token import single_token_reranker_parallel

# Start a headless Continue session for the given workspace directory and keep
# a handle on its SDK, which is passed to the retrieval calls in later cells.
# NOTE(review): the directory is a machine-specific absolute path — it has to be
# adjusted before this notebook can be re-run on another machine.
session = await start_headless_session(directory="/Users/natesesti/Desktop/continue/server/continuedev")
sdk: ContinueSDK = session.autopilot.continue_sdk

  from .autonotebook import tqdm as notebook_tqdm
[2023-10-24 23:31:12,190] [DEBUG] New session: None
[2023-10-24 23:31:12,195] [DEBUG] Loaded Continue config file from /Users/natesesti/.continue/config.py
Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x127737d10>
[2023-10-24 23:31:12,266] [DEBUG] Starting context manager


In [2]:
async def f(q: str):
    """Run the main retrieval pipeline for the query ``q`` using the shared ``sdk``.

    Awaits ``main_retrieval_pipeline(q, sdk)`` and returns its result unchanged.
    """
    pipeline_result = await main_retrieval_pipeline(q, sdk)
    return pipeline_result

In [3]:
# Load the evaluation fixture: a JSON object holding the labelled questions and
# the base path that each question's file list is joined onto.
with open("y.json", "r") as file:
    data = json.load(file)

# Echo the whole fixture so the questions and their expected files show up in the cell output.
print(data)
questions = data["questions"]  # entries are indexed with "q" and "files" by the code below
base_path = data["basePath"]  # prefix that abs_paths joins onto each entry of "files"

def abs_paths(question):
    """Return the question's ``"files"`` entries, each joined onto ``base_path``."""
    return [os.path.join(base_path, rel_path) for rel_path in question["files"]]

def in_y(chunk: Chunk, question):
    """Tell whether the chunk's ``document_id`` is one of the question's expected files."""
    expected_paths = abs_paths(question)
    return chunk.document_id in expected_paths

def f1_score(chunks: List[Chunk], question) -> float:
    """File-level F1 of the retrieved chunks against the question's expected files.

    Chunks are collapsed to their distinct ``document_id`` values before
    counting, so several chunks from one file count once.

    Args:
        chunks: Retrieved chunks; only ``document_id`` is read.
        question: Entry whose ``"files"`` list is resolved through ``abs_paths``.

    Returns:
        ``2*tp / (2*tp + fp + fn)``, or ``0`` when no expected file was retrieved.
    """
    retrieved = {chunk.document_id for chunk in chunks}
    expected = abs_paths(question)

    true_pos = sum(1 for path in retrieved if path in expected)
    if true_pos == 0:
        return 0

    false_pos = len(retrieved) - true_pos
    false_neg = len(expected) - true_pos
    return 2 * true_pos / (2 * true_pos + false_pos + false_neg)

def get_recall(chunks: List[Chunk], question) -> float:
    """File-level recall of the retrieved chunks against the question's expected files.

    Chunks are collapsed to their distinct ``document_id`` values before counting.

    Args:
        chunks: Retrieved chunks; only ``document_id`` is read.
        question: Entry whose ``"files"`` list is resolved through ``abs_paths``.

    Returns:
        ``tp / (tp + fn)``, or ``0`` when no expected file was retrieved.
    """
    retrieved = {chunk.document_id for chunk in chunks}
    expected = abs_paths(question)

    hits = len([path for path in retrieved if path in expected])
    if hits == 0:
        return 0

    misses = len(expected) - hits
    return hits / (hits + misses)

{'basePath': '/Users/natesesti/Desktop/continue/server/continuedev', 'questions': [{'q': 'How does the Continue FastAPI server work?', 'files': ['server/main.py', 'server/gui.py', 'server/ide.py']}, {'q': 'Where is logic for pruning the prompt tokens?', 'files': ['libs/util/count_tokens.py', 'libs/llm/base.py']}, {'q': 'Where does codebase indexing happen?', 'files': ['plugins/steps/chroma.py', 'libs/index/indices/chroma_index.py', 'libs/index/indices/meilisearch_index.py', 'libs/index/indices/base.py', 'core/context.py']}, {'q': 'How is meilisearch used?', 'files': ['plugins/steps/chroma.py', 'libs/index/indices/meilisearch_index.py', 'libs/index/indices/base.py', 'core/context.py', 'server/meilisearch_server.py']}, {'q': 'I want to update Ollama integration', 'files': ['libs/llm/ollama.py']}, {'q': 'How does Continue chunk code?', 'files': ['libs/index/chunkers/basic.py', 'libs/index/chunkers/chunk_directory.py', 'libs/index/chunkers/chunk.py', 'libs/index/chunkers/code.py']}]}


In [4]:
# scores = []
# for question in questions:
#     chunks = await f(question["q"])
#     score = f1_score(chunks, question)
#     print(score)
#     scores.append(score)

# print(scores)
# print(sum(scores) / len(scores))

In [5]:
hyde_mapping = {}
hydes = await asyncio.gather(*[code_hyde(question["q"], "python", sdk) for question in questions])
for i, hyde in enumerate(hydes):
    hyde_mapping[questions[i]["q"]] = hyde

[2023-10-24 23:31:12,418] [DEBUG] Loading Meilisearch index...
[2023-10-24 23:31:12,639] [DEBUG] Loaded 0 documents into meilisearch in 0.018690824508666992 seconds for context provider code
[2023-10-24 23:31:12,645] [DEBUG] Loaded 1 documents into meilisearch in 0.02457594871520996 seconds for context provider search
[2023-10-24 23:31:12,661] [DEBUG] Loaded 1 documents into meilisearch in 0.040534019470214844 seconds for context provider terminal
[2023-10-24 23:31:12,662] [DEBUG] Loaded 1 documents into meilisearch in 0.041803836822509766 seconds for context provider url
[2023-10-24 23:31:12,663] [DEBUG] Loaded 1 documents into meilisearch in 0.042099952697753906 seconds for context provider diff
[2023-10-24 23:31:12,664] [DEBUG] Loaded 152 documents into meilisearch in 0.04308819770812988 seconds for context provider file
[2023-10-24 23:31:12,664] [DEBUG] Loaded Meilisearch index


In [6]:
async def get_score(question):
    """Retrieve with the question's cached HyDE text and return the recall.

    Looks up ``hyde_mapping[question["q"]]``, runs ``retrieval_step`` with
    ``n_retrieve=50``, prints the resulting recall and returns it.
    """
    hyde_query = hyde_mapping[question["q"]]
    retrieved = await retrieval_step(hyde_query, sdk, n_retrieve=50)
    recall = get_recall(retrieved, question)
    print(recall)
    return recall

# Score all questions concurrently. The per-question prints inside get_score may
# appear in completion order, while the gathered list follows the order of `questions`.
scores = await asyncio.gather(*map(get_score, questions))

print(scores)
# Mean recall over all questions.
print(sum(scores) / len(scores))

Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x147c077d0>


Scanning 50 files...
Scanning 50 files...
Scanning 50 files...
Scanning 50 files...
Scanning 50 files...
Scanning 50 files...
1.0
1.0
1.0
0.6
1.0
0.8
[1.0, 1.0, 0.6, 0.8, 1.0, 1.0]
0.9


In [7]:
async def get_rerank_score(question):
    """Measure how much of the initially retrieved recall survives reranking.

    Retrieves candidates for the question's cached HyDE text (``n_retrieve=50``),
    passes them through ``single_token_reranker_parallel`` with ``10`` as its
    third argument (presumably the number of chunks to keep), and compares
    recall after reranking with recall before it. Along the way it prints both
    chunk counts, the score, and every relevant chunk that was retrieved but
    whose file is absent from the reranked result.

    Args:
        question: Entry with a ``"q"`` query string and the ``"files"`` list
            that ``abs_paths`` resolves.

    Returns:
        Recall after reranking divided by recall before reranking, or ``0.0``
        when the initial retrieval found none of the expected files.
    """
    initial_chunks = await retrieval_step(hyde_mapping[question["q"]], sdk, n_retrieve=50)
    initial_recall = get_recall(initial_chunks, question)
    print(len(initial_chunks))
    chunks = await single_token_reranker_parallel(initial_chunks, question["q"], 10, sdk)
    print(len(chunks))

    # get_recall returns 0 when nothing relevant was retrieved; dividing by it
    # would raise ZeroDivisionError and abort the whole gather. Follow the same
    # "no hits -> 0" convention that get_recall itself uses.
    if initial_recall == 0:
        score = 0.0
    else:
        score = get_recall(chunks, question) / initial_recall
    print(score)

    # Build the set of surviving document ids once instead of once per candidate chunk.
    kept_document_ids = {c.document_id for c in chunks}
    missed_by_rerank = [
        chunk
        for chunk in initial_chunks
        if in_y(chunk, question) and chunk.document_id not in kept_document_ids
    ]
    print("MISSED BY RERANK: \n\n")
    for chunk in missed_by_rerank:
        print(chunk.document_id)
        print(chunk.content)
        print("\n\n-----------------------------\n\n")

    return score

# Run the rerank evaluation for all questions concurrently. Because the tasks
# interleave, the diagnostic prints of different questions may be mixed together
# in the output; the gathered list itself follows the order of `questions`.
scores = await asyncio.gather(*map(get_rerank_score, questions))

print(scores)
# Mean rerank score over all questions.
print(sum(scores) / len(scores))

Scanning 50 files...
Scanning 50 files...
Scanning 50 files...
Scanning 50 files...
Scanning 50 files...
Scanning 50 files...
25
25
25
25
25
25
10
1.0
MISSED BY RERANK: 


10
0.7499999999999999
MISSED BY RERANK: 


/Users/natesesti/Desktop/continue/server/continuedev/plugins/steps/chroma.py
class AnswerQuestionChroma(Step):
    ...

    async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
        chroma_index = ChromaCodebaseIndex(
            sdk.ide.workspace_directory, openai_api_key=self.openai_api_key
        )
        meilisearch_index = MeilisearchCodebaseIndex(sdk.ide.workspace_directory)

        self.hide = False
        self.description = f"Scanning {self.n_retrieve} files..."
        await sdk.update_ui()

        # Get top chunks from index
        to_retrieve_from_each = (
            self.n_retrieve if self.use_reranking else self.n_final
        ) // 2
        chroma_chunks = await chroma_index.query(
            self.user_input, n=to_retrieve_fr

In [18]:
# Few-shot relevance-classification prompt: an instruction, two worked examples
# (one "Yes", one "No"), and then the query/snippet under test. The text stops
# right after "Relevant: " so that the model's next token is the verdict.
# NOTE(review): the snippet under test appears to be the chroma.py chunk listed
# under "MISSED BY RERANK" in an earlier cell's output — confirm.
PROMPT = """
You are an expert software developer responsible for helping detect whether the retrieved snippet of code is relevant to the query. For a given input, you need to output a single word: "Yes" or "No" indicating the retrieved snippet is relevant to the query.

Query: Where is the FastAPI server?
Snippet:
```/Users/andrew/Desktop/server/main.py
from fastapi import FastAPI
app = FastAPI()
@app.get("/")
def read_root():
    return {"Hello": "World"}
```
Relevant: Yes

Query: Where in the documentation does it talk about the UI?
Snippet:
```/Users/andrew/Projects/bubble_sort/src/lib.rs
fn bubble_sort<T: Ord>(arr: &mut [T]) {
    for i in 0..arr.len() {
        for j in 1..arr.len() - i {
            if arr[j - 1] > arr[j] {
                arr.swap(j - 1, j);
            }
        }
    }
}
```
Relevant: No

Query: Where does codebase indexing happen?
Snippet:
```/Users/natesesti/Desktop/continue/server/continuedev/plugins/steps/chroma.py
class AnswerQuestionChroma(Step):
    ...

    async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
        chroma_index = ChromaCodebaseIndex(
            sdk.ide.workspace_directory, openai_api_key=self.openai_api_key
        )
        meilisearch_index = MeilisearchCodebaseIndex(sdk.ide.workspace_directory)

        self.hide = False
        self.description = f"Scanning {self.n_retrieve} files..."
        await sdk.update_ui()

        # Get top chunks from index
        to_retrieve_from_each = (
            self.n_retrieve if self.use_reranking else self.n_final
        ) // 2
        chroma_chunks = await chroma_index.query(
            self.user_input, n=to_retrieve_from_each
        )
        meilisearch_chunks = await meilisearch_index.query(
            self.user_input, n=to_retrieve_from_each
        )
        chunk_ids = set()
        chunks = []
        for chunk in chroma_chunks + meilisearch_chunks:
            if chunk.id not in chunk_ids:
                chunk_ids.add(chunk.id)
                chunks.append(chunk)

        # Rerank to select top results
        self.description = f"Selecting most important files..."
        await sdk.update_ui()

        if self.use_reranking:
            chunks = await default_reranker_parallel(
                chunks,
                self.user_input,
                self.n_final,
                sdk,
                group_size=self.rerank_group_size,
            )

        # Add context items
        context_items: List[ContextItem] = []
        for chunk in chunks:
            # Can we select the context item through the normal means so that the name is disambiguated?
            # Also so you don't have to understand the internals of the context provider
            # OR have a chunk context provider??? Nice short-term, but I don't like it for long-term
            ctx_item = ContextItem(
                content=chunk.content,
                description=ContextItemDescription(
                    name=f"{os.path.basename(chunk.document_id)} ({chunk.start_line}-{chunk.end_line})",
                    description=chunk.document_id,
                    id=ContextItemId(
                        provider_title="file",
                        item_id=remove_meilisearch_disallowed_chars(chunk.document_id),
                    ),
                ),
            )  # Should be 'code' not file! And eventually should be able to embed all context providers automatically!

            context_items.append(ctx_item)
            await sdk.add_context_item(ctx_item)

        self.hide = True
        model = sdk.models.chat.model
        # if model == "gpt-4":
        #     model = "gpt-4-32k"  # Not publicly available yet?
        if model == "gpt-3.5-turbo":
            model = "gpt-3.5-turbo-16k"

        await sdk.run_step(
            SimpleChatStep(
                name="Answer Question",
                description=f"Reading from {len(context_items)} files...",
                completion_options=CompletionOptions(model=model),
            )
        )

        # for ctx_item in context_items:
        #     await sdk.delete_context_item(ctx_item.description.id)
```
Relevant: """

In [19]:
import openai

# Take the key from the environment instead of hardcoding it in the notebook.
# Assigning a literal "" forces an empty key onto the client, so the request
# cannot authenticate even on a machine where OPENAI_API_KEY is configured.
# The "" fallback keeps the previous behaviour when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Ask for exactly one deterministic token (the verdict that follows "Relevant: ")
# and request its log-probabilities so the model's confidence can be inspected.
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=PROMPT,
    max_tokens=1,
    temperature=0.0,
    stop=["```"],
    # NOTE(review): 3363 and 1400 are presumably the token ids of " Yes" and " No"
    # for this model's tokenizer — verify before reusing with another model.
    logit_bias={3363: 1, 1400: 1},
    # NOTE(review): the recorded output lists only 5 alternatives, so the API
    # seems to cap this value — confirm against the API reference.
    logprobs=10,
)
# Display the log-probability details of the single generated token.
response["choices"][0]["logprobs"]

<OpenAIObject at 0x11377c6b0> JSON: {
  "tokens": [
    " No"
  ],
  "token_logprobs": [
    -0.0012104723
  ],
  "top_logprobs": [
    {
      " No": -0.0012104723,
      "\n": -6.8743377,
      "No": -8.752341,
      "\u00a0": -11.196322,
      "<|endoftext|>": -13.671831
    }
  ],
  "text_offset": [
    4147
  ]
}