In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
from pygments.lexers import get_lexer_for_filename, ClassNotFound
from collections import Counter


def infer_languages(directory):
    """Tally the files under ``directory`` by the language Pygments infers.

    The tree is walked recursively with ``os.walk``. Each file whose name
    ``get_lexer_for_filename`` can map to a lexer adds one to the count kept
    under that lexer's ``name``. Files for which Pygments raises
    ``ClassNotFound`` are skipped silently.

    :param directory: Root directory to walk.
    :returns: A ``Counter`` mapping lexer name to the number of matching files.
    """
    tally = Counter()
    for dirpath, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)
            try:
                lexer = get_lexer_for_filename(full_path)
            except ClassNotFound:
                # Pygments has no lexer for this filename; leave it uncounted.
                continue
            tally[lexer.name] += 1
    return tally


# infer_languages("/path/to/your/directory")

In [None]:
import fnmatch
import glob
import os
import tempfile

import git
from tqdm.auto import tqdm


def _is_ignored(rel_path, patterns):
    """Return True if ``rel_path`` matches any of the ignore ``patterns``.

    This is an ``fnmatch``-based approximation of .gitignore matching, not a
    full implementation: negation (``!pattern``) and anchoring with a leading
    ``/`` are not interpreted. A pattern is considered a match when it matches

    * the whole repository-relative path,
    * a directory prefix of that path (so ``docs/_build/`` excludes
      everything beneath it), or
    * any single path component (so ``__pycache__/`` or ``*.egg-info``
      exclude matching directories at any depth).

    :param rel_path: Path relative to the repository root.
    :param patterns: Iterable of pattern strings (blank/comment lines removed).
    :returns: True if any pattern matches, else False.
    """
    rel_posix = rel_path.replace(os.sep, "/")
    components = rel_posix.split("/")
    for pattern in patterns:
        bare = pattern.rstrip("/")
        if not bare:
            continue
        if (
            fnmatch.fnmatch(rel_posix, bare)
            or fnmatch.fnmatch(rel_posix, bare + "/*")
            or any(fnmatch.fnmatch(component, bare) for component in components)
        ):
            return True
    return False


# Clone the repository into a fresh temporary directory. `clone_from` already
# returns the Repo object, so the directory does not need to be re-opened.
repo_url = "https://github.com/ericmjl/llamabot.git"
local_dir = tempfile.mkdtemp()
repo = git.Repo.clone_from(repo_url, local_dir)

# Read the ignore patterns from .gitignore, dropping blank lines and comments.
# A repository without a .gitignore simply yields no patterns.
ignore_list = []
gitignore_path = os.path.join(local_dir, ".gitignore")
if os.path.isfile(gitignore_path):
    with open(gitignore_path, "r") as f:
        ignore_list = [
            line.strip()
            for line in f
            if line.strip() and not line.lstrip().startswith("#")
        ]

# Define the file extensions to include
include_extensions = [".py", ".md"]

# Collect files with the wanted extensions, skipping anything that matches an
# ignore pattern. Patterns in .gitignore are written relative to the
# repository root, so they are matched against the repo-relative path rather
# than the absolute path under the temporary directory.
relevant_files = []
for include_extension in tqdm(include_extensions):
    for file_path in glob.glob(
        os.path.join(local_dir, "**", f"*{include_extension}"), recursive=True
    ):
        if not os.path.isfile(file_path):
            continue
        rel_path = os.path.relpath(file_path, local_dir)
        if not _is_ignored(rel_path, ignore_list):
            relevant_files.append(file_path)

# Print the list of all files in the repository with the specified extensions
print(relevant_files)

In [None]:
from llamabot.code_manipulation import show_directory_tree

# Build the directory tree of the cloned repository, skipping the
# version-control and notebook-checkpoint directories, and show it.
tree_ignore_dirs = [".git", ".ipynb_checkpoints"]
file_tree = show_directory_tree(local_dir, ignore_dirs=tree_ignore_dirs)
print(file_tree)

In [None]:
from pathlib import Path
from llamabot import QueryBot
from outlines import text


# System prompt for the bot that answers questions about the repository.
# The function body is only a docstring: with `text.prompt` (from `outlines`)
# that docstring is the prompt text itself, so editing it changes what is
# sent to the model.
@text.prompt
def repobot_sysprompt():
    """You are a bot that answers questions about a git repository."""

In [None]:
from llamabot import SimpleBot


# System prompt for the triage bot, which maps a user question to the
# repository files that are relevant to it.
#
# The docstring is the prompt template (runtime text, not documentation);
# `{{ file_tree }}` is the placeholder for the `file_tree` argument.
# The prompt asks the model to reply with bare JSON containing a "files" list
# and an optional "file_tree" string; the notebook parses that reply with
# `json.loads` and reads its "files" key.
@text.prompt
def triagebot_sysprompt(file_tree: str):
    """You are a bot that triages messages given to you.

    You are working as part of a system that answers questions about a code repository.
    Some questions are high level questions about the repository in general;
    other questions might be mid-level questions about a collection of files;
    while other questions are low-level detailed questions about individual files.

    As context, you have a file tree of files within the repository:

    {{ file_tree }}

    You will be given a question, and must respond with a JSON.
    The JSON is formatted as follows:

    ```json
    {
        "files": [<file_path_1>, <file_path_2>, ...], <-- up to 20 files.
        "file_tree": <file_tree as a raw string> <-- this is optional
    }
    ```

    Based on the level of the question, identify files that are relevant to the question.
    The file paths should be relative to the root of the repository.
    Ensure that you return only the JSON.
    Do not include any Markdown fences.
    """


# User-message template that wraps a question together with extra context.
#
# The docstring is the prompt template (runtime text, not documentation).
# `query` fills `{{ query }}`; every additional keyword argument is rendered
# by the template loop as one "key : value" entry. In this notebook the
# keyword arguments are the keys of the triage bot's parsed JSON reply.
@text.prompt
def ask_question(query, **kwargs):
    """Here is a question for you to answer about the code repository:

    {{ query }}

    You also have the following context given:

    {% for k, v in kwargs.items() %}
    {{ k }} : {{ v }}
    {% endfor %}

    You should answer with as much detail as you can get from the context.
    """


import json
import re

query = "What's in tutorial001_an.py?"

# Step 1: ask the triage bot which files are relevant to the question.
triagebot = SimpleBot(triagebot_sysprompt(file_tree))
triage_response = triagebot(query)

# Step 2: parse the reply. The prompt asks for bare JSON, but models sometimes
# wrap it in a Markdown fence anyway. `str.strip("```json")` would strip a
# *set of characters* rather than the fence itself, so remove an optional
# leading/trailing fence explicitly instead.
triaged_files_string = re.sub(
    r"^\s*```(?:json)?\s*|\s*```\s*$",
    "",
    triage_response.content,
    flags=re.IGNORECASE,
)
triaged_files = json.loads(triaged_files_string)

# Step 3: turn the returned relative paths into real files inside the clone.
# The paths come from model output, so only keep those that resolve to an
# existing regular file located under the repository root.
repo_root = Path(local_dir).resolve()
candidate_paths = [
    (repo_root / fpath).resolve() for fpath in triaged_files.get("files", [])
]
files = [
    path
    for path in candidate_paths
    if path.is_file() and repo_root in path.parents
]

# Step 4: build bots over the selected files and answer the question.
# NOTE(review): `chatbot` uses the same prompt text and documents as `repobot`
# and is not used in this cell; kept in case another cell relies on it.
chatbot = QueryBot(
    "You are a bot that answers questions about a git repository.", doc_paths=files
)
repobot = QueryBot(repobot_sysprompt(), doc_paths=files)

# repobot(query)
repobot(ask_question(query, **triaged_files))

In [None]:
# Prompt template asking for a one-sentence summary of a source file.
#
# The docstring is the prompt template (runtime text, not documentation).
# `{{ file_source }}` is replaced by the `file_source` argument, which the
# wording ("the source file below") implies should be the file's contents.
@text.prompt
def summarize_file(file_source):
    """Give me a 1 sentence summary of the source file below.

    {{ file_source }}

    Begin with 'A file that...'

    Your response:
    """

In [None]:
from pyprojroot import here


# Bot dedicated to producing short summaries of Python source files.
summary_bot = SimpleBot(
    "You are an expert at summarizing the contents of a Python file."
)

# The prompt template embeds its argument verbatim as "the source file below",
# so pass the file's contents. Passing the Path object would only show the
# model a path string, not the code to summarize.
cli_init_source = (here() / "llamabot/cli/__init__.py").read_text(encoding="utf-8")
summary_bot(summarize_file(cli_init_source))

In [None]:
# General-purpose coding bot, used below to describe the project layout.
bot = SimpleBot("You are an expert coder.")

# Directory tree of the `llamabot` directory under the project root.
# NOTE: this rebinds `file_tree`, which an earlier cell set to the tree of the
# cloned repository; cells that read `file_tree` see whichever ran last.
package_ignore_dirs = [".git", ".ipynb_checkpoints", "__pycache__", "*.egg-info"]
file_tree = show_directory_tree(here() / "llamabot", ignore_dirs=package_ignore_dirs)


print(file_tree)

In [None]:
# Prompt template asking for a project overview inferred from a file tree.
#
# The docstring is the prompt template (runtime text, not documentation);
# `{{ file_tree }}` is the placeholder for the `file_tree` argument.
@text.prompt
def file_tree_summary(file_tree):
    """Here is the file tree of a Git repository:

    {{ file_tree }}

    Give me an overview of the project based on the file tree.
    Try to infer the purpose of the project as well.
    """


# Ask the coding bot for an overview based on the current `file_tree`.
bot(file_tree_summary(file_tree))

In [None]:
# OK, I think I have a pattern to run with:
#
# 1. Create a triagebot that triages questions to files
# 2. Use the triaged information to create a repobot that loads contextual information.
# 3. Use the repobot to answer the original question.

# After further thinking, I've settled on this pattern:

# 1. Open repo chat.
# 2. Create a ... (TODO: this step was left unfinished; complete the plan)

In [None]:
# Scratch cell: the stray bare name `asdfasdfasdf` raised NameError and broke
# Restart & Run All, so it has been removed.