In [2]:
# URL of the Git repository that the cells below clone and classify.
repo_url = 'https://github.com/fraimwork/fraimwork'

In [3]:
from utils.gitutils import clone_repo

# Clone the repo
# NOTE(review): max_size_mb presumably caps the accepted repository size in
# megabytes — confirm against utils.gitutils.clone_repo.
repo = clone_repo(repo_url, max_size_mb=10000)
# Keep the checkout's working directory as a plain string; it is passed to
# FileTree.from_dir in the next cell.
local_repo_path = str(repo.working_dir)

In [4]:
from utils.filetreeutils import FileTree

# Build the file tree from the cloned checkout. The parenthesized walrus both
# binds `file_tree` and leaves the value as the cell's last expression so the
# notebook displays it.
# NOTE(review): the displayed tree includes `.git` internals (hooks, objects, ...);
# consider excluding that directory if it is not needed for classification.
(file_tree := FileTree.from_dir(local_repo_path))

├── .git\
│   ├── hooks\
│   │   ├── applypatch-msg.sample
│   │   ├── commit-msg.sample
│   │   ├── fsmonitor-watchman.sample
│   │   ├── post-update.sample
│   │   ├── pre-applypatch.sample
│   │   ├── pre-commit.sample
│   │   ├── pre-push.sample
│   │   ├── pre-rebase.sample
│   │   ├── pre-receive.sample
│   │   ├── prepare-commit-msg.sample
│   │   ├── update.sample
│   ├── info\
│   │   ├── exclude
│   ├── logs\
│   │   ├── refs\
│   │   │   ├── heads\
│   │   │   │   ├── main
│   │   │   ├── remotes\
│   │   │   │   ├── origin\
│   │   │   │   │   ├── HEAD
│   │   ├── HEAD
│   ├── objects\
│   │   ├── 03\
│   │   │   ├── adc8d20747d089c33376e807b93d7bf21923f1
│   │   ├── 08\
│   │   │   ├── 0d6c77ac21bb2ef88a6992b2b73ad93daaca92
│   │   ├── 15\
│   │   │   ├── 9d036e9c2fdaa43066cf2e164fd8d73dcb865d
│   │   ├── 18\
│   │   │   ├── bd8a2c63421a9de54306a9a7159f69e54a8fb1
│   │   │   ├── cce057c077746ef591bded3b5fafd164afe553
│   │   ├── 1c\
│   │   │   ├── 670ebbee0f6cf94886961a9c

In [5]:
from utils.agent import Agent, GenerationConfig
from dotenv import load_dotenv
import os

# Load variables from a .env file into the environment so the API key does not
# have to be hard-coded in the notebook.
load_dotenv()

# os.getenv returns None when API_KEY is not set.
API_KEY = os.getenv('API_KEY')

# Instructions given to the classifier agent. This is a plain string on purpose:
# it has no placeholders, so an f-prefix would add nothing and would make any
# future literal braces in the prompt a syntax/formatting hazard.
CLASSIFIER_SYSTEM_PROMPT = """You are a software engineer tasked with classifying the framework of a codebase into one of the  following frameworks:
- Angular
- React
- React Native
- Flutter
- Flask
- Django
- Ruby on Rails
- Firebase
- Vue
- Express
- Laravel
- Spring Boot
- ASP.NET
In the following prompts, you will be given the subdirectories of a codebase. You can respond with one of the following:
- IDENTIFY: FRAMEWORK_NAME (Only select this if you are absolutely certain that the ROOT directory conforms) (it should be used sparingly) (directory names can be misleading)
- N/A (if you cannot identify the framework based on the subdirectories)
- END (if you are fairly certain that no frameworks are in the subdirectories)"""

# Agent whose replies (IDENTIFY: ... / N/A / END) drive the directory walk in
# classify_node.
classifier = Agent(
    model_name="gemini-1.5-flash-001",
    api_key=API_KEY,
    name="classifier",
    generation_config=GenerationConfig(temperature=0.3),
    system_prompt=CLASSIFIER_SYSTEM_PROMPT,
)

In [6]:
def flatmap(lst):
    """Collapse arbitrarily nested lists into one flat list.

    Only ``list`` instances are expanded; every other value (tuples, strings,
    dicts, ...) is kept as a single element. ``None`` entries are discarded.

    Args:
        lst: The (possibly nested) list to flatten.

    Returns:
        A new list with the non-``None`` leaf values in their original order.
    """

    def _leaves(items):
        # Depth-first walk that yields leaves in left-to-right order.
        for element in items:
            if isinstance(element, list):
                yield from _leaves(element)
            elif element is not None:
                yield element

    return list(_leaves(lst))


In [7]:
import re
from utils.agent import Interaction
from utils.languageutils import get_imports
import networkx as nx
# RegEx to match classifier responses
# classify_node applies these with `.match`, so each pattern is anchored at the
# start of the text it is given.
identify_re = re.compile(r"IDENTIFY: (.+)")  # group 1 captures the framework name
end_re = re.compile(r"END")
unable_re = re.compile(r"N/A")

def classify_node(node):
    """Classify the framework(s) under ``node`` by asking the ``classifier`` agent.

    The prompt lists the node's name and the names of its direct children.
    For each child that has a ``'content'`` attribute, the imports returned by
    ``get_imports`` are supplied as extra context. The reply is handled as:

    - ``IDENTIFY: <name>``: store ``<name>`` under ``'framework'`` on the node,
      set ``'frameworks'`` to ``[<name>]`` on the node and everything reachable
      from it, and return ``<name>``.
    - ``N/A``: recurse into every child without a ``'content'`` attribute,
      store the flattened results under ``'frameworks'`` and return that list.
    - ``END`` or an unrecognized reply: print a message and return ``None``.

    Args:
        node: Key of a node in the module-level ``file_tree``.

    Returns:
        The framework name (``str``) when identified, a list of framework names
        gathered from the children when the reply is ``N/A``, otherwise ``None``.
    """
    if node not in file_tree.nodes:
        print(f"Node {node} not found in file tree")
        return None
    node_dict = file_tree.nodes[node]
    neighbors = list(file_tree[node])
    prompt = f"{node_dict['name']}\n" + '\n'.join(f"├── {file_tree.nodes[k]['name']}" for k in neighbors)

    # Extra context: the imports of each child that carries content. Children
    # with no detected imports contribute nothing.
    context = []
    for neighbor in neighbors:
        neighbor_dict = file_tree.nodes[neighbor]
        if 'content' not in neighbor_dict:
            continue
        imports = get_imports(neighbor_dict['path'])
        if imports:
            context.append(Interaction(f'Relevant content of {neighbor}\n----\n' + '\n'.join(imports), '...'))

    response = classifier.chat(prompt, custom_context=context)
    # The patterns are applied with `.match` (anchored at the start), so drop
    # surrounding whitespace to keep a stray leading newline/space from sending
    # an otherwise valid reply to the fallback branch.
    reply = response.strip()

    # Match the response to the RegEx
    if (m := identify_re.match(reply)):
        framework = m.group(1).strip()
        print(f"{node_dict['name']} is a {framework} project")
        file_tree.nodes[node]['framework'] = framework
        # Use a distinct loop name so the `node` parameter is not clobbered.
        for descendant in nx.dfs_tree(file_tree, node).nodes:
            file_tree.nodes[descendant]['frameworks'] = [framework]
        return framework
    if unable_re.match(reply):
        print(f"Searching {node} ...")
        frameworks = flatmap([classify_node(child) for child in neighbors if 'content' not in file_tree.nodes[child]])
        file_tree.nodes[node]['frameworks'] = frameworks
        # Return the collected list; without this the parent call (and the
        # top-level caller) would receive None and lose the nested findings.
        return frameworks
    if end_re.match(reply):
        print(f"{node_dict['name']} is not a framework")
        return None
    print(f"Classifier response: {response}")
    return None

# Start the classification at the root of the file tree. The parenthesized
# walrus binds `root_classification` and also makes it the cell's displayed value.
(root_classification := classify_node(file_tree.root_node()))

Searching . ...
.git is not a framework
.idx is not a framework
.vscode is not a framework
Searching backend ...
app is a Flask project
firebase is a Firebase project
frontend is a React project


['Flask', 'Firebase', 'React']