In [46]:
import os
from dotenv import load_dotenv

load_dotenv()  # Loads env from .env file

GROQ_API_KEY = os.getenv("GROQ_API_KEY")

if not GROQ_API_KEY:
    raise ValueError("Missing GROQ_API_KEY")



In [47]:
from git import Repo  # pip install gitpython

#Repo.clone_from("https://github.com/itsmejul/flappy-evolve", "./dir")
#Repo.clone_from("https://github.com/syn-ce/juliasetexplorer", "./temp")

In [48]:
import subprocess
from pathlib import Path

def clone_repo(repo_url: str, target_dir: str):
    #subprocess.run(["git", "clone", "--depth", "1", repo_url, target_dir], check=True, stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL)
    #tempPath = Path(target_dir)
    #gitPath = tempPath / ".git"
    #subprocess.run(["rm", "-rf", gitPath], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    from git import Repo
    Repo.clone_from(repo_url, target_dir)

In [49]:
def read_file_to_string(file_path: str) -> str:
    path = Path(file_path)
    if not path.is_file():
        raise FileNotFoundError(f"{file_path} is not a file.")
    return path.read_text(encoding="utf-8", errors="ignore")

In [50]:
def remove_first_folder(file_path: str) -> str:
    parts = Path(file_path).parts
    if len(parts) <= 1:
        return file_path  # Nothing to remove
    return str(Path(*parts[1:]))

In [51]:
def read_dir_rec(dir_path):
    files = dict()
    from pathlib import Path
    root = Path(dir_path)
    for child in root.iterdir():
        if child.is_file():
            print("A")
            files[remove_first_folder(root / child.name)] = read_file_to_string(root / child.name)
            print(f"File: {root / child.name}")
        elif child.is_dir():
            print(f"Directory: {child.name}")
            files = files | read_dir_rec(root / child.name)
    return files

In [52]:

from llama_index.core.schema import TextNode
from llama_index.core.vector_stores import SimpleVectorStore
from llama_index.core.storage.storage_context import StorageContext
from llama_index.llms.ollama import Ollama
from llama_index.core.indices import VectorStoreIndex, load_index_from_storage

In [53]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
# Set the embedding model, we do this only once when we start the backend

In [54]:
def read_directory_documents(path):
    from llama_index.core import SimpleDirectoryReader

    documents = SimpleDirectoryReader("./temp").load_data()
    return documents

In [55]:
def create_index(documents):


    # Create the index
    vector_store = SimpleVectorStore() # TODO maybe use a more capable vector store like qdrant? 
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    llm = Ollama(model="gemma3:4b", request_timeout=300)#, base_url="http://172.26.44.37:11434") # 

    index = VectorStoreIndex(documents)

    # Store index in directory
    index.storage_context.persist(persist_dir="./index")

In [56]:
def create_index_old(files_dict):

    # Convert dict into list, concatenate file names and contents into one string
    documents = [
        {
            "path": name,
            "text": f"FILE PATH: \n{name}, \nFILE CONTENT: \n{content}"
        }
        for name, content in files_dict.items()
    ]

    # Load documents into textnodes
    text_nodes = []
    for d in documents:
        new_node = TextNode(text=d["text"], metadata={"path" : d["path"]})
        text_nodes.append(new_node)

    # Create the index
    vector_store = SimpleVectorStore() # TODO maybe use a more capable vector store like qdrant? 
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    llm = Ollama(model="gemma3:4b", request_timeout=300)#, base_url="http://172.26.44.37:11434") # 

    index = VectorStoreIndex(text_nodes)

    # Store index in directory
    index.storage_context.persist(persist_dir="./index")

In [57]:
def query_index(query):
    storage_context = StorageContext.from_defaults(persist_dir="./index")
    index = load_index_from_storage(storage_context)
    #query="What are the methods that make up the genetic algorithm?"
    #query = "How did they center the div?"
    #query = "how does the game loop work?"
    
    retriever_engine = index.as_retriever(similarity_top_k=10)
    retrieval_results = retriever_engine.retrieve(query)
    retrieved_drawing_ids = [n.node.metadata["file_path"] for n in retrieval_results]
    print(retrieved_drawing_ids)
    print([n.node.text for n in retrieval_results][:3])
    top_3_results = [n.node.text for n in retrieval_results][:3]
    return top_3_results

In [58]:
def query_llm(retrieval_results, query):
    context = ""
    for result in retrieval_results:
        context += result
    from groq import Groq
    client = Groq() # Loads the API key automatically from the environment variable
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "user",
                "content": f"{context}\n {query}"
            }
        ]
    )
    print(completion.choices[0].message.content)

In [59]:
def clone_and_read(repo_url, query):
    clone_repo(repo_url=repo_url, target_dir="./temp")
    #parsed_files = read_dir_rec("./temp")
    #print(parsed_files)
    documents = read_directory_documents("./temp")
    create_index(documents)
    retrieval_results = query_index(query)
    llm_response = query_llm(query, retrieval_results)
    
    subprocess.run(["rm", "-rf", "./temp"])

In [64]:
clone_and_read("https://github.com/syn-ce/juliasetexplorer", "How are the fractals drawn to the screen? Reference relevant code snippets")
#clone_and_read("https://github.com/getml/getml-community", "How is the fastprop algorithm implemented?")

['/home/julian/dev/ask-my-repo/flask-service/temp/README.md']
['# Julia Set Explorer\n\nThis project allows for the exploration of Julia sets in real time.\nIf you are in need of a screensaver look no further, there are plenty to find here.\n\nTry it yourself: [click me](https://syn-ce.github.io/JuliaSetExplorer/)\n\nOr skip ahead to the [Gallery](#gallery)\n\nThis is still **very much a work in progress** and there\'s a lot left to do, but most of the initially planned features have already been implemented. There are still a couple of bugs in need of fixing as well as a lot of yet-to-be-made considerations for the overall UX, escpecially considering information about what the individual settings and parameters do. If you find yourself struggling to figure out what something does or what it\'s used for, I have made an effort to lay out most explanations in this README, so you\'ll probably find a [section](#table-of-contents) dedicated to it.\nOn the developer\'s side some things have 

In [61]:
clone_and_read("https://github.com/itsmejul/flappy-evolve", "How is the jumping of the birds and the gravity implemented?")

['/home/julian/dev/ask-my-repo/flask-service/temp/README.md', '/home/julian/dev/ask-my-repo/flask-service/temp/index.html', '/home/julian/dev/ask-my-repo/flask-service/temp/style.css']
['# Flappy-Evolve\nAn implementation of the genetic algorithm for learning Flappy Bird, using plain HTML and JavaScript.\nTry it out here:  \nhttps://itsmejul.github.io/flappy-evolve/\n\n## Running locally\nYou can clone this repo via  \n```\ngit clone git@github.com:itsmejul/flappy-evolve.git\n```\nThen, run it by starting a http server, for example using python:\n```\npython -m http.server 8000\n```\nAnd then visit ``http://localhost:8000`` in your browser to see the simulation.\n\n## Evolution of the birds\nEach "bird" consists of a small MLP with just one hidden layer, which will, during inference, receive the selected input features as inputs. The MLP has one output neuron where we use a sigmoid function to decide whether it should activate or not, based on the inputs. We run this inference once eve