In [None]:
from afmchain.embeddings import AjaxEmbeddings
from afmchain.api_key import generate_api_key
import numpy as np

api_key = generate_api_key()
embeddings = AjaxEmbeddings(ajax_api_key=api_key)
query_emb = embeddings.embed_query("tell me the rule of american football")
doc_embs = embeddings.embed_documents(
    [
        "American football is played by two teams of 11 players each. The objective is to score points by advancing the ball into the opposing team's end zone or kicking it through their goalposts. Teams have four attempts, called downs, to move the ball at least 10 yards."
    ]
)
print(np.dot(query_emb, doc_embs[0]))

query_emb = embeddings.embed_query("tell me the rule of american football")
doc_embs = embeddings.embed_documents(
    ["first product of apple is the Apple I Computer"]
)
print(np.dot(query_emb, doc_embs[0]))

In [111]:
from langchain.chains import LLMChain
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter, Document, RecursiveCharacterTextSplitter, Language

def build_emb_docsearch_from_notes(notes):
    """
    Build index with embedding
    """
    # define a text splitter
    # text_splitter = CharacterTextSplitter(
    #     chunk_size=1024, chunk_overlap=0, separator=" "
    # )

    code_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.SWIFT, chunk_size=256, chunk_overlap=0
    )
    # construct document for each note body text
    documents = [Document(page_content=note, metadata={'file':file}) for file, note in notes.items()]
    # split texts
    # texts = text_splitter.split_documents(documents)
    texts = code_splitter.split_documents(documents)

    # create vector index
    db = Chroma(persist_directory="./chroma_db")
    docsearch = db.from_documents(
        texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]
    )
    db.persist()
    return docsearch.as_retriever(search_type="mmr")

import os
# write a function to recursively read in all the files in a repository given the folder path
# and return a list of strings
def read_files(folder_path):
    txts = {}
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.endswith(".swift"):  # Check if file has .swift extension
                try:
                    with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                        txts[file] = f.read()
                except:
                    pass
    return txts


# code_files = read_files("/Users/ammarh/Documents/second-brain/vault-management/second-brain-web-search/")
code_files = read_files("/Users/ammarh/j595/Locomotion")

from itertools import islice
first_10 = dict(islice(code_files.items(), 10))
print(first_10.keys())

db = build_emb_docsearch_from_notes(first_10)


dict_keys(['Package.swift', 'main.swift', 'KinematicsPoseProviderNode.swift', 'KinematicsInterface.swift', 'DomoKinematicsWrapper.swift', 'PoseProviderLogging.swift', 'casadiWrapper.swift', 'LotusPoseProvider.swift', 'locomotionctl.swift', 'LocomotionKitAssetsTests.swift'])


In [109]:
db2 = Chroma.load_from_disk("./chroma_db")


AttributeError: 'VectorStoreRetriever' object has no attribute 'persist'

In [107]:
# query it
query = "Ammar Husain"
# docs = db.vectorstore.similarity_search_with_score(query)
# print(docs[0][1])
# print results
# for doc in docs:
#     print(f"results - {doc[0].metadata} ... score= {doc[1]} ... {doc[0].page_content}")

docs = db.get_relevant_documents(query)
for doc in docs:
    print(f"results - {doc.metadata} ... ")


results - {'file': 'casadiWrapper.swift'} ... 
results - {'file': 'LotusPoseProvider.swift'} ... 
results - {'file': 'locomotionctl.swift'} ... 
results - {'file': 'KinematicsInterface.swift'} ... 
