In [1]:
from langchain_community.llms import Ollama
from git import Repo
from typing import Any, Dict
from langchain.callbacks.base import BaseCallbackHandler
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import Language
from langchain_community.vectorstores.chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [37]:
!pip uninstall tree-sitter -y

Found existing installation: tree-sitter 0.21.3
Uninstalling tree-sitter-0.21.3:
  Successfully uninstalled tree-sitter-0.21.3


In [2]:
!pip install tree-sitter==0.21.3

Collecting tree-sitter==0.21.3
  Using cached tree_sitter-0.21.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Using cached tree_sitter-0.21.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (498 kB)
Installing collected packages: tree-sitter
Successfully installed tree-sitter-0.21.3


In [3]:
repo_path = "app/repo"
app_dir = "/libs/core/langchain_core"
persist_dir = "app/chroma_db"
programming_suffix = ".py"
model_name_llm = "llama3"
model_name_retriever = "all-minilm"
model_base_url = "http://localhost:11434"

loader = GenericLoader.from_filesystem(
    repo_path + app_dir,
    glob="**/*",
    suffixes=[programming_suffix],
    exclude=["**/non-utf8-encoding.py"],
    parser=LanguageParser(language=Language.PHP, parser_threshold=500),
)
documents = loader.load()
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PHP, chunk_size=2000, chunk_overlap=200
)
texts = python_splitter.split_documents(documents)
print("Amount of chunks: " + str(len(texts)))
db2 = Chroma.from_documents(
    texts,
    OllamaEmbeddings(model=model_name_retriever, base_url=model_base_url),
    persist_directory=persist_dir,
    collection_name="v_db",
)
print("getting documents done")
db2.persist()
print("persisting done")



Amount of chunks: 1017


KeyboardInterrupt: 

In [10]:
class MyCustomHandler(BaseCallbackHandler):
    def on_text(self, text: str, **kwargs: Any) -> Any:
        print(f"Text: {text}")
        self.log = text

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> Any:
        """Run when chain starts running."""
        print("Chain started running")


handler = MyCustomHandler()

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

model_name_llm = "llama3"
model_name_retriever = "all-minilm"
model_base_url = "http://localhost:11434"
llm = Ollama(
    model=model_name_llm, base_url=model_base_url
    )



persist_dir = "app/chroma_db"

"""db3 = Chroma(
    collection_name="v_db",
    persist_directory=persist_dir,
    embedding_function=OllamaEmbeddings(
        model=model_name_retriever, base_url=model_base_url
    ),
)"""
print("Retrieving from disk done")

Retrieving from disk done


In [14]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

In [21]:
from langchain.chains.api.base import APIChain

In [23]:
chain = prompt | llm

In [32]:
from langchain_core.messages import HumanMessage
from langchain_core.callbacks import StdOutCallbackHandler
from typing import Optional, List

class CallbackHandlerWithPrompt(StdOutCallbackHandler):
    def on_chain_start(self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any):
        print(inputs)
        
    def on_llm_end(self, response, **kwargs: Any) -> None:
        texts = "\n".join([" ".join([l.text for l in lst]) for lst in response.generations])
        self.on_text(texts)
    
handler = CallbackHandlerWithPrompt()
config = {"callbacks" : [handler]}

chain.invoke({"messages": [HumanMessage(content="hi! I'm bob")]},config=config)

{'messages': [HumanMessage(content="hi! I'm bob")]}
{'messages': [HumanMessage(content="hi! I'm bob")]}

[1m> Finished chain.[0m


ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [17]:
response

"Hi Bob! It's great to meet you! I'm here to help with any questions or topics you'd like to discuss. What's on your mind today? Do you have a specific question or topic in mind, or are you open to suggestions?"

In [13]:
prompt = PromptTemplate.from_template("1 + {number} = ")
chain = LLMChain(llm=llm, prompt=prompt, verbose=True)
chain.invoke({"number":2})

  warn_deprecated(




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m1 + 2 = [0m

[1m> Finished chain.[0m


{'number': 2, 'text': '3'}

In [9]:
llm.invoke("hello")

Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?

"Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?"

In [5]:
db3 = Chroma(
    collection_name="v_db",
    persist_directory=persist_dir,
    embedding_function=OllamaEmbeddings(
        model=model_name_retriever, base_url=model_base_url
    ),
)

In [11]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("placeholder", "{chat_history}"),
        ("user", "{input}"),
        (
            "user",
            "Given the above conversation, generate a search query to look up to get information relevant to the conversation",
        ),
    ]
)

retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

In [13]:
retriever_chain.invoke({"input" : "What are ClaimViews"})

[]