In [None]:
%pip install langgraph langchain transformers sentence-transformers torch
%pip install -qU langchain-chroma
%pip install huggingface_hub[hf_xet]

In [7]:
from typing import TypedDict, List
from langgraph.graph import StateGraph, END
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
import numpy as np

In [3]:
# Agent state schema: a typed dictionary with three keys.
#   user_query     -> str        (presumably the question asked by the user)
#   retrieved_docs -> List[str]  (presumably text of the retrieved documents)
#   final_answer   -> str        (presumably the generated answer)
AgentState = TypedDict(
    "AgentState",
    {
        "user_query": str,
        "retrieved_docs": List[str],
        "final_answer": str,
    },
)

# Load the Hugging Face causal LM and its tokenizer.
model_id = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Text-generation pipeline around the loaded model.
# `temperature` is only honoured in sampling mode; without do_sample=True
# transformers decodes greedily and warns that the temperature is ignored.
hf_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,  # upper bound on generated tokens per call
    do_sample=True,
    temperature=0.2,
)

# Wrap the pipeline so it can be used as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=hf_pipeline)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.88s/it]


In [None]:
# Embedding model handed to the vector store as its embedding function.
# NOTE(review): presumably this must be the same model the store was built with - confirm.
embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Directory of the existing (already persisted) Chroma vector store.
VECTOR_DB_PATH = "../Data/vectorstore"

# Open the persisted collection. Replace "pdf_documents" with your own collection name.
vector_store = Chroma(
    embedding_function=embedder,
    persist_directory=VECTOR_DB_PATH,
    collection_name="pdf_documents",
)
# Hint from the original author if you are unsure about the collection name:
# run `print(vector_store._collection.count())` after this cell.




In [15]:
# Smoke test: fetch the single closest chunk for a sample question and show its text.
results = vector_store.similarity_search("What is machine learning", k=1)
if results:
    print(results[0].page_content)
else:
    # Without this guard an empty hit list makes results[0] raise a bare IndexError.
    print("No documents retrieved - check VECTOR_DB_PATH and the collection name.")


ARCH2021.1 Shapiro_Machine Learning ... 00e6 1 
Machine Learning: what is it and what are its components? 
-- some preliminary observations1 
 
Arnold F. Shapiro 
Penn State University, Smeal College of Business, University Park, PA 16802, USA 
Abstract 
This article focuses on conceptualizing machine learning (ML) concepts.  The general topics 
covered are supervised learning based on regression and classification, unsupervised 
learning based on clustering and dimensionality reduction, and rei


In [None]:
def retrieve_node(query: str, k: int = 3) -> List[str]:
    """Return the text of the chunks most similar to ``query``.

    Runs a similarity search against the module-level ``vector_store``.

    Args:
        query: Search string passed to the vector store.
        k: Number of results requested from the vector store (default 3).

    Returns:
        The ``page_content`` of every hit, in the order the vector store
        returned them. Empty when the search yields no hits.

    NOTE(review): this takes a raw query string rather than an ``AgentState``;
    if it is meant to be registered directly as a LangGraph node it may need a
    state-based wrapper - confirm against the graph wiring.
    """
    results = vector_store.similarity_search(
        query=query,
        k=k,
    )
    # The previous version discarded the hits and always returned [].
    return [doc.page_content for doc in results]