Following freeCodeCamp's tutorial closely.

In [27]:
import ollama
import numpy as np
import torch

In [4]:
# Sample query text and sample document text; both are embedded in a later
# cell and compared there.
question = "What is a Kalman filter"
document = "Chapter 5: AR and Kalman Filters"

In [12]:
import tiktoken

def num_tokens_from_str(string: str, encoding_name: str) -> int:
    """Return the number of tokens in a text string.

    Args:
        string: Text to tokenize.
        encoding_name: Name of the tiktoken encoding to look up,
            e.g. "cl100k_base".

    Returns:
        The length of the token sequence produced by encoding ``string``.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    # Encode once and return only the count, as the function name, the
    # ``-> int`` annotation and the docstring all promise. The token ids
    # themselves are intentionally not returned.
    return len(encoding.encode(string))

In [5]:
# Tokenize `question` with the "cl100k_base" encoding; the cell displays the result.
num_tokens_from_str(question, "cl100k_base")

6

In [30]:
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# Embedding client configured for the "llama3.2" model.
# NOTE(review): presumably requires a running Ollama server with that model
# pulled — confirm before Restart & Run All.
embeddings = OllamaEmbeddings(model="llama3.2")

# Embed the question and the document text with the same embed_query call so
# the two vectors can be compared directly later on.
query_result = embeddings.embed_query(question)
document_result = embeddings.embed_query(document)

In [31]:
# Number of components in the question embedding.
len(query_result)

3072

In [29]:
# Single sample sentence to index.
text = "LangChain is the framework for building context-aware reasoning applications"

# Build an in-memory vector store from that one text, vectorized with `embeddings`.
vectorstore = InMemoryVectorStore.from_texts([text],embedding=embeddings)

# Wrap the store as a retriever and query it with a natural-language question.
retriever = vectorstore.as_retriever()
retrieved_documents = retriever.invoke("What is LangChain?")

# Display the text of the first returned document.
retrieved_documents[0].page_content

'LangChain is the framework for building context-aware reasoning applications'

In [24]:
# Embed the sample sentence on its own.
single_vector = embeddings.embed_query(text)

# Print only the first 100 characters of the vector's string form, then its length.
print(str(single_vector)[:100])
print(len(single_vector))

[-0.011980836, 0.00060159597, 0.01622734, -0.021190219, -0.0013888354, -0.011334282, -0.011777407, 0
3072


In [25]:
# A second sample sentence, so that a small batch can be embedded in one call.
text2 = "LangGraph is a library for building stateful, multi-actor applications with LLMs"

# embed_documents takes a list of texts; each element of the result is shown below.
two_vectors = embeddings.embed_documents([text, text2])
for vector in two_vectors:
    # First 100 characters of the vector's string form, then its length,
    # each on its own line.
    print(str(vector)[:100], len(vector), sep="\n")

[-0.011980836, 0.00060159597, 0.01622734, -0.021190219, -0.0013888354, -0.011334282, -0.011777407, 0
3072
[-0.0060160398, 0.0022261643, 0.004709511, -0.003964905, -0.0017332155, -0.016549783, -0.004619044, 
3072


In [32]:
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec1: First vector (any sequence or array accepted by ``np.dot``).
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``. If either vector has
        zero norm the similarity is undefined; ``0.0`` is returned instead
        of dividing by zero (which would yield NaN and a RuntimeWarning).
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        # At least one vector is all zeros: no direction to compare.
        return 0.0
    return np.dot(vec1, vec2) / denominator

# Compare the embedding of `question` with the embedding of `document`.
similarity = cosine_similarity(query_result, document_result)
print(similarity)

0.6177236244940539


In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load two lecture PDFs; paths are relative to the notebook's working directory.
loader = PyPDFLoader("Lecture 1 - Basic circuit theory.pdf")
loader2 = PyPDFLoader("Lecture 7- The MOS transistors.pdf")
docs = loader.load()
docs2 = loader2.load()

# NOTE(review): the label says "Total characters", but each value is the
# length of a single entry's text (docs[2] and docs2[5]), not of the whole
# document. The hard-coded indices assume the PDFs yield at least 3 and 6
# entries respectively — confirm.
print(f"Total characters: {len(docs[2].page_content)}")
print(f"Total characters: {len(docs2[5].page_content)}")
# Number of entries loaded from the second PDF.
print(len(docs2))
# Preview the first 500 characters of the sixth entry of the second PDF.
print(docs2[5].page_content[:500])

Total characters: 514
Total characters: 659
42
