In [None]:
!pip install sentence-transformers langchain chromadb pypdf faiss-cpu  langchain_community scikit-learn matplotlib seaborn numpy


Collecting chromadb
  Downloading chromadb-1.3.6-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting pypdf
  Downloading pypdf-6.4.1-py3-none-any.whl.metadata (7.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting langchain_community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.3-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Coll

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS,Chroma
import os


In [None]:
# Colab-only: files.upload() opens the browser upload widget.
from google.colab import files

# The returned mapping is keyed by uploaded file name (the key is printed below).
uploaded = files.upload()

# Fail with a clear message instead of a bare IndexError when nothing came back
# (presumably what happens if the upload dialog is dismissed — confirm).
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a PDF.")

# Only the first uploaded file is used by the rest of the notebook.
pdf_path = next(iter(uploaded))
print(f"Uploaded file: {pdf_path}")


Saving git-cheat-sheet-education.pdf to git-cheat-sheet-education.pdf
Uploaded file: git-cheat-sheet-education.pdf


In [None]:
# Parse the uploaded PDF into LangChain documents.
# The count printed below is reported as pages, i.e. presumably one document per page.
loader = PyPDFLoader(file_path=pdf_path)
documents = loader.load()

page_count = len(documents)
print("Total pages loaded:", page_count)




Total pages loaded: 2


In [None]:
# Chunking parameters gathered in one place so they are easy to tune.
# NOTE(review): sizes are presumably measured in characters (the splitter's
# default length function) — confirm.
splitter_options = {"chunk_size": 500, "chunk_overlap": 100}
splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split every loaded document into overlapping chunks.
chunks = splitter.split_documents(documents)
chunk_count = len(chunks)
print("Total chunks created:", chunk_count)


Total chunks created: 12


In [None]:
# Sanity check: show the first chunk's text, capped at 500 characters.
first_chunk_text = chunks[0].page_content
print(first_chunk_text[:500])


GIT CHEAT SHEET
STAGE & SNAPSHOT
Working with snapshots and the Git staging area
git status
show modiﬁed ﬁles in working directory, staged for your next commit
git add [file]
add a ﬁle as it looks now to your next commit (stage)
git reset [file]
unstage a ﬁle while retaining the changes in working directory
git diff
diﬀ of what is changed but not staged
git diff --staged
diﬀ of what is staged but not yet committed
git commit -m “[descriptive message]”


In [None]:
# Embedding model shared by both vector stores built later in this notebook.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)


### **FAISS**

In [None]:
# Embed every chunk with the shared embedding model and index the vectors in FAISS.
faiss_db = FAISS.from_documents(
    documents=chunks,
    embedding=embedding_model,
)
print("FAISS Vector DB created!")


FAISS Vector DB created!


In [None]:
# Retrieve the 3 chunks most similar to the question from the FAISS index.
query = "What is git push?"
results = faiss_db.similarity_search(query, k=3)

# Print a 200-character preview of each hit, followed by a separator line.
for hit in results:
    print(hit.page_content[:200], "---", sep="\n")


git remote add [alias] [url]
add a git URL as an alias
git fetch [alias]
fetch down all the branches from that Git remote
git merge [alias]/[branch]
merge a remote branch into your current branch to b
---
related that happens locally on your computer. This cheat sheet features the most important and commonly 
used Git commands for easy reference.
INSTALLATION & GUIS
With platform speciﬁc installers for
---
git diff --staged
diﬀ of what is staged but not yet committed
git commit -m “[descriptive message]”
commit your staged content as a new commit snapshot
SETUP
Conﬁguring user information used across al
---


### **Chroma Vector Store**

In [None]:
# Directory handed to Chroma as its persist location.
chroma_dir = "./chroma_db"

# exist_ok=True creates the directory when it is missing and is a no-op when it
# already exists, so no separate os.path.exists() check is needed.
os.makedirs(chroma_dir, exist_ok=True)

# Embed every chunk and store it in a Chroma collection backed by chroma_dir.
# NOTE(review): re-running this cell against an already-populated directory may
# add the same chunks a second time — confirm, and consider passing stable `ids`
# or clearing the directory first.
chroma_db = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory=chroma_dir
)

print("Chroma DB created and persisted!")


Chroma DB created and persisted!


In [None]:
# Ask the Chroma store the same question as the FAISS store, so the two result
# lists can be compared.
query = "What is git push?"
results = chroma_db.similarity_search(query, k=3)

# Reduce each hit to its first 200 characters, then print them with separators.
previews = [doc.page_content[:200] for doc in results]
for preview in previews:
    print(preview)
    print("---")


git remote add [alias] [url]
add a git URL as an alias
git fetch [alias]
fetch down all the branches from that Git remote
git merge [alias]/[branch]
merge a remote branch into your current branch to b
---
related that happens locally on your computer. This cheat sheet features the most important and commonly 
used Git commands for easy reference.
INSTALLATION & GUIS
With platform speciﬁc installers for
---
git diff --staged
diﬀ of what is staged but not yet committed
git commit -m “[descriptive message]”
commit your staged content as a new commit snapshot
SETUP
Conﬁguring user information used across al
---
