# PDF파일 로드

In [1]:
from langchain.document_loaders import PyPDFLoader

# Point the loader at the source PDF, then read it and split it into
# Document objects in a single call.
loader = PyPDFLoader(file_path="./PDFS/Concepts.pdf")
pages = loader.load_and_split()

In [2]:
# Echo the loaded documents so the notebook prints them for inspection.
pages

[Document(page_content='The Concepts section helps you learn about the parts of the Kubernetes system and the\nabstractions Kubernetes uses to represent your cluster , and helps you obtain a deeper\nunderstanding of how Kubernetes works.\nOverview\nKubernetes is a portable, extensible, open source platform for managing containerized\nworkloads and services, that facilitates both declarative configuration and automation. It has a\nlarge, rapidly growing ecosystem. Kubernetes services, support, and tools are widely available.\nCluster Architecture\nThe architectural concepts behind Kubernetes.\nContainers\nTechnology for packaging an application along with its runtime dependencies.\nWorkloads\nUnderstand Pods, the smallest deployable compute object in Kubernetes, and the higher-level\nabstractions that help you to run them.\nServices, Load Balancing, and Networking\nConcepts and resources behind networking in Kubernetes.\nStorage\nWays to provide both long-term and temporary storage to P

# PDF내용 분리

In [3]:
from langchain.text_splitter import CharacterTextSplitter

# Split on newlines into chunks of at most ~1000 characters (measured with
# len), with 100 characters of overlap between neighbouring chunks.
text_spliter = CharacterTextSplitter(
    separator="\n",
    length_function=len,
    chunk_size=1000,
    chunk_overlap=100,
)

# Re-chunk the loaded pages with the splitter configured above.
texts = text_spliter.split_documents(documents=pages)

In [4]:
# Echo the re-chunked documents so the notebook prints them for inspection.
texts

[Document(page_content='The Concepts section helps you learn about the parts of the Kubernetes system and the\nabstractions Kubernetes uses to represent your cluster , and helps you obtain a deeper\nunderstanding of how Kubernetes works.\nOverview\nKubernetes is a portable, extensible, open source platform for managing containerized\nworkloads and services, that facilitates both declarative configuration and automation. It has a\nlarge, rapidly growing ecosystem. Kubernetes services, support, and tools are widely available.\nCluster Architecture\nThe architectural concepts behind Kubernetes.\nContainers\nTechnology for packaging an application along with its runtime dependencies.\nWorkloads\nUnderstand Pods, the smallest deployable compute object in Kubernetes, and the higher-level\nabstractions that help you to run them.\nServices, Load Balancing, and Networking\nConcepts and resources behind networking in Kubernetes.\nStorage\nWays to provide both long-term and temporary storage to P

# Load Embedding

In [5]:
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model settings: the small English BGE model, run on the CPU,
# with normalization of the produced embeddings switched on.
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

Embeddings loaded


# Save embeddings to vectorDB

In [6]:
from langchain.vectorstores import FAISS

# Build a FAISS vector store from the chunked documents, using the
# embedding model to vectorize each chunk.
db = FAISS.from_documents(documents=texts, embedding=embeddings)

# Use Chatbot from vectorDB

In [None]:
from langchain.llms import LlamaCpp

# Local Llama 2 chat model served through llama.cpp.
# temperature=0.0 with top_p=1 requests deterministic-style decoding.
#
# max_tokens must fit inside the context window together with the prompt.
# The previous value (8192) was larger than n_ctx (4096), which can never be
# satisfied: older llama-cpp-python releases reject such a request with a
# "Requested tokens exceed context window" ValueError, and newer ones
# silently clamp it. 2048 leaves the other half of the window for the prompt.
model = LlamaCpp(
    model_path="../llama-2-7b-chat.Q5_K_M.gguf",
    temperature=0.0,
    top_p=1,
    max_tokens=2048,
    verbose=True,
    n_ctx=4096,
)

In [None]:
# Prompt text for the model. `{question}` is a format-style placeholder,
# presumably substituted with the user's question before the call — confirm
# against the code that consumes this template.
prompt_template = (
    "You're a Kubernetes expert. You understand the question and can "
    "generate a good kubernetes manifest.\n"
    "Question: {question}"
)