In [1]:
from langchain_ollama import ChatOllama, OllamaEmbeddings

llm = ChatOllama(
    model="llama3.2",
    temperature=0.7,
)

embeddings = OllamaEmbeddings(
    model="llama3.2"
)

In [2]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance
from langchain_qdrant import QdrantVectorStore

client = QdrantClient(":memory:")

vector_size = len(embeddings.embed_query("test"))

if not client.collection_exists("documents"):
    client.create_collection(
        collection_name="documents",
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )


In [3]:
from mypackage import docroot, filemanager, userinput

# Get input directory from user
_input_dir = docroot.get_input_dir()

# List files in the input directory
file_list = filemanager.get_files_in_directory(_input_dir, extensions=[".pdf"])
filemanager.print_enumerated_file_list(file_list)

# Get selected file from user
selected_file_num = userinput.get_user_input("Select a file by number", default="1")
selected_file_path = file_list[int(selected_file_num) - 1]

# Process document file
file_content_list = filemanager.read_pdf_file(selected_file_path)

1. /Users/andreas/Code/on-prem-llms/input/20200101-NeurIPS-Language_Models_are_few_shot_learners.pdf
2. /Users/andreas/Code/on-prem-llms/input/20171205-Attention_Is_All_You_Need.pdf


In [4]:
import pandas as pd
from mypackage import chunking

df = pd.DataFrame(file_content_list, columns=['page_number', 'text'])

In [5]:
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   page_number  25 non-null     int64 
 1   text         25 non-null     object
dtypes: int64(1), object(1)
memory usage: 532.0+ bytes


Unnamed: 0,page_number,text
0,1,Language Models are Few-Shot Learners\nTom B. ...
1,2,Figure 1.1: Performance on SuperGLUE increases...
2,3,"2 Approach\nOur basic pre-training approach, i..."
3,4,Setting\nLAMBADA\n(acc)\nLAMBADA\n(ppl)\nStory...
4,5,Setting NaturalQS WebQS TriviaQA\nRAG (Fine-tu...


In [6]:
chunked_content = list(zip(df['page_number'], df['text'].apply(lambda x: chunking.text_chunking(x, chunk_size=512))))
df_chunks = pd.DataFrame(chunked_content, columns=['page_number', 'chunks'])
df_chunks = df_chunks.explode('chunks').reset_index(drop=True)
df_chunks.loc[:, 'chunks'] = df_chunks.loc[:, 'chunks'].apply(lambda x: x.lower())
# df_chunks['chunks_lower'] = df_chunks.loc[:, 'chunks'].apply(lambda x: x.lower())

In [7]:
df_chunks.info()
df_chunks.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   page_number  193 non-null    int64 
 1   chunks       193 non-null    object
dtypes: int64(1), object(1)
memory usage: 3.1+ KB


Unnamed: 0,page_number,chunks
0,1,language models are few-shot learners\ntom b. ...
1,1,abstract\nwe demonstrate that scaling up langu...
2,1,and few-shot demonstrations speciﬁed purely vi...
3,1,to using task-agnostic pre-training and task-a...
4,1,agnostic model to perform a desired task.\nrec...


In [8]:
from typing import List, Optional
from sqlalchemy import String, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

class Base(DeclarativeBase):
    pass


In [9]:
import os
from sqlalchemy import MetaData, Table, Column, Integer, String, create_engine
from mypackage import docroot

_data_dir = docroot.get_data_dir() + os.sep
db_name = 'documents.db'
db_path = f"{_data_dir}{db_name}"

engine = create_engine(f"sqlite:////{db_path}")

documents_table = Table(
    'documents',
    Base.metadata,
    Column('id', Integer, primary_key=True),
    Column('page_number', Integer),
    Column('text', String),
)
Base.metadata.create_all(engine)

df_chunks.to_sql('documents', con=engine, if_exists='replace')

193

In [10]:
# returns nothing
from sqlalchemy import text

with engine.connect() as conn:
    conn.execute(text("SELECT * FROM documents")).fetchall()

In [11]:
from sqlalchemy import select
from sqlalchemy.orm import Session


metadata = MetaData()
table = Table(
    'documents',
    metadata,
    autoload_with=engine
    )

stmt = select(table).where(table.columns.page_number == 1)

connection = engine.connect()
results = connection.execute(stmt).fetchall()
for result in results:
    print(result)

(0, 1, 'language models are few-shot learners\ntom b. brown∗ benjamin mann∗ nick ryder∗ melanie subbiah∗\njared kaplan† prafulla dhariwal arvind neelakantan  ... (210 characters truncated) ... ric sigler mateusz litwin scott gray\nbenjamin chess jack clark christopher berner\nsam mccandlish alec radford ilya sutskever dario amodei\nabstract')
(1, 1, 'abstract\nwe demonstrate that scaling up language models greatly improves task-agnostic,\nfew-shot performance, sometimes even becoming competitive w ... (188 characters truncated) ... uage model, and test its performance in the few-shot setting. for all\ntasks, gpt-3 is applied without any gradient updates or ﬁne-tuning, with tasks')
(2, 1, 'and few-shot demonstrations speciﬁed purely via text interaction with the model.\ngpt-3 achieves strong performance on many nlp datasets, including t ... (207 characters truncated) ... o training on large web corpora.\n1 introduction\nnlp has shifted from learning task-speciﬁc representations and desi

In [12]:
vector_store = QdrantVectorStore(
    client=client,
    collection_name="documents",
    embedding=embeddings,
)

In [13]:
limit = 5
texts = df_chunks['chunks'].loc[:limit].tolist()
metadatas = [{'page_number': pn} for pn in df_chunks['page_number']]

vector_store.add_texts(texts, metadatas=metadatas)

['0706f8f9ce7f4518bcde1df68192a8c6',
 '899dca2faba94f7786fc0c7f711790f3',
 'd0ca9caf48ae4a7bb5a49696b18ba7e2',
 '6c5ef4200c374e3aabfe3e05aecf5a64',
 'd0cc672cbaab4c1f8b0cb7dc48c62f23',
 'dd23c0dfcbe145b8b42c3429b0d9ae99']

In [14]:
from langchain.tools import tool
from langchain.agents import create_agent

# Construct a tool for retrieving context
@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

tools = [retrieve_context]
# If desired, specify custom instructions
prompt = (
    "You have access to a tool that retrieves context from a blog post. "
    "Use the tool to help answer user queries."
)
agent = create_agent(llm, tools, system_prompt=prompt)

In [15]:
user_query = userinput.get_user_input("Stelle deine Frage!", default="Welche Informationen enthält das Dokument?")
for step in agent.stream(
    {"messages": [{"role": "user", "content": user_query}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Welche Informationen enthält das Dokument?
Tool Calls:
  retrieve_context (89bd9939-11c2-4a2f-a624-3b15122771ed)
 Call ID: 89bd9939-11c2-4a2f-a624-3b15122771ed
  Args:
    query: Inhalt eines Dokuments
Name: retrieve_context

Source: {'page_number': 1, '_id': 'd0ca9caf48ae4a7bb5a49696b18ba7e2', '_collection_name': 'documents'}
Content: and few-shot demonstrations speciﬁed purely via text interaction with the model.
gpt-3 achieves strong performance on many nlp datasets, including translation,
question-answering, and cloze tasks. we also identify some datasets where gpt-
3’s few-shot learning still struggles, as well as some datasets where gpt-3 faces
methodological issues related to training on large web corpora.
1 introduction
nlp has shifted from learning task-speciﬁc representations and designing task-speciﬁc architectures

Source: {'page_number': 1, '_id': '6c5ef4200c374e3aabfe3e05aecf5a64', '_collection_name': 'documents'}
Content: to using task-agnostic pre-training and task-agn