In [None]:
import yaml
from toolkit.langchain import models
import add_packages
from pprint import pprint
import os, re
import pandas as pd
# import tqdm
from tqdm.auto import tqdm

from toolkit.langchain import (
	document_loaders, text_splitters, text_embedding_models, stores, 
	prompts, utils, output_parsers, agents, documents,
	runnables, agent_tools
)

# Root folder of the input data files and the name of the YAML config to load.
PATH_DATA = f"{add_packages.APP_PATH}/data/..."
FILE_CFG = "....yaml"
# Registers `progress_apply` on pandas objects so per-row jobs show a progress bar.
tqdm.pandas(desc="Processing")

# Decode the config explicitly as UTF-8 instead of relying on the platform's
# default text encoding, so non-ASCII content is read the same way everywhere.
with open(
    f"{add_packages.APP_PATH}/my_configs/{FILE_CFG}", 'r', encoding="utf-8"
) as file:
    configs = yaml.safe_load(file)

# Data

## txt - FAQ

### File 1

In [None]:
# Location of the FAQ text file (File 1) inside the data folder.
path_txt = f"{PATH_DATA}/faq.txt"

In [None]:
# Read the text file at `path_txt` into document objects.
loader_txt = document_loaders.TextLoader(path_txt)
doc_txt = loader_txt.load()

# Split on "##" markers with no overlap between chunks.
# (Alternative settings kept for reference: chunk_size=500, chunk_overlap=100.)
text_splitter = text_splitters.RecursiveCharacterTextSplitter(
    separators=["##"],
    chunk_size=150,
    chunk_overlap=0,
)

# The first chunk is discarded; everything after it is kept.
docs_txt = text_splitter.split_documents(doc_txt)[1:]

# Presumably strips the "source" metadata key and then stamps every chunk with
# the label below -- confirm against toolkit.langchain.utils.
metadatas = {"data": "frequently asked questions"}
utils.remove_metadata(docs_txt, "source")
utils.update_metadata(docs_txt, metadatas)

### File 2

In [None]:
# Location of the text file for File 2.
# NOTE(review): this is the same path as the File 1 cell ("faq.txt") -- confirm
# whether a different file was intended here.
path_txt = f"{PATH_DATA}/faq.txt"

In [None]:
# Load the File 2 text file as document objects.
loader_txt = document_loaders.TextLoader(path_txt)
doc_txt = loader_txt.load()

# "##" is the only separator; chunks do not overlap.
# (Alternative settings kept for reference: chunk_size=500, chunk_overlap=100.)
text_splitter = text_splitters.RecursiveCharacterTextSplitter(
    separators=["##"],
    chunk_size=150,
    chunk_overlap=0,
)

# Keep every chunk except the first one.
# NOTE: this rebinds `docs_txt`, replacing the File 1 chunks from the cell above.
docs_txt = text_splitter.split_documents(doc_txt)[1:]

# Presumably removes the "source" metadata key, then applies the label below to
# every chunk -- confirm against toolkit.langchain.utils.
metadatas = {"data": "frequently asked questions"}
utils.remove_metadata(docs_txt, "source")
utils.update_metadata(docs_txt, metadatas)

## csv

### File 1

In [None]:
# Name of the raw CSV file (File 1) and its full path inside the data folder.
file_csv = "..."
path_csv = f"{PATH_DATA}/{file_csv}"
# Full path of the processed copy: the text before the first "." in the file
# name, followed by "1.csv". Note this value already includes PATH_DATA.
path_csv_processed = f"{PATH_DATA}/{file_csv.split('.')[0]}1.csv"

In [None]:
# Preview the raw CSV, parsing it as semicolon-delimited.
df = pd.read_csv(
	path_csv, delimiter=";"
)

# Last expression of the cell: renders the first rows of the frame.
df.head()

#### Process

In [None]:
# Chat model shared by both prompt stages.
model = models.chat_openai

# Stage 1: first prompt -> model -> plain string.
template1 = """\
...
{text}"""
prompt_template1 = prompts.PromptTemplate.from_template(template1)
chain1 = prompt_template1 | model | output_parsers.StrOutputParser()

# Stage 2: second prompt -> model -> plain string.
template2 = """\
...
{text}
"""
prompt_template2 = prompts.PromptTemplate.from_template(template2)
chain2 = prompt_template2 | model | output_parsers.StrOutputParser()

# Both stages are assigned to the same "text" key, one after the other, so the
# second assignment works on the mapping produced by the first.
chain = (
    runnables.RunnablePassthrough
    .assign(text=chain1)
    .assign(text=chain2)
)

def process_csv_col(text: str) -> str:
    """Run one cell value through `chain` and return its final "text" output.

    Args:
        text: The raw string to feed to the chain under the "text" key.

    Returns:
        The value stored under 'text' in the mapping returned by the chain.
    """
    return chain.invoke({"text": text})['text']

# Quick manual check: push one sample string through the chain and print it.
query = '...'
result = process_csv_col(query)

pprint(result)

In [None]:
# Re-read the raw CSV, rewrite one column through the LLM chain, and save it.
# NOTE(review): the preview cell reads this same file with delimiter=";" while
# this read uses the default "," -- confirm which delimiter the raw file uses.
df = pd.read_csv(path_csv)

col_to_process = "..."

# `progress_apply` comes from tqdm.pandas(); the chain is invoked once per value.
df[col_to_process] = df[col_to_process].progress_apply(process_csv_col)

# `path_csv_processed` already starts with PATH_DATA, so write to it directly.
# Prefixing PATH_DATA a second time would save the file somewhere other than
# the path that is later assigned to `path_csv` and loaded.
df.to_csv(path_csv_processed, index=False)


In [None]:
# From here on `path_csv` refers to the processed CSV instead of the raw file.
# NOTE: this rebinds the name, so re-running the cells above after this one
# would read the processed file rather than the raw one.
path_csv = path_csv_processed

#### Load

In [None]:
# Column names of the CSV, passed to the reader as explicit field names.
csv_cols = utils.get_csv_column_names(path_csv)

# Comma-delimited input (use ";" for semicolon-delimited files).
# Optional settings left disabled: source_column="No", "quotechar": "''".
loader_csv = document_loaders.CSVLoader(
    path_csv,
    csv_args={"delimiter": ",", "fieldnames": csv_cols},
)

# Skip the first loaded document. Presumably that is the header line, which is
# read as an ordinary row once explicit field names are supplied -- confirm.
docs_csv = loader_csv.load()
docs_csv = docs_csv[1:]

metadatas = {"data": "..."}

# Presumably drops the "source" and "row" metadata keys and then applies the
# label above to every document -- confirm against toolkit.langchain.utils.
utils.remove_metadata(docs_csv, "source")
utils.remove_metadata(docs_csv, "row")
utils.update_metadata(docs_csv, metadatas)

### File 2

In [None]:
# Name of the raw CSV file (File 2) and its full path inside the data folder.
file_csv = "..."
path_csv = f"{PATH_DATA}/{file_csv}"
# File name of the processed copy: the text before the first "." in the file
# name, followed by "1.csv". Unlike the File 1 cell, this value is a bare file
# name with no PATH_DATA prefix.
path_csv_processed = f"{file_csv.split('.')[0]}1.csv"

In [None]:
# Preview the raw File 2 CSV, parsing it as semicolon-delimited.
df = pd.read_csv(
	path_csv, delimiter=";"
)

# Last expression of the cell: renders the first rows of the frame.
df.head()

#### Process

In [None]:
# Chat model used by both prompt stages.
# NOTE: this cell rebinds the same names as the File 1 "Process" cell.
model = models.chat_openai

# First stage: prompt -> model -> plain string.
template1 = """\
...
{text}"""
prompt_template1 = prompts.PromptTemplate.from_template(template1)
chain1 = prompt_template1 | model | output_parsers.StrOutputParser()

# Second stage: prompt -> model -> plain string.
template2 = """\
...
{text}
"""
prompt_template2 = prompts.PromptTemplate.from_template(template2)
chain2 = prompt_template2 | model | output_parsers.StrOutputParser()

# The "text" key is assigned twice in sequence, so the second stage runs on the
# mapping produced by the first.
chain = (
    runnables.RunnablePassthrough
    .assign(text=chain1)
    .assign(text=chain2)
)

def process_csv_col(text: str) -> str:
    """Send one cell value through `chain` and return the resulting "text" entry.

    Args:
        text: The raw string passed to the chain under the "text" key.

    Returns:
        The value found under 'text' in the mapping the chain returns.
    """
    return chain.invoke({"text": text})['text']

# Quick manual check: run one sample string through the chain and print it.
query = '...'
result = process_csv_col(query)

pprint(result)

In [None]:
# Column whose values are rewritten by the LLM chain.
col_to_process = "..."

# Re-read the raw CSV (default "," delimiter) and transform the chosen column.
# `progress_apply` comes from tqdm.pandas(); the chain is invoked once per value.
df = pd.read_csv(path_csv)
df[col_to_process] = df[col_to_process].progress_apply(process_csv_col)

# `path_csv_processed` is a bare file name in this section, so it is joined
# onto PATH_DATA to form the output path.
df.to_csv(
    f"{PATH_DATA}/{path_csv_processed}",
    index=False,
)


In [None]:
# In this section `path_csv_processed` is a bare file name, and the processed
# CSV was written to f"{PATH_DATA}/{path_csv_processed}". Point `path_csv` at
# that same location so the loader below reads the file that was just saved.
path_csv = f"{PATH_DATA}/{path_csv_processed}"

#### Load

In [None]:
# Column names of the CSV, passed to the reader as explicit field names.
csv_cols = utils.get_csv_column_names(path_csv)

loader_csv = document_loaders.CSVLoader(
	path_csv,
	# source_column="No",
	csv_args={
		"delimiter": ",", # ",", ";"
		# "quotechar": "''",
		"fieldnames": csv_cols,
	},
)
# Drop the first loaded document, matching the File 1 "Load" cell. Assuming
# document_loaders.CSVLoader is LangChain's CSVLoader, `csv_args` is forwarded
# to csv.DictReader, which reads the header line as an ordinary data row when
# explicit `fieldnames` are given. Without the slice, that header row would be
# indexed as a document.
docs_csv = loader_csv.load()[1:]

metadatas = {
	"data": "..."
}

# Presumably drops the "source" and "row" metadata keys and then applies the
# label above to every document -- confirm against toolkit.langchain.utils.
utils.remove_metadata(docs_csv, "source")
utils.remove_metadata(docs_csv, "row")
utils.update_metadata(docs_csv, metadatas)

# Vector store 

Note:
- Requires `tiktoken` >= 0.6.0.

## csv

### File 1

In [None]:
# Qdrant wrapper for the File 1 CSV documents. Host and API key come from the
# environment; the remaining keyword arguments are unpacked from the matching
# entry under configs["vector_db"]["qdrant"].
qdrant_csv = stores.QdrantWrapper(
	qdrant_host=os.getenv("QDRANT_HOST"),
	qdrant_api_key=os.getenv("QDRANT_API_KEY"),
	configs=configs,
	**configs["vector_db"]["qdrant"]["..."]
)

In [None]:
# Add the loaded CSV documents to the store.
# NOTE(review): on a top-to-bottom run `docs_csv` was last assigned by the
# File 2 "Load" cell, so these are the File 2 documents -- confirm intent.
qdrant_csv.add_documents(docs_csv)

### File 2

In [None]:
# Qdrant wrapper for the File 2 CSV documents; rebinds `qdrant_csv`. Host and
# API key come from the environment; the remaining keyword arguments are
# unpacked from the matching entry under configs["vector_db"]["qdrant"].
qdrant_csv = stores.QdrantWrapper(
	qdrant_host=os.getenv("QDRANT_HOST"),
	qdrant_api_key=os.getenv("QDRANT_API_KEY"),
	configs=configs,
	**configs["vector_db"]["qdrant"]["..."]
)

In [None]:
# Add the loaded CSV documents to the File 2 store.
# NOTE(review): `docs_csv` is the same list object that the File 1 cell above
# adds to its own store -- confirm both stores should receive it.
qdrant_csv.add_documents(docs_csv)

## txt

### File 1

In [None]:
# Qdrant wrapper for the File 1 text chunks. Host and API key come from the
# environment; the remaining keyword arguments are unpacked from the matching
# entry under configs["vector_db"]["qdrant"].
qdrant_txt = stores.QdrantWrapper(
  qdrant_host=os.getenv("QDRANT_HOST"),
  qdrant_api_key=os.getenv("QDRANT_API_KEY"),
  configs=configs,
  **configs["vector_db"]["qdrant"]["..."]
)

In [None]:
# Add the text chunks to the store.
# NOTE(review): on a top-to-bottom run `docs_txt` was last assigned by the
# txt "File 2" cell, so these are the File 2 chunks -- confirm intent.
qdrant_txt.add_documents(docs_txt)

### File 2

In [None]:
# Qdrant wrapper for the File 2 text chunks; rebinds `qdrant_txt`. Host and API
# key come from the environment; the remaining keyword arguments are unpacked
# from the matching entry under configs["vector_db"]["qdrant"].
qdrant_txt = stores.QdrantWrapper(
  qdrant_host=os.getenv("QDRANT_HOST"),
  qdrant_api_key=os.getenv("QDRANT_API_KEY"),
  configs=configs,
  **configs["vector_db"]["qdrant"]["..."]
)

In [None]:
# qdrant_txt.add_documents(docs_txt)

# Test

In [None]:
# Chat model that drives the agent.
llm = models.chat_openai

# Tools offered to the agent: web search (up to 3 results) and one retriever
# tool for each of the two vector stores.
tools = [
    agent_tools.TavilySearchResults(max_results=3),
    qdrant_txt.retriever_tool,
    qdrant_csv.retriever_tool,
]

# Build the tool-calling prompt from the system message stored in the config.
system_message_custom = configs["prompts"]["system_message_onlinica"]
prompt = prompts.create_prompt_tool_calling_agent(system_message_custom)

# The agent type is read from the config; verbose output is switched off.
agent = agents.MyStatelessAgent(
    llm=llm,
    tools=tools,
    prompt=prompt,
    agent_type=configs["agents"]["type"],
    agent_verbose=False,
)

In [None]:
# Test questions for the agent. The list is empty as committed; fill it in
# before running the cell below, which indexes into it.
questions = [

]


In [None]:
# Ask the agent the second question in `questions` and print its answer.
# NOTE: `questions[1]` needs at least two entries; with the empty list defined
# above this line raises IndexError.
input_message = questions[1]
# Alternative (streaming, conversable agent):
# await agent.stream_conversable_agent(questions[2])
result = agent.invoke_agent(input_message)
# Alternative (streaming):
# await agent.stream_agent(input_message)
pprint(result)