# CHATBOT LANGCHAIN

Documentation: https://python.langchain.com/v0.1/docs/get_started/introduction

In [5]:
import argparse
import os
import shutil

from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.schema.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.chroma import Chroma

In [1]:
# Dependencies — uncomment a line to install the package:
# !pip install langchain
# !pip install chromadb
# !pip install pypdf
# !pip install pytest
# !pip install boto3

Collecting pytest
  Downloading pytest-8.2.1-py3-none-any.whl.metadata (7.6 kB)
Collecting iniconfig (from pytest)
  Downloading iniconfig-2.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting pluggy<2.0,>=1.5 (from pytest)
  Downloading pluggy-1.5.0-py3-none-any.whl.metadata (4.8 kB)
Downloading pytest-8.2.1-py3-none-any.whl (339 kB)
   ---------------------------------------- 0.0/339.6 kB ? eta -:--:--
   ---- ---------------------------------- 41.0/339.6 kB 960.0 kB/s eta 0:00:01
   -------------- ------------------------- 122.9/339.6 kB 1.4 MB/s eta 0:00:01
   ---------------------- ----------------- 194.6/339.6 kB 1.5 MB/s eta 0:00:01
   -------------------------------- ------- 276.5/339.6 kB 1.5 MB/s eta 0:00:01
   ---------------------------------------- 339.6/339.6 kB 1.6 MB/s eta 0:00:00
Downloading pluggy-1.5.0-py3-none-any.whl (20 kB)
Downloading iniconfig-2.0.0-py3-none-any.whl (5.9 kB)
Installing collected packages: pluggy, iniconfig, pytest
  Attempting uninstall: pluggy
  

In [2]:
# IPython automagic for `%pwd`: shows the kernel's current working directory.
pwd

'c:\\Users\\incar\\PycharmProjects\\chatbot_ri'

## Load document

In [7]:
def load_documents(DATA_PATH):
    """Load the documents found in a directory via ``PyPDFDirectoryLoader``.

    Args:
        DATA_PATH: Directory handed to ``PyPDFDirectoryLoader``.

    Returns:
        Whatever ``PyPDFDirectoryLoader(DATA_PATH).load()`` returns.
    """
    return PyPDFDirectoryLoader(DATA_PATH).load()


# Resolve the data folder against the working directory (the project root shown
# by the `pwd` cell) rather than a machine-specific absolute path, so the
# notebook also runs from other checkouts of the project.
documents = load_documents(os.path.abspath("data"))

In [9]:
# Number of items returned by load_documents (shown as the cell output).
len(documents)

455

## Split document

In [11]:
def split_documents(
    documents: list[Document],
    chunk_size: int = 800,
    chunk_overlap: int = 80,
):
    """Split documents into smaller chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split.
        chunk_size: Value passed to the splitter as ``chunk_size``.
            Defaults to 800, the value this notebook has always used.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.
            Defaults to 80, the value this notebook has always used.

    Returns:
        The result of ``text_splitter.split_documents(documents)``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,  # lengths are measured with the built-in len
        is_separator_regex=False,
    )
    return text_splitter.split_documents(documents)


chunks = split_documents(documents)

In [13]:
# Number of chunks produced by split_documents (shown as the cell output).
len(chunks)

2906

## Embedding function

Different embedding backends can be used, for example a local model served through Ollama.

In [15]:
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.embeddings.bedrock import BedrockEmbeddings


def get_embedding_function():
    """Build the embedding object that ``add_to_chroma`` hands to the Chroma store.

    Returns:
        A ``BedrockEmbeddings`` instance created with the ``"default"``
        credentials profile and the ``"us-east-1"`` region.
    """
    # Local alternative: OllamaEmbeddings(model="nomic-embed-text")
    return BedrockEmbeddings(
        credentials_profile_name="default",
        region_name="us-east-1",
    )


## Create the database

In [None]:
def add_to_chroma(chunks: list[Document]):
    """Add to the Chroma store only those chunks whose ID is not stored yet.

    Args:
        chunks: Split documents to index. They are passed through
            ``calculate_chunk_ids``; each result is read for ``metadata["id"]``.

    Side effects:
        Opens the store at ``CHROMA_PATH``, prints progress messages and, when
        at least one chunk was added, calls ``persist()`` on the store.

    NOTE(review): ``CHROMA_PATH`` and ``calculate_chunk_ids`` are not defined in
    the visible part of this notebook; presumably they live in
    ``functions/populate_database.py`` — confirm before running this cell.
    """
    # Open the existing database.
    store = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=get_embedding_function(),
    )

    # Give every chunk its ID before comparing against the store.
    tagged_chunks = calculate_chunk_ids(chunks)

    # `include=[]` requests no extra fields; IDs are always included by default.
    known_ids = set(store.get(include=[])["ids"])
    print(f"Number of existing documents in DB: {len(known_ids)}")

    # Keep only the chunks the store does not hold yet.
    pending = [
        candidate
        for candidate in tagged_chunks
        if candidate.metadata["id"] not in known_ids
    ]

    if not pending:
        print("✅ No new documents to add")
        return

    print(f"👉 Adding new documents: {len(pending)}")
    pending_ids = [candidate.metadata["id"] for candidate in pending]
    store.add_documents(pending, ids=pending_ids)
    store.persist()

In [3]:
import importlib

from functions import populate_database

# Reload first so the call below runs the module's current source rather than
# a copy imported earlier in this kernel session.
importlib.reload(populate_database)
populate_database.main()


Number of existing documents in DB: 0
👉 Adding new documents: 2906


  warn_deprecated(


In [4]:
import importlib

from functions import query_data

# Reload first so the query runs against the module's current source.
importlib.reload(query_data)
query_data.main(
    query_text="Hello, can you tell me what is portfolio investment debt securities?",
)


Human: 
Answer the question based only on the following context:

7.29 Positions in unlisted portfolio investment 
equity securities without an observable market price may be valued using methods discussed in paragraphs 7.16–7.17 for direct investment equity. Some listed debt securities also may have no quoted prices, for example, if the market is illiquid or the security ceases trading due to suspension, default, or bankruptcy. A market price can be estimated for such debt securi-ties by discounting future cash flows using a discount rate that takes into account the risk of default (present value approach).
5. Debt securities at nominal values
7.30 Whereas the basic valuation method for debt 
securities is the market value, the nominal value is encouraged as a supplementary item. External Debt 
Statistics: Guide for Compilers and Users  recom-

---

Equity may be split into listed shares, unlisted shares, 
and other equity (paragraph 5.24).
Investment fund shares and money market fund

### Test

In [2]:
from gpt4all import GPT4All

# Load a GPT4All model by file name from a local folder, with downloading
# disabled (allow_download=False).
model = GPT4All(
    model_name='Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf',
    model_path='C:\\Users\\incar\\AppData\\Local\\nomic.ai\\GPT4All',
    allow_download=False,
)

# Send three prompts inside one chat session, then print the session history.
with model.chat_session():
    response1, response2, response3 = [
        model.generate(prompt=user_turn, temp=0)
        for user_turn in ('hello', 'write me a short poem', 'thank you')
    ]
    print(model.current_chat_session)

[{'role': 'system', 'content': ''}, {'role': 'user', 'content': 'hello'}, {'role': 'assistant', 'content': 'Hello! How can I assist you today? If you have any questions or need help, feel free to ask.\n\n'}, {'role': 'user', 'content': 'write me a short poem'}, {'role': 'assistant', 'content': "In the realm of words and thoughts we dwell,\nA poet's heart, where dreams do often tell,\nOf love and loss, of joy and pain,\nWe craft verses that forever remain.\n\nWith each word chosen with care and grace,\nThe rhythm flows like a gentle embrace,\nAs lines unfold in sweet harmony,\nOur souls find solace, our hearts set free."}, {'role': 'user', 'content': 'thank you'}, {'role': 'assistant', 'content': "You're welcome! I'm always here to help or provide inspiration when needed. If there is anything else you would like, feel free to ask."}]
