# Apocalypse Now Chatbot

## Imports

In [1]:
import os
import pandas as pd 
import json
import argparse
import shutil
from langchain.schema import Document
from langchain_google_vertexai import VertexAI
from langchain_google_vertexai.model_garden import ChatAnthropicVertex
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain.vectorstores.chroma import Chroma

## Credentials

In [2]:
# Google Cloud project and region passed to the Vertex AI clients created later.
PROJECT_ID = "gcp-nyc"
REGION = "us-central1"

# Service-account key file, given as a path relative to the working directory.
# NOTE(review): the key filename is hard-coded here; consider supplying it from
# outside the notebook so the file name never lands in version control.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "gcp-nyc-ce93903e254d.json"

## Initializations

In [3]:
# Directory holding the markdown sources to index, and the directory where the
# Chroma vector store is persisted.
DATA_PATH = 'data'
CHROMA_PATH = 'chroma'

# Embedding client. Project and region are passed explicitly so it is configured
# the same way as the chat model below instead of relying on implicit defaults.
embedding_function = VertexAIEmbeddings(
    model_name="textembedding-gecko",
    project=PROJECT_ID,
    location=REGION,
)

# Handle on the persisted vector store; main() queries this object.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

# Chat model client.
# NOTE(review): `llm` is not used by any cell visible in this notebook yet.
llm = ChatAnthropicVertex(
    model_name="claude-3-sonnet@20240229",
    temperature=0.5,
    project=PROJECT_ID,
    location=REGION
)

In [4]:
def load_documents():
    """Load the files matching ``*.md`` from ``DATA_PATH``.

    Returns:
        Whatever ``DirectoryLoader(...).load()`` produces for that directory.

    Note:
        This function is also defined in the following cell; the later
        definition is the one in effect after a top-to-bottom run.
    """
    return DirectoryLoader(DATA_PATH, glob="*.md").load()


def split_text(documents: list[Document], chunk_size: int = 1000, chunk_overlap: int = 500):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split.
        chunk_size: Value passed to the splitter as ``chunk_size``; lengths are
            measured with ``len``. Defaults to 1000.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.
            Defaults to 500.

    Returns:
        The chunks returned by ``split_documents``. A one-line summary of the
        document and chunk counts is printed as a side effect.

    Note:
        This function is also defined in the following cell; the later
        definition is the one in effect after a top-to-bottom run.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    return chunks


def save_to_chroma(chunks: list[Document]):
    """Rebuild the persisted Chroma store at ``CHROMA_PATH`` from ``chunks``.

    Any existing store directory is removed first, then a new store is created
    from the chunks and persisted.

    Args:
        chunks: Document chunks to index. When empty, nothing is deleted or
            written, so an existing store is left untouched.

    Returns:
        None.

    Note:
        This function is also defined in the following cell; the later
        definition is the one in effect after a top-to-bottom run.
    """
    if not chunks:
        # Without this guard an empty input would wipe the existing store
        # before there is anything to replace it with.
        print(f"No chunks to save; leaving {CHROMA_PATH} unchanged.")
        return

    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Reuse the module-level embedding function so indexing and querying are
    # guaranteed to go through the same embedding model.
    store = Chroma.from_documents(
        chunks, embedding_function, persist_directory=CHROMA_PATH
    )
    store.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}")
    # NOTE(review): the module-level `db` was opened on CHROMA_PATH before this
    # rebuild; it presumably needs to be re-created afterwards so that queries
    # see the new store — confirm.
    
    
def generate_data_store():
    """Run the indexing pipeline: load documents, split them, save the chunks.

    Returns:
        None.

    Note:
        This function is also defined in the following cell; the later
        definition is the one in effect after a top-to-bottom run.
    """
    save_to_chroma(split_text(load_documents()))
   

In [5]:
def load_documents():
    """Load the files matching ``*.md`` from ``DATA_PATH``.

    Returns:
        Whatever ``DirectoryLoader(...).load()`` produces for that directory.

    Note:
        A function with this name is also defined in the previous cell; this
        later definition replaces it when the notebook runs top to bottom.
    """
    return DirectoryLoader(DATA_PATH, glob="*.md").load()


def split_text(documents: list[Document], chunk_size: int = 1000, chunk_overlap: int = 500):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split.
        chunk_size: Value passed to the splitter as ``chunk_size``; lengths are
            measured with ``len``. Defaults to 1000.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.
            Defaults to 500.

    Returns:
        The chunks returned by ``split_documents``. A one-line summary of the
        document and chunk counts is printed as a side effect.

    Note:
        A function with this name is also defined in the previous cell; this
        later definition replaces it when the notebook runs top to bottom.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    return chunks


def save_to_chroma(chunks: list[Document]):
    """Rebuild the persisted Chroma store at ``CHROMA_PATH`` from ``chunks``.

    Any existing store directory is removed first, then a new store is created
    from the chunks and persisted.

    Args:
        chunks: Document chunks to index. When empty, nothing is deleted or
            written, so an existing store is left untouched.

    Returns:
        None.

    Note:
        A function with this name is also defined in the previous cell; this
        later definition replaces it when the notebook runs top to bottom.
    """
    if not chunks:
        # Without this guard an empty input would wipe the existing store
        # before there is anything to replace it with.
        print(f"No chunks to save; leaving {CHROMA_PATH} unchanged.")
        return

    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Reuse the module-level embedding function so indexing and querying are
    # guaranteed to go through the same embedding model.
    store = Chroma.from_documents(
        chunks, embedding_function, persist_directory=CHROMA_PATH
    )
    store.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}")
    # NOTE(review): the module-level `db` was opened on CHROMA_PATH before this
    # rebuild; it presumably needs to be re-created afterwards so that queries
    # see the new store — confirm.
    
    
def generate_data_store():
    """Run the indexing pipeline: load documents, split them, save the chunks.

    Returns:
        None.

    Note:
        A function with this name is also defined in the previous cell; this
        later definition replaces it when the notebook runs top to bottom.
    """
    save_to_chroma(split_text(load_documents()))
   

## Main

In [22]:
def main(query_text: str = "Who is Colonel Kurtz?"):
    """Print the stored passages most similar to ``query_text``.

    Queries the module-level ``db`` with
    ``similarity_search_with_relevance_scores`` (``k=3``) and prints the
    ``page_content`` of each hit, separated by ``---`` lines. The relevance
    scores are not used.

    Args:
        query_text: Question to search for. Defaults to the demo question
            that was previously hard-coded in the body.

    Returns:
        None. If the search returns no results, a short message is printed
        instead of the passages.
    """
    results = db.similarity_search_with_relevance_scores(query_text, k=3)
    if not results:
        print("Unable to find matching results.")
        return

    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    print(context_text)

## Run

In [23]:
# Entry point: run the demo query when this code executes as the top-level module.
if __name__ == "__main__":
    main()

The Colonel accidentally drops the dossier. Papers, photos,

etc., scatter all over the floor. He stoops down to pick

them up.

COLONEL

Jesus...Operations officer, Fifth

Special Forces.

GENERAL

Luke, would you play that tape,

for the captain, please?

(to Willard)

Listen to it carefully, Captain.

The Colonel moves to a tape recorder and turns it on.

MALE VOICE (ON TAPE) (V.O.)

"October 9, 04:30 hours, Sector

Peter, Victor, King."

GENERAL

These were monitored out of

Cambodia. It's been verified as

Colonel Kurtz's voice.

All the men, including Willard, listen in wonder.

KURTZ (ON TAPE) (V.O.)

"I watched a small snail, crawling

on the edge of a straight razor.

That's my dream. It's my nightmare.

Crawling, slithering, along the

edge of a straight razor, and

surviving."

MALE VOICE (ON TAPE) (V.O.)

"Transmission 11, received '68,

December 30, 05:00 hours, Sector

King, Zulu, King".

KURTZ (ON TAPE) (V.O.)

"But we must kill them. We must

---

pass both ways.

(to W