In [None]:
# Install Ollama by downloading the install script from ollama.com with curl
# and piping it straight into sh.
# The line below is a disabled alternative and is not executed.
#!pip install ollama
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Start the Ollama server locally and block until it actually answers requests.
import subprocess
import time
import urllib.request

# Address `ollama serve` listens on by default; change it if OLLAMA_HOST is overridden.
OLLAMA_URL = "http://127.0.0.1:11434"
# Upper bound (seconds) on how long to wait for the server to become reachable.
OLLAMA_STARTUP_TIMEOUT = 30


def _ollama_is_up(url=OLLAMA_URL, timeout=1.0):
    """Return True if an HTTP request to the Ollama server at `url` succeeds.

    Connection failures, timeouts and HTTP error statuses all surface as
    OSError subclasses from urllib and are reported as "not up".
    """
    try:
        with urllib.request.urlopen(url, timeout=timeout):
            return True
    except OSError:
        return False


if _ollama_is_up():
    # A server is already answering (for example this cell was re-run), so do
    # not spawn a second one that would only fail on the occupied port.
    process = None
else:
    # An argument list (no shell=True) makes `process` the server itself rather
    # than a wrapper shell, and raises FileNotFoundError immediately if the
    # `ollama` binary is missing.
    process = subprocess.Popen(["ollama", "serve"])
    deadline = time.monotonic() + OLLAMA_STARTUP_TIMEOUT
    # Poll for readiness instead of sleeping a fixed number of seconds.
    while not _ollama_is_up():
        if process.poll() is not None:
            raise RuntimeError(
                f"`ollama serve` exited early with code {process.returncode}"
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Ollama server did not respond at {OLLAMA_URL} "
                f"within {OLLAMA_STARTUP_TIMEOUT} seconds"
            )
        time.sleep(0.5)

In [None]:
# Pull the llama3 model through the Ollama CLI.
# NOTE(review): the model name is hard-coded here; if MODEL in the configuration
# cell is changed to another Ollama model, this line must be changed to match.
!ollama pull llama3

In [None]:
# Check that llama3 is available by listing the models known to Ollama.
!ollama list

In [None]:
!pip install langchain_community langchain langchain-openai langchain_pinecone langchain[docarray] docarray pydantic==1.10.8 pytube python-dotenv tiktoken pinecone-client scikit-learn ruff pypdf faiss-cpu


In [None]:
import os
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import DocArrayInMemorySearch
from operator import itemgetter

In [None]:

# ---- Run configuration ------------------------------------------------------

# Vector store backend. "FAISS" selects the FAISS index; any other value
# (including the "default" used here) selects the in-memory DocArray store.
# Alternatives kept for reference:
#VECTOR_STORE = "FAISS"
#VECTOR_STORE = "pinecone"
VECTOR_STORE = "default"

# Model name. Names starting with "gpt" are routed to OpenAI, everything else
# is served by Ollama. Alternatives kept for reference:
#MODEL = "gpt-3.5-turbo"
#MODEL = "mixtral:8x7b"
MODEL = "llama3"

# Optional setup for OpenAI models (disabled): load the API key from a .env file.
#from dotenv import load_dotenv
#load_dotenv()
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [None]:
# Instantiate the chat model and the matching embedding model for MODEL.
if MODEL.startswith("gpt"):
    # Read the key from the environment here: the notebook's own
    # OPENAI_API_KEY assignment is commented out, so referencing that bare
    # name would raise NameError as soon as a "gpt" model is selected.
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        raise RuntimeError(
            "The OPENAI_API_KEY environment variable must be set to use an OpenAI model"
        )
    model = ChatOpenAI(openai_api_key=openai_api_key, model=MODEL)
    embeddings = OpenAIEmbeddings()
else:
    # Any non-"gpt" name is treated as a model served by the local Ollama server;
    # the same model is used for generation and for embeddings.
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)

# Smoke test: send one prompt to confirm the model responds.
model.invoke("what is ML")

In [None]:
# Output parser that turns the model's response into a plain string.
parser = StrOutputParser()

# Compose model -> parser; `.pipe()` builds the same sequence as `model | parser`.
chain = model.pipe(parser)

# Smoke test: the same prompt as before, now returned through the parser.
chain.invoke("what is ML")

In [None]:
# Prompt template for question answering over retrieved context. It has two
# placeholders, {context} and {question}, and instructs the model to reply
# "I don't know" when it cannot answer.
template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

# Build a PromptTemplate from the raw template string.
prompt = PromptTemplate.from_template(template)

# Sanity check: render the template with placeholder values and display the result.
prompt.format(context="Here is some context", question="Here is a question")

In [None]:
# Load the PDF and split it into document chunks.
loader = PyPDFLoader("/content/gxocompany.pdf")
pages = loader.load_and_split()

# Fail here with a clear message: an empty document list would otherwise only
# surface later as an obscure error while building the vector store.
if not pages:
    raise ValueError("No text could be extracted from /content/gxocompany.pdf")

# Show the chunk count and a small preview instead of dumping every chunk
# into the notebook output.
print(f"Loaded {len(pages)} chunks")
pages[:3]

In [None]:
# Build the vector store from the PDF chunks. Only the exact value "FAISS"
# selects FAISS; every other VECTOR_STORE value uses the in-memory DocArray store.
if VECTOR_STORE != "FAISS":
    vectorstore = DocArrayInMemorySearch.from_documents(
        pages,
        embedding=embeddings,
        verbose=True,
    )
else:
    vectorstore = FAISS.from_documents(pages, embeddings)

In [None]:
# Set up a retriever on top of the vector store.
retriever = vectorstore.as_retriever()

# Test the retriever with a sample query and display what it returns.
retriever.invoke("balance sheets")

In [None]:
# Assemble the full pipeline: the incoming dict's "question" is sent to the
# retriever to produce the context and is also passed through unchanged; both
# are fed to the prompt, then the model, then the string parser.
question_of = itemgetter("question")

chain = (
    {"context": question_of | retriever, "question": question_of}
    | prompt
    | model
    | parser
)

In [None]:
# Questions to run through the chain. Each entry ends with a comma so that
# enabling the commented-out question cannot silently concatenate two strings.
questions = [
    "provide finanacial analysis as underwriter in financial company by going to complete context that has in company annual reports",
    #"how is the revenue growth of company in 2023",
]

# Print each question, then the chain's answer, followed by a blank line.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({"question": question})
    print(f"Answer: {answer}")
    print()