In [None]:
! pip install -U sentence-transformers
! pip install --upgrade --quiet  spacy
! python -m spacy download en_core_web_sm 
! pip install langchain-cohere
! pip install PyMuPDF
! pip install fitz
! pip install langchain
! pip install chromadb
! pip install langchain_openai
! pip install pypdf
! pip install python-dotenv

## Imports and Env Variables

In [1]:
import os
from dotenv import load_dotenv
from frontendhelper import check_env_variables

load_dotenv()
check_env_variables()

from PdfAnnotator import PdfAnnotator
from OutputParser import OutputParser
from RagApplication import RagApplication

## Initialize the RAG Application

In [2]:
rag_app = RagApplication()

In [3]:
# Send one question to the RAG application and print the answer it returns.
question = "What is the nature of business for Netflix?" 
response = rag_app.ask_question(question)
print(response)

Netflix is a leading entertainment service offering TV series, films, and games globally.

Document: Netflix 10k.pdf | Page Number: 2-3


In [4]:
# Hand the first answer, together with the original question, to refine_output
# and print what comes back.
refined_response = rag_app.refine_output(response, question)
print(refined_response)

Netflix, Inc. (“Netflix”, “the Company”, “registrant”, “we”, or “us”) is one of the world’s leading entertainment services with over 260 million paid memberships in over 190 countries enjoying TV series, films and games across a wide variety of genres and languages.

Document: Netflix 10k.pdf | Page Number: 2-3


In [8]:
import time

# Ask the same question 25 times, keeping every first answer and its refined version.
questions = ['What is the nature of business for Netflix?'] * 25
responses, refined_responses = [], []
for attempt_idx, query in enumerate(questions):
    # Pause for 30 seconds before every fifth request (this includes the very first one).
    if not attempt_idx % 5:
        time.sleep(30)
    # `response` / `refined_response` keep their names: later cells read the last values.
    response = rag_app.ask_question(query)
    refined_response = rag_app.refine_output(response, query)
    responses.append(response)
    refined_responses.append(refined_response)


In [9]:
responses

['Netflix is a global entertainment service providing TV series, films, and games.\n\nDocument: Netflix 10k.pdf | Page Number: 2-3',
 'Netflix is a leading entertainment service offering TV series, films, and games globally.\n\nDocument: Netflix 10k.pdf | Page Number: 2',
 'Netflix is a leading entertainment service offering TV series, films, and games globally.\n\nDocument: Netflix 10k.pdf | Page Number: 2-3',
 'Netflix is a global entertainment service with over 260 million paid memberships offering TV series, films, and games.\n\nDocument: Netflix 10k.pdf | Page Number: 2',
 'Netflix is a leading entertainment service offering TV series, films, and games globally.\n\nDocument: Netflix 10k.pdf | Page Number: 2-3',
 'Netflix is an entertainment service with TV series, films, and games.\n\nDocument: Netflix 10k.pdf | Page Number: 2',
 'Netflix is a global entertainment service offering TV series, films, and games.\n\nDocument: Netflix 10k.pdf | Page Number: 2-3',
 'Netflix is a leading

In [10]:
refined_responses

['Netflix, Inc. ("Netflix", "the Company", "registrant", "we", or "us") is one of the world\'s leading entertainment services with over 260 million paid memberships in over 190 countries enjoying TV series, films and games across a wide variety of genres and languages.\n\nDocument: Netflix 10k.pdf | Page Number: 2-3',
 'Netflix, Inc. (“Netflix”, “the Company”, “registrant”, “we”, or “us”) is one of the world’s leading entertainment services with over 260 million paid memberships in over 190 countries enjoying TV series, films and games across a wide variety of genres and languages.\n\nDocument: Netflix 10k.pdf | Page Number: 2',
 'Netflix, Inc. (“Netflix”, “the Company”, “registrant”, “we”, or “us”) is one of the world’s leading entertainment services with over 260 million paid memberships in over 190 countries enjoying TV series, films and games across a wide variety of genres and languages.\n\nDocument: Netflix 10k.pdf | Page Number: 2-3',
 'Netflix, Inc. (“Netflix”, “the Company”, “

In [None]:
# Print each first answer next to its refined counterpart, numbered from 1.
for i, (first_answer, refined_answer) in enumerate(zip(responses, refined_responses), start=1):
    print(f"Attempt {i}")
    print("First Response\n")
    print(first_answer)
    print('\n\n')
    print('Refined Response:\n')
    print(refined_answer)
    print("------------------------------------")

In [None]:
rag_app.vectorstore_files

In [None]:
rag_app.vectorstore.get().keys()

In [None]:
# Print every entry under the vector store's 'documents' key that contains 'Speiser'.
stored_texts = rag_app.vectorstore.get()['documents']
for match in (text for text in stored_texts if 'Speiser' in text):
    print(match)

In [None]:
rag_app.compressed_vector_search(question)

In [None]:
parser = OutputParser(refined_response)

In [None]:
parser.get_values()

In [None]:
# NOTE(review): `docs` is not assigned in any visible cell, so this raises
# NameError on a fresh kernel. Presumably it held the result of an earlier
# retrieval call — TODO confirm and assign it explicitly before this loop.
# Print the `metadata` attribute of each item in `docs`.
for d in docs:
    print(d.metadata)

In [None]:
parser = OutputParser(refined_response)

In [None]:
parser.get_values()

In [None]:
parser.get_pages()

In [None]:
# Show metadata entries whose 'file path' is 'nike 10k.pdf', plus any entry
# for which that lookup or comparison fails (e.g. the key is missing).
meta = rag_app.vectorstore.get()['metadatas']
for entry in meta:
    try:
        show_entry = bool(entry['file path'] == 'nike 10k.pdf')
    except Exception:
        # Entries that cannot be checked are printed as well so they can be inspected.
        show_entry = True
    if show_entry:
        print(entry)

In [None]:
rag_app.vectorstore.get()['metadatas']

In [None]:
rag_app.vectorstore_files

In [None]:
# Rebind `question` to a new query, ask it, and print the first answer.
question = "Who signed the starbucks document?"
response = rag_app.ask_question(question)
print(response)

In [None]:
# Refine the answer from the previous cell against the same question and print it.
refined = rag_app.refine_output(response, question)
print(refined)

In [None]:
parser = OutputParser(refined)


In [None]:
parser.get_file()

In [None]:
# NOTE(review): this replaces the `rag_app` created near the top of the notebook
# with a fresh instance — confirm the re-initialisation is intended.
rag_app = RagApplication()
# Hand-built state dictionary holding the keys that annotate_pdf_temp reads.
session_state = {
    'annotators':{},  # starts empty; annotate_pdf_temp stores annotators here keyed by file
    'parser':parser,  # OutputParser built from the refined answer
    'response':refined,  # refined answer text passed to the annotator's highlight()
    'sessionId':'abc'  # used as the sub-directory name under annotated_docs/
}

In [None]:
def annotate_pdf_temp(session_state):
    """Highlight the current response in its source PDF and save an annotated copy.

    Args:
        session_state: Mapping that provides
            'sessionId'  - name of the sub-directory under ``annotated_docs``;
            'response'   - value passed to the annotator's ``highlight``;
            'parser'     - object whose ``get_file()`` returns the source file;
            'annotators' - dict used as a cache of annotators keyed by that file.
            A newly created annotator is stored back into 'annotators'.

    Returns:
        The annotator that was used, or ``None`` when the file is not a PDF or
        any step of the annotation fails.

    Raises:
        KeyError: if 'sessionId', 'response' or 'parser' is missing from
            ``session_state`` (these lookups happen before error handling starts).
    """
    session_id = session_state['sessionId']
    response = session_state['response']
    parser = session_state['parser']
    try:
        # Resolve the file once instead of calling get_file() for every use.
        file_name = parser.get_file()
        if 'pdf' not in file_name:
            print("PDF FAILING")
            raise ValueError(f"cited source is not a PDF: {file_name!r}")

        annotators = session_state['annotators']
        if file_name in annotators:
            annotator = annotators[file_name]
        else:
            annotator = PdfAnnotator(file_name)
            annotators[file_name] = annotator

        annotator.highlight(parser, response)

        out_dir = os.path.join('annotated_docs', session_id)
        # exist_ok avoids the check-then-create gap of a separate isdir() test.
        os.makedirs(out_dir, exist_ok=True)
        annotator.save_new_pdf(os.path.join(out_dir, os.path.basename(file_name)))
        return annotator
    except Exception as err:
        # Best-effort by design: report the failure and return None instead of raising.
        print("Sorry, there was an error trying to annotate the pdf. Please ask your question again!")
        # Surface the underlying cause so the failure can actually be diagnosed.
        print(f"Annotation error: {type(err).__name__}: {err}")
        return None

In [None]:
s = 'COCA COLA CO' 
s.title()