In [100]:
from google.auth.transport.requests import Request
from google.oauth2.service_account import Credentials
import vertexai

In [103]:
# Google Cloud project ID and region. In the visible cells these are only
# referenced by the commented-out `vertexai.init(...)` call.
PROJECT_ID = "southern-field-419613"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
# Service-account key file handed to `Credentials.from_service_account_file`.
# The path is relative, so it is resolved against the kernel's working directory.
CREDENTIALS_FILE = "../_google/gcp-credential/key.json"

In [104]:
print("Initialize Google VertexAI!")

# Load the service-account key and request the cloud-platform scope.
credentials = Credentials.from_service_account_file(
    CREDENTIALS_FILE,
    scopes=['https://www.googleapis.com/auth/cloud-platform'])

# Freshly loaded credentials hold no token and no expiry, so `expired` is
# False and a check on it never triggers a refresh. `valid` is False until a
# token has been fetched, which is the condition we actually care about.
if not credentials.valid:
    credentials.refresh(Request())

# NOTE(review): with the init call below commented out, `credentials` is not
# passed to any client in the visible cells — confirm whether that is intended.
# vertexai.init(project=PROJECT_ID, location=LOCATION, credentials = credentials)


Initialize Google VertexAI!


In [105]:
# Model identifiers: TEXT_MODEL is passed to the `VertexAI` LLM and
# EMBEDDING_MODEL to `VertexAIEmbeddings` in the cells below.
TEXT_MODEL = "gemini-1.0-pro"
EMBEDDING_MODEL = "textembedding-gecko@003"

In [106]:
# Utils
# import os
# import time
# from typing import List

# Langchain
import langchain
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain.document_loaders import TextLoader, UnstructuredPDFLoader
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain_google_vertexai import VertexAI, VertexAIEmbeddings
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

print(f"LangChain version: {langchain.__version__}")

# Vertex AI
from google.cloud import aiplatform

print(f"Vertex AI SDK version: {aiplatform.__version__}")

# HTML Widgets
import ipywidgets as widgets
from IPython.display import clear_output

LangChain version: 0.1.16
Vertex AI SDK version: 1.48.0


In [107]:
# Vertex AI embeddings client; later handed to `Chroma.from_documents`.
# EMBEDDING_NUM_BATCH is forwarded as `batch_size` — presumably the number of
# texts sent per embedding request; TODO confirm against the installed
# langchain_google_vertexai version.
EMBEDDING_NUM_BATCH = 5

embeddings = VertexAIEmbeddings(
    model_name=EMBEDDING_MODEL, batch_size=EMBEDDING_NUM_BATCH
)

In [108]:
# Initialise the Vertex AI language model. The generation settings (output
# token cap, temperature, top-p, top-k) are fixed here; the same `llm` is used
# by both the self-query retriever and the answer chain.
llm = VertexAI(
    model=TEXT_MODEL,
    max_output_tokens=2048,
    temperature=0.2,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

In [109]:
from langchain_community.document_loaders import TextLoader
from langchain.docstore.document import Document

import requests

# Load a local copy of the summary through LangChain's TextLoader.
# NOTE(review): `docs_from_txt` is not referenced by any later visible cell —
# confirm whether this cell is still needed.
loader = TextLoader("./summary.txt")
docs_from_txt = loader.load()

In [110]:
from langchain_community.document_loaders import TextLoader
from langchain.docstore.document import Document

import requests

# Remote copies of the conversation summary and its transcript.
url_summary = "https://storage.googleapis.com/talking-dataset/5QW4F8/summary.txt"
url_transcript = "https://storage.googleapis.com/talking-dataset/5QW4F8/transcript.txt"

# Seconds to wait on each download before giving up instead of hanging the kernel.
REQUEST_TIMEOUT_S = 30


def _fetch_text(url, timeout=REQUEST_TIMEOUT_S):
    """Download `url` and return the response body as text.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so an
            error page is never indexed as conversation content.
        requests.Timeout: if the server does not respond within `timeout` seconds.
    """
    reply = requests.get(url, timeout=timeout)
    reply.raise_for_status()
    return reply.text


# Get the content from the URLs
res_summary = _fetch_text(url_summary)
res_transcript = _fetch_text(url_transcript)

# Wrap both texts in tags so they can be told apart inside one document.
# NOTE: the line-continuation backslashes keep each source line's leading
# four spaces in the resulting string, and there is no newline between an
# opening tag and the text that follows it.
page_content = f"\
    Summary:\n\
    <summary>\
    {res_summary}\n\
    </summary>\n\n\n\
    Transcript:\n\
    <transcript>\
    {res_transcript}\n\
    </transcript>"


# A single source document carrying the combined text plus a `convType` tag
# that the self-query retriever can filter on.
docs = [
    Document(page_content=page_content, metadata={"convType": "debating"}),
]

In [111]:
# Sanity check: show the metadata attached to the one source document.
docs[0].metadata

{'convType': 'debating'}

In [112]:
# Split the source document into chunks (chunk_size=8192, chunk_overlap=128;
# presumably measured in characters — TODO confirm the splitter's length function).
# NOTE(review): despite its singular name, `doc` holds the list of chunks
# returned by `split_documents`.
text_splitter = CharacterTextSplitter(chunk_size=8192, chunk_overlap=128)
doc = text_splitter.split_documents(docs)

In [113]:
# Prompt for the answer step. The template has two placeholders, `{context}`
# and `{input}`; the `invoke` calls in later cells supply the question under
# the "input" key.
template = """
You are a helpful AI assistant to answer about the question 
based on the conversation information provided.
conversation:
<conversation>
{context}
</conversation>

The question you got is like this.
question:
<question>
{input}
</question>

Guidlines for other possible question
* If user ask the similar question before, using the same answer but just rephrase it.
* If you cannot find an answer ask the user to rephrase the question.
* If you got a question about the meaning of the word, answer it with a general information you have.
* If you got an answer from outside of the conversation above, mention with the clear notice that it is not coming from the given conversation.
answer:

"""
prompt = PromptTemplate.from_template(template)

In [114]:
# Embed the chunks and index them in a Chroma vector store.
# NOTE(review): re-running this cell in the same kernel may index the same
# chunks again if Chroma reuses its default collection — confirm.
vectorstore = Chroma.from_documents(doc, embeddings)

# Descriptions handed to the self-query retriever: what the documents contain
# and which metadata fields exist (only `convType` is set on the documents).
document_content_description = "Conversation summary and its original transcript"
metadata_field_info = [
    AttributeInfo(
        name="convType",
        description="Type of conversation",
        type="string",
    ),
]
# Retriever built from the LLM, the vector store and the descriptions above.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info
)

In [115]:
# Build the question-answering pipeline: `combine_docs_chain` pairs the LLM
# with the prompt, and `retrieval_chain` puts the self-query retriever in
# front of it.
combine_docs_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

In [116]:
# Smoke test: ask for the overall topic and print the chain's "answer" field.
response = retrieval_chain.invoke({"input": "Tell me about the topic of my conversation."})
print(response['answer'])

The topic of your conversation is job creation. President Obama and Governor Romney discussed their different approaches to creating jobs in the United States. President Obama argued that investing in education and training, developing new sources of energy, changing the tax code to benefit small businesses, rebuilding America with savings from winding down wars, and reducing the deficit in a balanced way would all help to create jobs. Governor Romney argued that achieving North American energy independence, opening up more trade, cracking down on China's unfair trade practices, ensuring Americans have the skills needed to succeed through improved education, balancing the budget, and championing small businesses would all help to create jobs.



In [117]:
# Ask about a word's meaning without saying it should come from the conversation.
response = retrieval_chain.invoke({"input": "What is the meaning of the tax?"})
print(response['answer'])

The tax mentioned in the conversation refers to the proposed changes to the tax code by both Barack Obama and Mitt Romney. 

* **Barack Obama:** Proposes changing the tax code to benefit small businesses and companies operating in the US. This suggests he may be considering lowering tax rates for these entities or providing them with tax breaks.
* **Mitt Romney:** Does not explicitly mention changes to the tax code in this excerpt. However, he does criticize Obama's economic policies as being similar to his 2008 proposals, which included tax cuts for the wealthy. This suggests that Romney may favor tax cuts for higher-income earners.

It's important to note that this conversation only provides a brief overview of each candidate's tax plans. To get a more comprehensive understanding of their proposals, it would be necessary to consult additional sources.



In [118]:
# Same question, this time explicitly scoped to the conversation.
response = retrieval_chain.invoke({"input": "What is the meaning of tax in this conversation?"})
print(response['answer'])

The word "tax" is mentioned several times in the conversation, but its meaning depends on the context. Here are the different ways it is used:

**1. As a general term for government revenue:**

* **Obama:** "I think it's important for us to develop new sources of energy here in America that we **change our tax code** to make sure that we're helping small businesses and companies that are open here in the United States."
* **Romney:** "My plan has five basic parts... number four, get us to a **balanced budget**."

In these cases, "tax" refers to the system of collecting money from individuals and businesses to fund government programs and services.

**2. As a specific policy proposal:**

* **Romney:** "I'm going to do my plan has five basic parts... number one, get its energy independent North American Energy independent that creates about four million job number two, open up more trade for tickling in Latin America Crackdown in China if and when they cheat Number 3 make sure our people

In [119]:
# Widgets: a question box, two buttons, and one shared output area.
button = widgets.Button(description="Ask Me!")
button_stp = widgets.Button(description="More details")
output = widgets.Output()
text = widgets.Text(
    description="Question:", layout=widgets.Layout(width="80%", height="50px")
)
display(text, button, button_stp, output)

# Instruction appended to the question by the "More details" button.
DETAIL_SUFFIX = "Give detailed information as much as possible. "


def _answer_question(question, heading, suffix=""):
    """Send `question` through the retrieval chain and print the answer.

    Args:
        question: Raw text from the question box; surrounding whitespace is ignored.
        heading: Text printed immediately before the answer.
        suffix: Optional extra instruction appended to the question, separated
            from it by a single space.
    """
    question = question.strip()
    if not question:
        # Nothing to ask: skip the model call entirely.
        print("Please enter a question first.")
        return

    # Join with an explicit space so the suffix does not run into the question.
    query = f"{question} {suffix}" if suffix else question
    result = retrieval_chain.invoke({"input": query})

    print(heading, result["answer"])
    print("\n")


@output.capture()
def on_button_clicked(b):
    """Handle "Ask Me!": answer the question exactly as typed."""
    clear_output()
    _answer_question(text.value, "\nAnswer-\n\n")


@output.capture()
def on_stp_clicked(b):
    """Handle "More details": answer the question with a request for more detail."""
    clear_output()
    _answer_question(text.value, "\nAnswer\n\n", suffix=DETAIL_SUFFIX)


button.on_click(on_button_clicked)
button_stp.on_click(on_stp_clicked)

Text(value='', description='Question:', layout=Layout(height='50px', width='80%'))

Button(description='Ask Me!', style=ButtonStyle())

Button(description='More details', style=ButtonStyle())

Output()