In [2]:
#@title install dependencies for Vertex AI + Gcloud Login

# Create a Project in GCP + Enable vertex AI API

!pip install langchain-google-vertexai google-cloud-aiplatform
!gcloud auth application-default login

Go to the following link in your browser, and complete the sign-in prompts:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fsdk.cloud.google.com%2Fapplicationdefaultauthcode.html&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=4qnC9sYiM7oBlcRvzfVgP7nNzHCBeT&prompt=consent&token_usage=remote&access_type=offline&code_challenge=bJWaNfkMhp-aDOOoxwZk--PJ-ONOaSDQ7Ymt0oWfzkA&code_challenge_method=S256

Once finished, enter the verification code provided in your browser: [REDACTED]

Credentials saved to file: [/content/.config/application_default_credentials.json]

These credentials will be used by any library that requests Application Default Credentials (ADC).
Ca

In [3]:
#@title Create vertex project and init the project.
# Replace chatbot-application-462117 with your own GCP project ID,
# and make sure the Vertex AI API is enabled in that project.
# This adds the project to the Application Default Credentials as the quota project.
!gcloud auth application-default set-quota-project chatbot-application-462117



Credentials saved to file: [/content/.config/application_default_credentials.json]

These credentials will be used by any library that requests Application Default Credentials (ADC).

Quota project "chatbot-application-462117" was added to ADC which can be used by Google client libraries for billing and quota. Note that some services may still bill the project owning the resource.


In [4]:
!pip install PyPDF2



In [5]:
import requests
# Using Github as my Cloud Data.
def list_files_in_github_dir(user, repo, path, timeout=30):
    """List the entries found at ``path`` in a GitHub repository.

    Queries the GitHub "repository contents" REST endpoint without
    authentication.

    Args:
        user: Owner (user or organisation) of the repository.
        repo: Repository name.
        path: Path inside the repository, relative to its root.
        timeout: Seconds to wait for GitHub to respond before ``requests``
            raises a timeout error instead of blocking the notebook forever.

    Returns:
        A list of entry dicts as decoded from the API response. An empty
        list is returned (after printing the status code) when the response
        status is not 200.
    """
    api_url = f"https://api.github.com/repos/{user}/{repo}/contents/{path}"
    response = requests.get(api_url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch: {response.status_code}")
        return []

    payload = response.json()
    # When `path` names a single file the endpoint answers with one object
    # rather than an array; wrap it so callers can always iterate over entries.
    if isinstance(payload, dict):
        return [payload]
    return payload

In [6]:
from io import BytesIO
from PyPDF2 import PdfReader
from langchain.docstore.document import Document

def download_and_parse_files(files, timeout=30):
    """Download the ``.txt`` and ``.pdf`` entries of a listing as Documents.

    Args:
        files: Iterable of dicts, each with a ``"name"`` key and (for
            downloadable entries) a ``"download_url"`` key.
        timeout: Seconds to wait for each download before ``requests``
            raises a timeout error.

    Returns:
        A list of ``Document`` objects, one per successfully downloaded
        text or PDF file. Each carries ``metadata={"source": <name>,
        "id": <1-based position of the entry in files>}``; because skipped
        entries still consume a position, ids may have gaps.

    Raises:
        KeyError: If an entry has no ``"name"`` key.
    """
    documents = []
    for idx, file in enumerate(files, start=1):
        name = file["name"]
        lowered = name.lower()  # accept ".PDF" / ".TXT" as well
        is_text = lowered.endswith(".txt")
        is_pdf = lowered.endswith(".pdf")
        if not (is_text or is_pdf):
            continue

        # Entries without a usable URL cannot be fetched; skip them rather
        # than issuing a request for None.
        download_url = file.get("download_url")
        if not download_url:
            continue

        response = requests.get(download_url, timeout=timeout)
        if not response.ok:
            # Report the failure instead of dropping the file silently.
            print(f"Failed to download {name}: {response.status_code}")
            continue

        if is_text:
            content = response.text
        else:
            reader = PdfReader(BytesIO(response.content))
            # Guard against a page yielding no text (None) so join() cannot fail.
            content = "".join((page.extract_text() or "") for page in reader.pages)

        documents.append(Document(page_content=content, metadata={"source": name, "id": idx}))
    return documents


In [7]:
def load_docs_from_github(user, repo, path):
    """Fetch and parse the files found under ``path`` of a GitHub repository.

    The listing produced by ``list_files_in_github_dir`` is passed straight
    to ``download_and_parse_files``, whose result is returned unchanged.
    """
    return download_and_parse_files(list_files_in_github_dir(user, repo, path))


# Load the knowledge base: every supported file under "data" in the
# danielsamfdo/assessment-dolly repository.
docs = load_docs_from_github(
    user="danielsamfdo",
    repo="assessment-dolly",
    path="data"
)


In [8]:
#@title sample content
# Show the first loaded document as a sanity check on the download step.
docs[0]

Document(metadata={'source': 'Baa_I_Am_Dolly.txt', 'id': 1}, page_content='🐑 "Baa... I Am Dolly" – A Memoir by the World’s Most Famous Sheep\n\nBaaa, hello there, dear human!\n\nMy name’s Dolly—yep, *that* Dolly—the fluffy little ewe who caused quite the stir in your science-y world. I wasn’t just any ol’ sheep, oh no... I was the first mammal cloned from an adult cell. Baa-rilliant, right?\n\nLet me tell you how I baa-came me.\n\nThe Beginning (Before I Was Even Me)\n\nIt all started not in a green meadow, but in a lab—at a place called the Roslin Institute in Scotland. Scientists took a teensy bit of udder cell from another sheep (I like to call her "Big Mama") and merged it with an egg cell from yet another sheep (no offense, but I’ve lost track of all my mums... cloning’s complicated, baaa).\n\nWith a gentle zap—*bzzz!*—they fused the two together. No ram, no flock, no haystack romance—just some clever hands and a spark of curiosity.\n\nThey put my little embryo into a surrogate ma

In [9]:
!pip install langchain_community



In [13]:
#@title initialize vertex
import vertexai

# Project and region handed to vertexai.init; change these to match your own GCP setup.
VERTEX_PROJECT_ID = "chatbot-application-462117"
VERTEX_LOCATION = "us-central1"
vertexai.init(project=VERTEX_PROJECT_ID, location=VERTEX_LOCATION)


In [20]:
#@title Embeddings

from langchain.vectorstores.faiss import FAISS
from langchain_google_vertexai import VertexAIEmbeddings

# Identifier of the Vertex AI embedding model used when building the vector store.
EMBEDDING_MODEL_NAME = "text-embedding-005"
embedding_model = VertexAIEmbeddings(model_name=EMBEDDING_MODEL_NAME)


In [22]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m67.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [23]:
#@title Build Vector Store
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

def build_vectorstore(documents, chunk_size=500, chunk_overlap=50):
    """Split documents into overlapping chunks and index them in FAISS.

    Args:
        documents: The documents to index.
        chunk_size: Maximum size of each chunk, passed to
            ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks, passed to
            ``RecursiveCharacterTextSplitter``.

    Returns:
        The vector store produced by ``FAISS.from_documents`` using the
        notebook-level ``embedding_model``.

    Raises:
        ValueError: If there are no documents, or splitting yields no chunks.
    """
    documents = list(documents)
    # The loader returns [] when the listing fails; fail here with a clear
    # message instead of an opaque error from inside the index construction.
    if not documents:
        raise ValueError("No documents to index; check that the loading step returned files.")

    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_documents(documents)
    if not chunks:
        raise ValueError("Splitting produced no chunks; the documents appear to be empty.")

    return FAISS.from_documents(chunks, embedding=embedding_model)


# Build the index over the loaded documents.
vector_store = build_vectorstore(docs)

In [16]:
from langchain.memory import ConversationBufferMemory
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate

def setup_conversation_with_model(retriever, llm):
    """Build a retrieval-augmented chat chain plus the memory that backs it.

    The chain takes the user's question as its input, fills the prompt with
    the question, the text of the retrieved documents and the conversation
    so far, sends it to ``llm`` and parses the reply into a string.

    The chain only reads from the memory; the caller is responsible for
    recording each exchange with ``memory.save_context``.

    Args:
        retriever: Retriever invoked with the question to fetch context documents.
        llm: Chat model that answers the filled-in prompt.

    Returns:
        A ``(chain, memory)`` tuple.
    """
    # Imported here so the function is self-contained with respect to this helper.
    from langchain_core.messages import get_buffer_string

    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    prompt = PromptTemplate.from_template("""
    You are a helpful digital twin of Dolly. Use the following conversation history and retrieved documents to answer the question.

    Chat History:
    {chat_history}

    Context:
    {context}

    Question:
    {question}
    """)

    def _retrieve_context(question):
        # Put only the document text in the prompt, not the repr of Document objects.
        retrieved = retriever.invoke(question)
        return "\n\n".join(doc.page_content for doc in retrieved)

    def _render_history(_):
        # Render the stored messages as a readable transcript instead of object reprs.
        messages = memory.load_memory_variables({})["chat_history"]
        return get_buffer_string(messages)

    chain = (
        {
            "question": RunnablePassthrough(),
            "context": RunnableLambda(_retrieve_context),
            "chat_history": RunnableLambda(_render_history),
        }
        | prompt
        | llm
        | StrOutputParser()
    )

    return chain, memory


In [18]:
from typing import Any
def converse(query: str, memory: ConversationBufferMemory, chain: Any):
    """Run one chat turn and record it.

    Invokes ``chain`` with ``query``, stores the question/answer pair in
    ``memory`` under the ``"input"`` and ``"output"`` keys, and returns the
    chain's answer.
    """
    answer = chain.invoke(query)
    turn_input, turn_output = {"input": query}, {"output": answer}
    memory.save_context(turn_input, turn_output)
    return answer

In [28]:
#@title Use Vertex LLM
from langchain_google_vertexai import ChatVertexAI

# Settings for the Vertex AI chat model, gathered in one place for easy tuning.
LLM_SETTINGS = {
    "model_name": "gemini-2.0-flash-lite-001",
    "temperature": 0.7,
    "max_output_tokens": 512,
}
llm = ChatVertexAI(**LLM_SETTINGS)


In [29]:
from IPython.display import display, Markdown
import time

# Load everything
chain, memory = setup_conversation_with_model(vector_store.as_retriever(), llm)

display(Markdown("## 🐑 Talk to Dolly — Your Woolly Digital Twin"))
display(Markdown("Ask Dolly anything about her life, her thoughts, her family, or her legacy."))

# Interactive loop: one iteration per user turn. It ends when the user types
# "exit" or "quit", interrupts the kernel, or the input stream is closed.
while True:
    try:
        # Strip surrounding whitespace so "exit " is still recognised.
        user_input = input("\nYou: ").strip()
        if not user_input:
            # Blank line: do not spend a model call or store an empty turn in memory.
            continue
        if user_input.lower() in ["exit", "quit"]:
            print("Goodbye! 🐑")
            break
        display(Markdown("⏳ *Thinking...*"))
        # Short pause kept from the original; presumably lets the status line render — TODO confirm it is needed.
        time.sleep(0.5)
        response = converse(user_input, memory, chain)
        display(Markdown(f"**Dolly:** {response}"))
    except (KeyboardInterrupt, EOFError):
        # EOFError covers input() being called when no input stream is available.
        # Clear memory if you would like
        print("Session ended.")
        break


## 🐑 Talk to Dolly — Your Woolly Digital Twin

Ask Dolly anything about her life, her thoughts, her family, or her legacy.


You: who is the worlds most famous sheep


⏳ *Thinking...*

**Dolly:** Dolly is the world's most famous sheep.



You: when were you born ?


⏳ *Thinking...*

**Dolly:** I was born on July 5, 1996.



You: exit
Goodbye! 🐑
