In [1]:
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment, then pass
# the value of GOOGLE_API_KEY to the google.generativeai client.
# (The former bare `os.getenv("GOOGLE_API_KEY")` line discarded its result and
# has been dropped.)
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [3]:
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF in ``pdf_docs``.

    Args:
        pdf_docs: Iterable of PDF sources; each item is passed unchanged to
            ``PdfReader``.

    Returns:
        One string containing all page texts in reading order, joined with no
        separator. An empty ``pdf_docs`` yields ``""``.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: a page whose extraction result is None/empty adds
            # nothing instead of raising TypeError on string concatenation.
            page_texts.append(page.extract_text() or "")
    # Join once at the end rather than growing a string with repeated `+=`.
    return "".join(page_texts)

In [4]:
# Extract the full text of the motivation letter by reusing get_pdf_text
# instead of repeating its page loop inline.
text = get_pdf_text(["Motivation Letter Teh Chen Ming.pdf"])

In [5]:
text

"Name: Teh Chen Ming , email: tehchenm ing7777@gmail.com  \nMy motivation for applying to the ASEAN Seeds for the Future program is because I have \na strong interest in leveraging technology to create sustainable solution, particularly in \nthe green energy sector.   During my studies in Computer Science with a specialization in \nData Analysis at Asia Pacific University in Malaysia, I have engaged in various projects \nlike the prosumer energy behavior prediction model which honed my ability to tackle \nreal-world problems  through data -driven innovation.   \n \nParticipating in the KitaHack competition, our team developed a home energy \nmanagement system called GreenPeak which make me interested in energy sector and \ngreen energy and inspired me to have my new idea agrivoltaics, a integration of solar \nenergy genera tion with agriculture, utilizing digital twins, blockchain for data privacy and \nAI for enhanced efficiency and AI assistance.   This aligns with my professional go

In [6]:
def get_text_chunks(text, chunk_size=2000, chunk_overlap=500):
    """Split ``text`` into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        text: The string to split.
        chunk_size: Value forwarded as the splitter's ``chunk_size``
            (default 2000, the value previously hard-coded here).
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``
            (default 500, the value previously hard-coded here).

    Returns:
        Whatever ``split_text`` returns for ``text`` (the list of chunks).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_text(text)

In [7]:
# Split the extracted letter text into chunks with get_text_chunks. The bare
# name on the last line makes the resulting list the cell's displayed output;
# `test` is reused later as the input to FAISS.from_texts.
test = get_text_chunks(text)
test

["Name: Teh Chen Ming , email: tehchenm ing7777@gmail.com  \nMy motivation for applying to the ASEAN Seeds for the Future program is because I have \na strong interest in leveraging technology to create sustainable solution, particularly in \nthe green energy sector.   During my studies in Computer Science with a specialization in \nData Analysis at Asia Pacific University in Malaysia, I have engaged in various projects \nlike the prosumer energy behavior prediction model which honed my ability to tackle \nreal-world problems  through data -driven innovation.   \n \nParticipating in the KitaHack competition, our team developed a home energy \nmanagement system called GreenPeak which make me interested in energy sector and \ngreen energy and inspired me to have my new idea agrivoltaics, a integration of solar \nenergy genera tion with agriculture, utilizing digital twins, blockchain for data privacy and \nAI for enhanced efficiency and AI assistance.   This aligns with my professional g

In [8]:
def get_vector_store(text_chunks):
    """Embed ``text_chunks``, build a FAISS store from them, and save it locally.

    Args:
        text_chunks: The texts handed to ``FAISS.from_texts``.

    Returns:
        The FAISS vector store that was built and saved under "faiss_index",
        so callers can query it directly without loading it back from disk.
        (Previously nothing was returned; callers that ignore the result are
        unaffected.)
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
    return vector_store


In [9]:
# Embed the chunks held in `test`, build a FAISS store from them, and save it
# locally under the name "faiss_index". These are the same three steps as
# get_vector_store, run at module scope so that `embeddings` and `vector_store`
# remain available as notebook variables.
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
vector_store = FAISS.from_texts(test, embedding=embeddings)
vector_store.save_local("faiss_index")

In [10]:
def get_conversational_chain(model_name="gemini-1.5-flash", temperature=0.3):
    """Build a "stuff" question-answering chain backed by a Gemini chat model.

    Args:
        model_name: Value passed as ``model`` to ``ChatGoogleGenerativeAI``
            (default "gemini-1.5-flash", the value previously hard-coded).
        temperature: Value passed as ``temperature`` to the chat model
            (default 0.3, the value previously hard-coded).

    Returns:
        The chain object returned by ``load_qa_chain``; its prompt expects the
        variables ``context`` and ``question``.
    """

    # The template text is kept exactly as before, including its indentation:
    # it is sent to the model verbatim.
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    model = ChatGoogleGenerativeAI(model=model_name,
                             temperature=temperature)

    prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

    return chain

In [14]:
# Module-scope variant of get_conversational_chain: builds the prompt template,
# the Gemini chat model, and a "stuff" QA chain, leaving `prompt_template`,
# `model`, `prompt` and `chain` as notebook variables. The template text here
# is not indented, unlike the one inside the function.
prompt_template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n

Answer:
"""

model = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
                            temperature=0.3)

# The prompt is filled from two variables: the retrieved context and the question.
prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

In [12]:
def user_input(user_question):
    """Answer ``user_question`` from the saved FAISS index and print the response.

    Loads the index saved under "faiss_index", retrieves the documents most
    similar to the question, runs them through the chain returned by
    ``get_conversational_chain`` and prints the chain's response.

    Args:
        user_question: The question text; used both as the similarity-search
            query and as the chain's ``question`` input.

    Returns:
        None. The response is printed, not returned.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

    # NOTE(security): allow_dangerous_deserialization=True permits loading the
    # pickled part of the saved index, and unpickling can execute arbitrary
    # code. Only load an index this notebook wrote itself; never point this at
    # an index obtained from an untrusted source.
    new_db = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    # Use .invoke(...) rather than calling the chain object directly, matching
    # the module-level query cell later in this notebook.
    response = chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )

    print(response)


In [15]:
# End-to-end query run at module scope: load the saved index, retrieve the
# documents most similar to the question, and ask the QA chain to answer from them.
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
user_question = "show all the competition and hackthon he join"

# NOTE(review): allow_dangerous_deserialization=True presumably opts in to
# unpickling the saved index — only safe for an index this notebook created
# itself; confirm against the LangChain FAISS documentation.
new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
docs = new_db.similarity_search(user_question)

chain = get_conversational_chain()


# The chain receives the retrieved documents plus the question.
response = chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True
    )

print(response)

{'output_text': 'The provided context mentions that Teh Chen Ming participated in the **KitaHack competition**. \n'}
