In [13]:
import os 
from dotenv import load_dotenv
# Call python-dotenv's loader before any cell reads os.environ (the API key is
# read from os.environ further down); presumably this picks up a local .env
# file — verify. The call's return value is what this cell displays.
load_dotenv()

True

In [36]:
from IPython.display import Markdown

In [14]:
# Pull the API key from the process environment rather than hardcoding it in
# the notebook; a missing variable raises KeyError here, before any API call.
GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

In [5]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA

### Load the Paper

In [6]:
# Directory that holds the paper(s) to review, relative to the notebook.
PAPER_DIR = './paper'

# Collect every file matching '**/*.pdf' under PAPER_DIR and parse each one
# with PyPDFLoader.
loader = DirectoryLoader(path=PAPER_DIR, glob='**/*.pdf', loader_cls=PyPDFLoader)
data = loader.load()

# Fail fast with a readable message: later cells index data[0] and build the
# vector store from these documents, so an empty load would otherwise surface
# as an unrelated error further down.
assert data, f"No PDF content was loaded from {PAPER_DIR!r} (glob '**/*.pdf')"

In [7]:
# Sanity check: show the text of the first loaded document to confirm the PDF
# was parsed into readable text.
data[0].page_content

'KGAT: Knowledge Graph Attention Network for\nRecommendation\nXiang Wang\nNational University of Singapore\nxiangwang@u.nus.eduXiangnan He∗\nUniversity of Science and Technology\nof China\nxiangnanhe@gmail.comYixin Cao\nNational University of Singapore\ncaoyixin2011@gmail.com\nMeng Liu\nShandong University\nmengliu.sdu@gmail.comTat-Seng Chua\nNational University of Singapore\ndcscts@nus.edu.sg\nABSTRACT\nTo provide more accurate, diverse, and explainable recommendation,\nit is compulsory to go beyond modeling user-item interactions\nand take side information into account. Traditional methods like\nfactorization machine (FM) cast it as a supervised learning problem,\nwhich assumes each interaction as an independent instance with\nside information encoded. Due to the overlook of the relations\namong instances or items (e.g., the director of a movie is also an\nactor of another movie), these methods are insufficient to distill the\ncollaborative signal from the collective behaviors of use

In [8]:
# Chunking parameters kept as named constants so they are easy to tune.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
texts = text_splitter.split_documents(data)

In [10]:
# Sanity check: show the first chunk produced by the splitter.
texts[0].page_content

'KGAT: Knowledge Graph Attention Network for\nRecommendation\nXiang Wang\nNational University of Singapore\nxiangwang@u.nus.eduXiangnan He∗\nUniversity of Science and Technology\nof China\nxiangnanhe@gmail.comYixin Cao\nNational University of Singapore\ncaoyixin2011@gmail.com\nMeng Liu\nShandong University\nmengliu.sdu@gmail.comTat-Seng Chua\nNational University of Singapore\ndcscts@nus.edu.sg\nABSTRACT\nTo provide more accurate, diverse, and explainable recommendation,\nit is compulsory to go beyond modeling user-item interactions\nand take side information into account. Traditional methods like\nfactorization machine (FM) cast it as a supervised learning problem,\nwhich assumes each interaction as an independent instance with\nside information encoded. Due to the overlook of the relations\namong instances or items (e.g., the director of a movie is also an\nactor of another movie), these methods are insufficient to distill the\ncollaborative signal from the collective behaviors of use

In [17]:
# Embedding model used to vectorise every chunk; authenticated with the key
# read from the environment.
embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001', google_api_key=GOOGLE_API_KEY)

# Deterministic, position-based ids make this cell safe to re-run.
# NOTE(review): without explicit ids, re-running the cell in the same kernel
# appears to append a second copy of every chunk to the default in-memory
# collection — confirm against the installed Chroma version. With stable ids a
# re-run should overwrite the existing entries instead. If the chunking
# parameters change, restart the kernel so ids from the previous run are dropped.
chunk_ids = [f"chunk-{i}" for i in range(len(texts))]

# Embed the chunks and index them in Chroma (no persist directory is passed).
vector_store = Chroma.from_documents(documents=texts, embedding=embeddings, ids=chunk_ids)

I0000 00:00:1722924685.988095   14584 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [23]:
# Wrap the vector store as a retriever for the QA chain. No search arguments
# are passed, so the library's default search settings apply.
retriever = vector_store.as_retriever()

In [21]:
# Gemini model that writes the answers; uses the same API key as the embeddings.
llm = GoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY,
)

In [25]:
# Retrieval-augmented QA chain: `retriever` supplies chunks from the vector
# store and `llm` writes the answer.
qa_chain = RetrievalQA.from_chain_type(
    # The two components wired together.
    llm=llm,
    retriever=retriever,
    # "stuff" chain type — presumably packs all retrieved chunks into a single
    # prompt; see the LangChain docs to confirm.
    chain_type="stuff",
    # Ask the chain to return the source documents alongside the answer.
    return_source_documents=True,
)

In [32]:
# Extraction prompt sent to the QA chain as its 'query' input.
# The JSON skeleton names every expected key explicitly so the model does not
# have to guess field names, and the field list is introduced plainly.
# NOTE(review): this whole prompt is passed as the chain's single 'query';
# presumably the retriever searches with it as well — confirm whether a short
# retrieval query plus a custom prompt template would fetch better chunks.
query = """
You are a [Research Assistant] bot. You help with creating [Literature Review].

Input: You will be given access to a [research paper]


Task: You have to [extract] the following information:
The information to be extracted is listed below:

Paper Name
Focus Area of the paper
Methodology: (e.g. Qualitative, Quantitative, Review, Conceptual, Report)
Key Findings: in 10 words what the paper has implemented/achieved
Application: real life potential use cases (summarize in 10 words or 1-2 points)
Challenges: Drawbacks of this paper/approach (summarize in 1-2 short points)
Opportunities: Future scope/possibilities of paper (summarize in 1-2 points)



Output Format:

json
{
"paper_name": "name of the paper",
"focus_area": "...",
"methodology": "...",
"key_findings": "...",
"application": "...",
"challenges": "...",
"opportunities": "..."
}

"""

In [33]:
# Run the chain once, passing the extraction prompt as the 'query' input.
# The returned mapping is indexed with 'result' in the cells that follow.
result = qa_chain.invoke({'query': query})

In [34]:
# Raw answer text from the chain, displayed as a plain Python string.
result['result']

'```json\n{\n"paper_name": "KGAT: Knowledge Graph Attention Network for Recommendation",\n"focus_area": "Recommender Systems, Knowledge Graph Embeddings, Attention Mechanisms",\n"methodology": "Quantitative",\n"key_findings": "Knowledge graph attention network for recommendation",\n"application": "Personalized recommendations, Explainable AI",\n"challenges": "Data sparsity, Scalability for large knowledge graphs",\n"opportunities": "Extension to multi-modal data, Integration with other recommendation techniques"\n}\n``` \n'

In [37]:
# Render the same answer as Markdown so the fenced JSON block in the model's
# reply is shown formatted rather than as an escaped string.
Markdown(result['result'])

```json
{
"paper_name": "KGAT: Knowledge Graph Attention Network for Recommendation",
"focus_area": "Recommender Systems, Knowledge Graph Embeddings, Attention Mechanisms",
"methodology": "Quantitative",
"key_findings": "Knowledge graph attention network for recommendation",
"application": "Personalized recommendations, Explainable AI",
"challenges": "Data sparsity, Scalability for large knowledge graphs",
"opportunities": "Extension to multi-modal data, Integration with other recommendation techniques"
}
``` 
