In [1]:
from langchain_openai import ChatOpenAI
# Chat model client built with the library defaults (no model name or temperature is set here).
# NOTE(review): presumably reads the OpenAI API key from the environment — confirm; never hardcode it.
llm = ChatOpenAI()

In [2]:
from langchain_community.document_loaders import PyPDFLoader
# Read the PDF and split it into Document objects so the vector database can ingest them.
pdf_path = "turna_llm.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

In [3]:
from langchain_openai import OpenAIEmbeddings
# Embedding model client built with the library defaults; it is handed to the vector store below.
# NOTE(review): presumably uses the same OpenAI credentials as the chat model — confirm.
embeddings = OpenAIEmbeddings()

In [4]:
from langchain_community.vectorstores import FAISS

# Build the FAISS vector store from the split PDF documents using the embedding model.
vector = FAISS.from_documents(documents=pages, embedding=embeddings)

In [5]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Prompt text: {context} receives the supplied documents, {input} receives the user's question.
qa_template = """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}"""
prompt = ChatPromptTemplate.from_template(qa_template)

# This chain embeds the given documents into the prompt and sends the prompt to the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)

In [6]:
from langchain.chains import create_retrieval_chain

# Expose the vector store through the retriever interface (default retriever settings).
retriever = vector.as_retriever()
# This chain looks up documents related to the input and hands them to document_chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [7]:
# Ask the retrieval chain a question about the paper; the query is passed under the "input" key.
question = "What is Turna LLM? Who are the creators of it?"
response = retrieval_chain.invoke({"input": question})

In [8]:
# Show only the generated answer text from the chain's response.
answer = response["answer"]
print(answer)

TURNA is a language model developed for the low-resource language Turkish that is capable of both natural language understanding and generation tasks. It is an encoder-decoder model based on the UL2 framework and was created by Gökçe Uludoğan, Zeynep Yirmibeşoğlu Balal, Furkan Akkurt, Melikşah Türker, Onur Güngör, and Susan Üsküdarlı from the Department of Computer Engineering at Bogazici University.
