In [97]:
## Import libraries 

import openai
import langchain 
import pinecone 
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI

from dotenv import load_dotenv
load_dotenv() #load all variables

import os


In [98]:
## Lets Read the document 
def read_doc(directory):
    file_loader = PyPDFDirectoryLoader(directory)
    documents = file_loader.load()
    return documents

In [99]:
doc = read_doc("documents/")
doc

[Document(page_content=' \n \n UNIVERSITY OF COPENHAGEN Emulation of Nintendo Game Boy\n(DMG-01)\nPyBoy\nTroels Ynddal Mads Ynddal Asger Lund Hansen\nQWV828 SJT402 CMG881\nJanuary 18, 2016', metadata={'source': 'documents\\PyBoy.pdf', 'page': 0}),
 Document(page_content='Abstract\nThis project is covering an emulation of the Nintendo Game Boy (DMG-01) from 1989.\nThe Game Boy has been emulated many times before, but this project will emulate it in\nthe programming language Python 2.7. The implementation is not based on any existing\nemulator, but is made from scratch. The emulation has proven to be fast enough, to run\nsoftware from cartridge dumps, with the same speed as the Game Boy. Most essential\ncomponents of the Game Boy, are part of the emulation, but sound and serial port are\nnot included in this project. The implementation runs in almost pure Python, but with\ndependencies for drawing graphics and getting user interactions through SDL2 and NumPy.', metadata={'source': 'docum

In [100]:
len(doc)

40

In [101]:
## Dive the docs into chuks 

def chunk_data(docs,chunk_size=800,chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,chunk_overlap=chunk_overlap)
    doc = text_splitter.split_documents(docs)
    return docs 


In [102]:
documents = chunk_data(docs=doc)
print(len(documents))
documents

40


[Document(page_content=' \n \n UNIVERSITY OF COPENHAGEN Emulation of Nintendo Game Boy\n(DMG-01)\nPyBoy\nTroels Ynddal Mads Ynddal Asger Lund Hansen\nQWV828 SJT402 CMG881\nJanuary 18, 2016', metadata={'source': 'documents\\PyBoy.pdf', 'page': 0}),
 Document(page_content='Abstract\nThis project is covering an emulation of the Nintendo Game Boy (DMG-01) from 1989.\nThe Game Boy has been emulated many times before, but this project will emulate it in\nthe programming language Python 2.7. The implementation is not based on any existing\nemulator, but is made from scratch. The emulation has proven to be fast enough, to run\nsoftware from cartridge dumps, with the same speed as the Game Boy. Most essential\ncomponents of the Game Boy, are part of the emulation, but sound and serial port are\nnot included in this project. The implementation runs in almost pure Python, but with\ndependencies for drawing graphics and getting user interactions through SDL2 and NumPy.', metadata={'source': 'docum

In [103]:
## Embedding Technique of OPENAI --> ## if you want to use OPENAI 

#from langchain.embeddings.openai import OpenAIEmbeddings
#embeddings= OpenAIEmbeddings(api_key=os.environ["OPENAI_API_KEY"]) 


## Embedding Technique of Llama

from langchain_community.embeddings.ollama import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [104]:
vectors = embeddings.embed_query("How are you?")
print(len(vectors))
vectors 

768


[1.1484825611114502,
 0.49825072288513184,
 -1.8537139892578125,
 -1.131885051727295,
 -0.6941664218902588,
 0.8938246965408325,
 0.7071433663368225,
 -1.1043038368225098,
 0.3855056166648865,
 -0.5652509331703186,
 0.5755836963653564,
 -0.39079150557518005,
 0.642055094242096,
 0.0009496277198195457,
 1.4630216360092163,
 -1.267431378364563,
 0.14008978009223938,
 -0.41125616431236267,
 0.2649652063846588,
 -0.9019607305526733,
 0.010129362344741821,
 -1.533316969871521,
 0.1061345636844635,
 -0.18648742139339447,
 -1.2285411357879639,
 -1.8969976902008057,
 -1.5175330638885498,
 1.6283458471298218,
 -1.6342037916183472,
 -0.025510279461741447,
 0.3828410506248474,
 0.5111056566238403,
 -0.2716519832611084,
 0.11581438034772873,
 -1.5324219465255737,
 -1.6473841667175293,
 1.381579875946045,
 -0.7219509482383728,
 -0.06635911017656326,
 1.50679349899292,
 3.353022336959839,
 1.3806849718093872,
 -0.20049729943275452,
 -0.4748189449310303,
 -0.7369212508201599,
 -1.3167357444763184,
 -

In [105]:
## VECTOR SEACH DB IN PINECONE 
# in .env remember to put: --> PINECONE_API_KEY: Your Pinecone API key.
from langchain_pinecone import PineconeVectorStore

index_name ="langchainvector"
index = PineconeVectorStore.from_documents(doc,embeddings,index_name=index_name)



In [106]:
## Cosine Similarity Retreive Results from VectorDB
def retrieve_query(query, k=2):
    matching_results = index.similarity_search(query, k=k)
    return matching_results

In [109]:
from langchain.chains.question_answering import load_qa_chain


## If you want to use openai you can use this:
#from langchain.llms import openai
#llm = openai(model_name="text-davinci-003",temperature=0.5)


## If you want to use llama3:
from langchain_community.llms import Ollama

llm = Ollama(model="llama3",temperature=0.6)

## Add the model to the chain
chain = load_qa_chain(llm,chain_type="stuff")

In [110]:
## Search answers from VectorDB
def retrieve_answers(query):
    doc_search=retrieve_query(query)
    print(doc_search)
    response = chain.run(input_documents=doc_search, question=query)
    return response 

In [113]:
## Test to see how it works
our_query = "What is the partial conclusion?"
answer = retrieve_answers(our_query)
print(answer)

[Document(page_content='Contents\n1 Introduction 6\n1.1 Motivation . . . . . . . . . . 6\n1.2 What is a Game Boy . . . . . 6\n1.3 Terminology . . . . . . . . . . 6\n2 Emulation 6\n2.1 Architechture . . . . . . . . . 7\n2.1.1 Structure . . . . . . . 7\n2.1.2 Interconnection . . . . 7\n2.1.3 Implementation . . . . 7\n2.2 Partial Conclusion . . . . . . 8\n3 Central Processing Unit 8\n3.1 Sharp LR35902 . . . . . . . . 8\n3.2 Opcodes . . . . . . . . . . . . 9\n3.3 Registers . . . . . . . . . . . . 9\n3.3.1 General purpose reg-\nisters . . . . . . . . . . 9\n3.3.2 Special registers . . . 10\n3.4 Operations . . . . . . . . . . 10\n3.4.1 Arithmetic Logic Unit 10\n3.4.2 Load data . . . . . . . 10\n3.4.3 Jumps . . . . . . . . . 11\n3.5 Interrupts . . . . . . . . . . . 11\n3.5.1 VBLANK . . . . . . . . 11\n3.5.2 LCDC . . . . . . . . . . 12\n3.5.3 SERIAL . . . . . . . . 12\n3.5.4 TIMER . . . . . . . . . 12\n3.5.5 HiToLo . . . . . . . . 12\n3.5.6 HALT . . . . . . . . . . 12\n3.6 Emulation . . . . . 