In [3]:
!pip install --upgrade --quiet pinecone-client pinecone-text pinecone-notebooks

In [4]:
from langchain_community.retrievers import PineconeHybridSearchRetriever

In [7]:
api_key = "pcsk_3cLEbT_RP7dEJo2baFKSdv3ZabBAiLpmURzsuuCzjpuyxSxt6gBmMddxMRFsT1dWYvKipj"
import os
from pinecone import Pinecone, ServerlessSpec
index_name = "rag-exmp"
pc = Pinecone(api_key = api_key)

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name = index_name,
        dimension = 384,
        metric = "dotproduct",
        spec = ServerlessSpec(cloud='aws',region='us-east-1')
    )

In [8]:
index = pc.Index(index_name)
index

<pinecone.data.index.Index at 0x1ef0c17ac60>

In [13]:
from dotenv import load_dotenv
load_dotenv()

os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
from langchain_huggingface import HuggingFaceEmbeddings
embed = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embed

HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [14]:
from pinecone_text.sparse import BM25Encoder

bm2_encoder = BM25Encoder().default()
bm2_encoder

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\athar\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x1ef4d9b1850>

In [15]:
sent = [
    "India is my country",
    "India is beutiful country",
    "India has powerfull defense system"
]

bm2_encoder.fit(sent)   #apply TF-IDF values 

bm2_encoder.dump("bm2_values.json")


100%|██████████| 3/3 [00:00<00:00, 52.25it/s]


In [61]:
retriever = PineconeHybridSearchRetriever(embeddings=embed,sparse_encoder=bm2_encoder,index=index)

In [62]:
retriever

PineconeHybridSearchRetriever(embeddings=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x000001EF4D9B1850>, index=<pinecone.data.index.Index object at 0x000001EF0C17AC60>)

In [18]:
retriever.add_texts(
    [
    "India is my country",
    "India is beutiful country",
    "India has powerfull defense system"
    ]
)

100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


In [25]:
info = retriever.invoke("tell me about India")

In [30]:
info

[Document(metadata={'score': 0.374628544}, page_content='India is beutiful country'),
 Document(metadata={'score': 0.3610484}, page_content='India is my country'),
 Document(metadata={'score': 0.279743701}, page_content='India has powerfull defense system')]

In [38]:
from pypdf import PdfReader

def extract_text_from_pdf(pdf_path):
    text = []
    with open(pdf_path, 'rb') as pdf_file:
        reader = PdfReader(pdf_file)
        for page in reader.pages:
            text.append(page.extract_text())
    return text

pdf_path = 'into_to_AI.pdf'
extracted_text = extract_text_from_pdf(pdf_path)
print((extracted_text))

['A Brief Introduction to Artificial IntelligenceWhat is AI and how is it going to shape the futureBy Dibbyo Saha, Undergraduate Student, Computer Science,Ryerson University\nWhat is Artificial Intelligence?\nImage by Gerd Altmann from Pixabay\nGenerally speaking, Artificial Intelligence is a computing concept that helps amachinethinkandsolvecomplexproblemsaswehumansdowithourintelligence.For example, we performa task, make mistakes and learn fromour mistakes (Atleast thewiseonesofusdo!).Likewise,anAIorArtificialIntelligenceissupposedtoworkonaproblem, makesomemistakes insolvingtheproblemandlearnfromthe problems in a self-correcting manner as a part of its self-improvement. Or inother words, thinkof this likeplayingagameof chess.Everybadmoveyoumakereduces your chances of winning the game. So, every time you loseagainst yourfriend, you try remembering the moves you made which you shouldn’t have andapplythat knowledgeinyour next gameandsoon. Eventually, youget better andyour precision, or 

In [63]:
retriever.add_texts(extracted_text)

100%|██████████| 1/1 [00:03<00:00,  3.45s/it]


In [64]:
inf = retriever.invoke("history of AI?")

In [66]:
inf[0].page_content

'redefined and more specialised which is really important for the new world toprosper and advance.Towards Conclusion…\nThe growth of Artificial Intelligence in recent times has been exponential. Wecannot evenimaginehowbigandimpactfulAIisgoingtobeinthenearfutureandhowdrasticallyit is goingtochangeandupgradetheworldweliveintoday.Thereare a lot more to learn about AI andits rapidlygrowingapplications inour life. Ibelieve it wouldbewisetoadapt tothis changingworldandacquireskills relatedto Artificial Intelligence and technology. Just like AI learns and develops, weshould too - to make this world a better place.'

In [48]:
from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader("into_to_AI.pdf")
documents = loader.load()
documents

[Document(metadata={'producer': 'Skia/PDF m92 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '2021-05-05T13:04:55+01:00', 'moddate': '2021-05-05T13:04:55+01:00', 'subject': 'Article about artificial intelligence (AI) written by Ryerson Computer Science student, Dibbyo Saha, for Science Rendezvous.', 'title': 'A Brief Introduction to Artificial Intelligence', 'source': 'into_to_AI.pdf', 'total_pages': 9, 'page': 0, 'page_label': '1'}, page_content='A Brief Introduction to Artificial IntelligenceWhat is AI and how is it going to shape the futureBy Dibbyo Saha, Undergraduate Student, Computer Science,Ryerson University\nWhat is Artificial Intelligence?\nImage by Gerd Altmann from Pixabay\nGenerally speaking, Artificial Intelligence is a computing concept that helps amachinethinkandsolvecomplexproblemsaswehumansdowithourintelligence.For example, we performa task, make mistakes and learn fromour mistakes (Atleast thewiseonesofusdo!).Likewise,anAIorArtificialIntelligenceissuppose

In [53]:
documents.page_content

AttributeError: 'list' object has no attribute 'page_content'

In [67]:
text = []
for i in documents:
    text.append(i.page_content)

print(len(text))


9


In [68]:
retriever.add_texts(text)


100%|██████████| 1/1 [00:01<00:00,  1.20s/it]


In [69]:
retriever.invoke("what is AI?")[0].page_content

'redefined and more specialised which is really important for the new world toprosper and advance.Towards Conclusion…\nThe growth of Artificial Intelligence in recent times has been exponential. Wecannot evenimaginehowbigandimpactfulAIisgoingtobeinthenearfutureandhowdrasticallyit is goingtochangeandupgradetheworldweliveintoday.Thereare a lot more to learn about AI andits rapidlygrowingapplications inour life. Ibelieve it wouldbewisetoadapt tothis changingworldandacquireskills relatedto Artificial Intelligence and technology. Just like AI learns and develops, weshould too - to make this world a better place.'

In [70]:
from ingest import retriever

100%|██████████| 9/9 [00:00<00:00, 389.49it/s]
100%|██████████| 1/1 [00:03<00:00,  3.37s/it]


[]


In [71]:
retriever.invoke("what is AI?")

[]