In [2]:
## Data Ingestion

from langchain_community.document_loaders import TextLoader
loader = TextLoader("speech.txt")


text_documents = loader.load()
text_documents


[Document(metadata={'source': 'speech.txt'}, page_content='In all probability this will be my last speech to you. Even if the Government allow me to march tomorrow morning, this will be my last speech on the sacred banks of the Sabarmati. Possibly these may be the last words of my life here.\n\nI have already told you yesterday what I had to say. Today I shall confine myself to what you should do after my companions and I are arrested. The programme of the march to Jalalpur must be fulfilled as originally settled. The enlistment of the volunteers for this purpose should be confined to Gujarat only. From what I have been and heard during the last fortnight, I am inclined to believe that the stream of civil resisters will flow unbroken.\n\nBut let there be not a semblance of breach of peace even after all of us have been arrested. We have resolved to utilize all our resources in the pursuit of an exclusively nonviolent struggle. Let no one commit a wrong in anger. This is my hope and pra

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

# os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")



True

In [4]:
## Reading from the Web-based loader (Another Data Ingestion technique)

from langchain_community.document_loaders import WebBaseLoader
import bs4

## Load, chunk and index the content of the html page

loader = WebBaseLoader(web_path=("https://lilianweng.github.io/posts/2021-05-31-contrastive/",),
                       bs_kwargs = dict(parse_only=bs4.SoupStrainer(
                           class_ = ("post-title", "post_content", "post_header"))), )


text_ducuments  = loader.load()


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
text_documents

[Document(metadata={'source': 'speech.txt'}, page_content='In all probability this will be my last speech to you. Even if the Government allow me to march tomorrow morning, this will be my last speech on the sacred banks of the Sabarmati. Possibly these may be the last words of my life here.\n\nI have already told you yesterday what I had to say. Today I shall confine myself to what you should do after my companions and I are arrested. The programme of the march to Jalalpur must be fulfilled as originally settled. The enlistment of the volunteers for this purpose should be confined to Gujarat only. From what I have been and heard during the last fortnight, I am inclined to believe that the stream of civil resisters will flow unbroken.\n\nBut let there be not a semblance of breach of peace even after all of us have been arrested. We have resolved to utilize all our resources in the pursuit of an exclusively nonviolent struggle. Let no one commit a wrong in anger. This is my hope and pra

In [6]:
 ## PDF_Reader
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("VPR_Paper.pdf")
pdf_doc = loader.load()

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
documents = text_splitter.split_documents(pdf_doc)

documents[:5]

[Document(metadata={'producer': 'PyPDF', 'creator': 'PyPDF', 'creationdate': '2025-04-07T03:51:40+00:00', 'author': 'Xueyi Wang, Lele Zhang, Zheng Fan, Yang Liu, Chen Chen, Fang Deng', 'keywords': 'Computer Vision(CV): CV: Image and Video Retrieval, Computer Vision(CV): CV: Applications', 'moddate': '2025-04-07T03:51:42+00:00', 'subject': 'The Thirty-Ninth AAAI Conference on Artificial Intelligence (AAAI-25)', 'title': 'From Coarse to Fine: A Matching and Alignment Framework for Unsupervised Cross-View Geo-Localization', 'source': 'VPR_Paper.pdf', 'total_pages': 9, 'page': 0, 'page_label': '1'}, page_content='From Coarse to Fine: A Matching and Alignment Framework for Unsupervised\nCross-View Geo-Localization\nXueyi Wang1, Lele Zhang1, Zheng Fan1, Yang Liu1, Chen Chen1, Fang Deng1,2*\n1Beijing Institute of Technology\n2Beijing Institute of Technology Chongqing Innovation Center\n{wangxueyi, zhanglele, fanzheng, liuyang, xiaofan, dengfang}@bit.edu.cn\nAbstract\nCross-view geo-localizati

### Loading Source datam DONE, Next is "Transform" part from load, tramsform, embed

In [8]:
## Vector Embedding and Vecor Store
# from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings

from langchain_community.vectorstores import Chroma
## this db (below) will be our Vector Database
db = Chroma.from_documents(documents[:30], OllamaEmbeddings(model ="all-minilm")) ## You need to download this embedding model using `ollama pull all-minilm` 

  db = Chroma.from_documents(documents[:30], OllamaEmbeddings(model ="all-minilm")) ## You need to download this embedding model using `ollama pull all-minilm`


In [9]:
## CHROMA Vector database
query = "Who is Cross-View Geo Localization?"
result = db.similarity_search(query)

result

[Document(metadata={'total_pages': 9, 'keywords': 'Computer Vision(CV): CV: Image and Video Retrieval, Computer Vision(CV): CV: Applications', 'title': 'From Coarse to Fine: A Matching and Alignment Framework for Unsupervised Cross-View Geo-Localization', 'subject': 'The Thirty-Ninth AAAI Conference on Artificial Intelligence (AAAI-25)', 'author': 'Xueyi Wang, Lele Zhang, Zheng Fan, Yang Liu, Chen Chen, Fang Deng', 'creator': 'PyPDF', 'page': 0, 'creationdate': '2025-04-07T03:51:40+00:00', 'moddate': '2025-04-07T03:51:42+00:00', 'page_label': '1', 'source': 'VPR_Paper.pdf', 'producer': 'PyPDF'}, page_content='ate states and bringing both views closer to the intermediate\ndomain simultaneously. Extensive experiments demonstrate\nthat our method surpasses state-of-the-art unsupervised cross-\nview geo-localization methods and even achieves comparable\nperformance to state-of-the-art supervised methods.\nIntroduction\nCross-view geo-localization aims to locate an image by re-\ntrieving im

In [12]:
## FAISS Vector Database
from langchain_community.vectorstores import FAISS
db = FAISS.from_documents(documents[:30], OllamaEmbeddings(model ="mxbai-embed-large"))
db

<langchain_community.vectorstores.faiss.FAISS at 0x7a8cc4bfffd0>

In [11]:
query = "What is Cross-View Geo Localization?"
result = db.similarity_search(query)
result[0].page_content

'ate states and bringing both views closer to the intermediate\ndomain simultaneously. Extensive experiments demonstrate\nthat our method surpasses state-of-the-art unsupervised cross-\nview geo-localization methods and even achieves comparable\nperformance to state-of-the-art supervised methods.\nIntroduction\nCross-view geo-localization aims to locate an image by re-\ntrieving images from the same location but different views\nin a large-scale gallery (Workman, Souvenir, and Jacobs\n2015). Different from traditional vision-based geometry\ngeo-localization (Zhang et al. 2018; Deng et al. 2020; Gao\net al. 2021; Zhang et al. 2022), this research is capable of\nautonomous geo-localization in the absence or weakness of\nthe global navigation satellite system (GNSS) signal, with\nmore robust and flexible localization capabilities that enable\nwidespread appliances across various domains, including\nunmanned ground vehicles and drone localization and navi-'

In [None]:
from langchain_community import Ollama
llm = Ollama(model = "llama2")
