In [1]:
import os

import numpy as np
import pandas as pd
from langchain.document_loaders import DataFrameLoader
from langchain.schema import Document
from langchain.vectorstores.pgvector import DistanceStrategy
from langchain.vectorstores.pgvector import PGVector
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
# Connection string for the Postgres/pgvector instance.
# It can be supplied through the PGVECTOR_CONNECTION_STRING environment
# variable so that real credentials never have to be written into the
# notebook; the literal below is only the local-development fallback.
CONNECTION_STRING = os.environ.get(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql://postgres:123456@localhost:5433/wateraid",
)

# Load the listings and replace every missing value with the literal string
# "NA" so that the later string concatenation never meets a NaN float.
# fillna() returns a new frame here (no inplace mutation), so re-running the
# cell always starts again from the raw CSV.
df = pd.read_csv('Listings_Details.csv').fillna("NA")
df.head()

Unnamed: 0,Listing URL,Name of Activity,Date,Location,Event Synopsis,Event Description,Registration Link,Activity Category
0,https://www.wateraid.org//uk/get-involved/givi...,Hire our Handwashing Hubs,,,Hire our contactless handwashing facilities to...,Frequent handwashing is one of the most effect...,,giving
1,https://www.wateraid.org//uk/get-involved/even...,Swim Serpentine,14 September 2024,"Hyde Park, London",Enjoy a late summer swim in the beautiful surr...,Take in the views with a leisurely half mile o...,https://forms.office.com/Pages/ResponsePage.as...,events
2,https://www.wateraid.org//uk/get-involved/teac...,World Water Day activities,,,Use World Water Day on 22 March to introduce y...,703 million people in the world – that's almos...,,teaching resources
3,https://www.wateraid.org//uk/get-involved/teac...,Educational resources from your local water co...,,,Discover the education opportunities your loca...,"In 1981, WaterAid was set up by members of the...",,teaching
4,https://www.wateraid.org//uk/get-involved/givi...,Muslim Faith Giving,,,"Around the world, many Muslim communities do n...",Clean water and good hygiene means families ca...,,giving


In [3]:
# Build the passage that will be embedded for each listing:
#   "<name>. Location is <location>. <synopsis> <description>"
#
# Columns are addressed by label instead of by integer position. Positional
# lookups such as row[1] on a label-indexed Series are deprecated in pandas,
# and they silently pick the wrong field if the CSV column order changes.
# The concatenation is vectorised, so no Python-level row loop is needed.
combined = (
    df["Name of Activity"]
    + ". Location is "
    + df["Location"]
    + ". "
    + df["Event Synopsis"]
    + " "
    + df["Event Description"]
).tolist()

# `combined` is kept as a plain list (one string per row, in row order) and
# stored as a new column; re-running the cell simply overwrites the column.
df['combined'] = combined

In [4]:
# Preview the first five rows to check the newly added 'combined' column.
df.head(n=5)


Unnamed: 0,Listing URL,Name of Activity,Date,Location,Event Synopsis,Event Description,Registration Link,Activity Category,combined
0,https://www.wateraid.org//uk/get-involved/givi...,Hire our Handwashing Hubs,,,Hire our contactless handwashing facilities to...,Frequent handwashing is one of the most effect...,,giving,Hire our Handwashing Hubs. Location is NA. Hir...
1,https://www.wateraid.org//uk/get-involved/even...,Swim Serpentine,14 September 2024,"Hyde Park, London",Enjoy a late summer swim in the beautiful surr...,Take in the views with a leisurely half mile o...,https://forms.office.com/Pages/ResponsePage.as...,events,"Swim Serpentine. Location is Hyde Park, London..."
2,https://www.wateraid.org//uk/get-involved/teac...,World Water Day activities,,,Use World Water Day on 22 March to introduce y...,703 million people in the world – that's almos...,,teaching resources,World Water Day activities. Location is NA. Us...
3,https://www.wateraid.org//uk/get-involved/teac...,Educational resources from your local water co...,,,Discover the education opportunities your loca...,"In 1981, WaterAid was set up by members of the...",,teaching,Educational resources from your local water co...
4,https://www.wateraid.org//uk/get-involved/givi...,Muslim Faith Giving,,,"Around the world, many Muslim communities do n...",Clean water and good hygiene means families ca...,,giving,Muslim Faith Giving. Location is NA. Around th...


In [5]:
# Turn the dataframe into documents: the 'combined' column is the text that
# embeddings are created for (page_content_column names that column).
loader = DataFrameLoader(
    df,
    page_content_column="combined",
)
docs = loader.load()

In [6]:
# Embedding model shared by indexing (here) and querying (later cells).
# NOTE(review): no model_name is passed, so the library default is used;
# consider pinning it explicitly for reproducibility.
embeddings = HuggingFaceEmbeddings()

# Embed every document and store it in the "listings_documents" collection,
# using cosine distance for similarity.
#
# pre_delete_collection=True makes this cell idempotent: without it each
# re-run appends the same listings again, so similarity searches start
# returning duplicate hits. It drops any existing contents of this collection
# first, which is intended because the collection is rebuilt from the CSV.
db = PGVector.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="listings_documents",
    distance_strategy=DistanceStrategy.COSINE,
    connection_string=CONNECTION_STRING,
    pre_delete_collection=True,
)

  from tqdm.autonotebook import tqdm, trange


In [9]:
# Smoke test of the vector database: open the existing collection, run one
# free-text query and print the best matches.

# Handle on the already-populated collection (same embedding function and
# distance strategy as were used when the documents were stored).
store = PGVector(
    collection_name="listings_documents",
    connection_string=CONNECTION_STRING,
    distance_strategy=DistanceStrategy.COSINE,
    embedding_function=embeddings,
)

query = (
    "I am based in Newcastle. I am a accountant. "
    "I like to watch variety shows in my free time. "
    "What activities would you recommend to me?"
)

# Retrieve the three nearest documents to the query.
docs = store.similarity_search(query, k=3)

# For each hit, show the embedded text followed by the activity name and the
# listing URL taken from the document metadata.
for doc in docs:
    doc_content = doc.page_content
    print(doc_content)

    doc_metadata = doc.metadata
    for field in ('Name of Activity', 'Listing URL'):
        print(doc_metadata[field])


Yorkshire Three Peaks weekend. Location is Chapel-le-Dale, North Yorkshire. Summit Ingleborough, Whernside and Pen y Ghent on this tough – but achievable! – weekend. Join a small group of like-minded trekkers on this fully-supported challenge, led by the experts at Discover Adventure.How to register
Yorkshire Three Peaks weekend
https://www.wateraid.org//uk/get-involved/events/yorkshire-three-peaks-weekend
Bake for WaterAid. Location is NA. Sweet or savoury, everyone loves a bake sale – and few people would turn down a piece of cake for a good cause! If you don't want to be the only one slaving over a hot stove, get your colleagues or friends involved by adding a competitive element – nominate a few lucky judges and ask them to vote on the tastiest culinary contribution. Remind everyone it's all for a great cause with some WaterAid resources to draw attention to your sale. We can send you a tablecloth, balloons, bunting, leaflets and a collection tin – drop us a line at [email protecte