In [1]:
import os
from getpass import getpass

import numpy as np
import pandas as pd
from langchain.document_loaders import DataFrameLoader
from langchain.schema import Document
from langchain.vectorstores.pgvector import DistanceStrategy
from langchain.vectorstores.pgvector import PGVector
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
# Connection string for the pgvector-enabled Postgres database.
# It can be overridden through the PGVECTOR_CONNECTION_STRING environment
# variable so that real credentials never have to be written into the
# notebook; the fallback points at the local development database.
CONNECTION_STRING = os.environ.get(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql://postgres:123456@localhost:5433/wateraid",
)

# Load the listings and replace every missing value with the literal string
# "NA" (the text-building cell below concatenates these columns as strings).
# fillna() returns a new frame, so re-running this cell is idempotent.
df = pd.read_csv('Listings_Details.csv').fillna("NA")
df.head()

Unnamed: 0,Listing URL,Name of Activity,Date,Location,Event Synopsis,Event Description,Registration Link,Activity Category
0,https://www.wateraid.org//uk/get-involved/givi...,Hire our Handwashing Hubs,,,Hire our contactless handwashing facilities to...,Frequent handwashing is one of the most effect...,,giving
1,https://www.wateraid.org//uk/get-involved/even...,Swim Serpentine,14 September 2024,"Hyde Park, London",Enjoy a late summer swim in the beautiful surr...,Take in the views with a leisurely half mile o...,https://forms.office.com/Pages/ResponsePage.as...,events
2,https://www.wateraid.org//uk/get-involved/teac...,World Water Day activities,,,Use World Water Day on 22 March to introduce y...,703 million people in the world – that's almos...,,teaching resources
3,https://www.wateraid.org//uk/get-involved/teac...,Educational resources from your local water co...,,,Discover the education opportunities your loca...,"In 1981, WaterAid was set up by members of the...",,teaching
4,https://www.wateraid.org//uk/get-involved/givi...,Muslim Faith Giving,,,"Around the world, many Muslim communities do n...",Clean water and good hygiene means families ca...,,giving


In [3]:
# Build the passage that is embedded for each listing:
#   "<name>. Location is <location>. <synopsis> <description>"
# Columns are addressed by name rather than by integer position: positional
# access such as row[1] on a label-indexed Series is deprecated in pandas 2.x,
# and named columns keep working if the CSV column order changes.
# The expression is vectorised, so no per-row Python loop is needed.
combined = (
    df["Name of Activity"]
    + ". Location is "
    + df["Location"]
    + ". "
    + df["Event Synopsis"]
    + " "
    + df["Event Description"]
).tolist()

df['combined'] = combined

In [4]:
# Preview the first rows to check the new 'combined' column.
df.head(n=5)


Unnamed: 0,Listing URL,Name of Activity,Date,Location,Event Synopsis,Event Description,Registration Link,Activity Category,combined
0,https://www.wateraid.org//uk/get-involved/givi...,Hire our Handwashing Hubs,,,Hire our contactless handwashing facilities to...,Frequent handwashing is one of the most effect...,,giving,Hire our Handwashing Hubs. Location is NA. Hir...
1,https://www.wateraid.org//uk/get-involved/even...,Swim Serpentine,14 September 2024,"Hyde Park, London",Enjoy a late summer swim in the beautiful surr...,Take in the views with a leisurely half mile o...,https://forms.office.com/Pages/ResponsePage.as...,events,"Swim Serpentine. Location is Hyde Park, London..."
2,https://www.wateraid.org//uk/get-involved/teac...,World Water Day activities,,,Use World Water Day on 22 March to introduce y...,703 million people in the world – that's almos...,,teaching resources,World Water Day activities. Location is NA. Us...
3,https://www.wateraid.org//uk/get-involved/teac...,Educational resources from your local water co...,,,Discover the education opportunities your loca...,"In 1981, WaterAid was set up by members of the...",,teaching,Educational resources from your local water co...
4,https://www.wateraid.org//uk/get-involved/givi...,Muslim Faith Giving,,,"Around the world, many Muslim communities do n...",Clean water and good hygiene means families ca...,,giving,Muslim Faith Giving. Location is NA. Around th...


In [5]:
# Turn the dataframe into LangChain documents. `page_content_column` names the
# dataframe column whose text becomes each document's content, i.e. the text
# that embeddings are created for.
loader = DataFrameLoader(
    df,
    page_content_column="combined",
)
docs = loader.load()

In [8]:
# Name the embedding model explicitly instead of relying on the library
# default. The vectors written to Postgres here and the query vectors computed
# in later cells must come from the same model, and an implicit default can
# change between library versions.
# NOTE(review): this is the model HuggingFaceEmbeddings() is documented to use
# by default — confirm against the installed langchain_huggingface version.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Embed every document and store it in the "test_listings" collection using
# cosine distance.
# NOTE(review): re-running this cell presumably inserts the documents again —
# confirm whether duplicates are acceptable for this collection.
db = PGVector.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="test_listings",
    distance_strategy=DistanceStrategy.COSINE,
    connection_string=CONNECTION_STRING,
)

KeyboardInterrupt: 

In [9]:
# TESTING USAGE / QUERY / SEARCH OF VECTOR DATABASE

# Open the collection through a PGVector handle, using the same embedding
# function and distance strategy that were used when indexing.
store = PGVector(
    connection_string=CONNECTION_STRING,
    embedding_function=embeddings,
    collection_name="test_listings",
    distance_strategy=DistanceStrategy.COSINE,
)

query = "I am based in Newcastle. I am a accountant. I like to watch variety shows in my free time. What activities would you recommend to me?"

# Keep the hits in their own variable. Assigning them to `docs` would
# overwrite the loader output from the earlier cell, so re-running the
# indexing cell afterwards would index only these three search results.
similar_docs = store.similarity_search(query, k=3)

# Show the embedded text plus the identifying metadata of every hit.
for doc in similar_docs:
    print(doc.page_content)
    print(doc.metadata['Name of Activity'])
    print(doc.metadata['Listing URL'])


Yorkshire Three Peaks weekend. Location is Chapel-le-Dale, North Yorkshire. Summit Ingleborough, Whernside and Pen y Ghent on this tough – but achievable! – weekend. Join a small group of like-minded trekkers on this fully-supported challenge, led by the experts at Discover Adventure.How to register
Yorkshire Three Peaks weekend
https://www.wateraid.org//uk/get-involved/events/yorkshire-three-peaks-weekend
Bake for WaterAid. Location is NA. Sweet or savoury, everyone loves a bake sale – and few people would turn down a piece of cake for a good cause! If you don't want to be the only one slaving over a hot stove, get your colleagues or friends involved by adding a competitive element – nominate a few lucky judges and ask them to vote on the tastiest culinary contribution. Remind everyone it's all for a great cause with some WaterAid resources to draw attention to your sale. We can send you a tablecloth, balloons, bunting, leaflets and a collection tin – drop us a line at [email protecte

In [10]:
# Expose the vector store as a retriever; "k": 3 is passed through as the
# search keyword arguments (three documents per query).
retriever = store.as_retriever(
    search_kwargs=dict(k=3),
)


In [54]:
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM

# watsonx.ai connection settings.
# The API key is a secret and must not live in the notebook: it is read from
# the WATSONX_APIKEY environment variable, falling back to an interactive
# prompt. URL and project id can be overridden the same way and default to the
# values this notebook has always used.
# NOTE(review): the key that was previously hard-coded here has been exposed
# in the notebook history and should be revoked and replaced.
wxa_url = os.environ.get("WATSONX_URL", "https://eu-gb.ml.cloud.ibm.com")
wxa_api_key = os.environ.get("WATSONX_APIKEY") or getpass("watsonx.ai API key: ")
wxa_project_id = os.environ.get(
    "WATSONX_PROJECT_ID", "573a5af9-21d8-414c-90ea-ca983ffa683c"
)

# Text-generation settings: greedy decoding, between 1 and 500 new tokens.
parameters = {
    GenParams.DECODING_METHOD: DecodingMethods.GREEDY,
    GenParams.MIN_NEW_TOKENS: 1,
    GenParams.MAX_NEW_TOKENS: 500
}

# Foundation-model client for the Granite 13B instruct model.
model = Model(
    model_id="ibm/granite-13b-instruct-v2",
    params=parameters,
    credentials={
        "url": wxa_url,
        "apikey": wxa_api_key
    },
    project_id=wxa_project_id
)

# LangChain-compatible wrapper around the model, used by the QA chains below.
granite_llm_ibm = WatsonxLLM(model=model)

In [13]:
from langchain.chains import RetrievalQA
query = "I am based in Newcastle. I am a accountant. I like to watch variety shows in my free time. What activities would you recommend to me?"

# Retrieval QA chain with the default prompt: the retrieved listings are
# "stuffed" into a single prompt for the Granite model.
qa_stuff = RetrievalQA.from_chain_type(
    llm=granite_llm_ibm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)

# Chain.run() is deprecated in favour of invoke(). RetrievalQA takes its
# input under the "query" key and returns the answer under "result".
response = qa_stuff.invoke({"query": query})["result"]

print(response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
 There are many types of volunteer opportunities available with WaterAid. One of them is to join the Events team. In this role, you would get to travel to different locations and help with events. You would also get to develop your skills while helping others.


In [78]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

retriever = store.as_retriever(search_kwargs={"k": 3})

# Custom prompt: the retrieved listings are injected as {context}, the user
# request as {question}; the trailing header nudges the model to answer with
# a list of recommendations. (Typo "recommnedations" fixed — it was sent
# verbatim to the model.)
prompt = PromptTemplate(template="""

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}

##Question:{question} \n\

##Top 3 recommendations of activities:\n""",input_variables=["context","question"])

chain_type_kwargs = {"prompt": prompt}

qa = RetrievalQA.from_chain_type(llm=granite_llm_ibm, chain_type="stuff",
                                 retriever=retriever,
                                 chain_type_kwargs=chain_type_kwargs,
                                 verbose=True)

query = "I like to run and swim in my free time. I am based in London. What activities are recommended for me based on the context provided - give the specific name of the activity? Give a reason why each activity is recommended for me."

# Fetch the supporting documents separately so they can be printed next to
# the answer. invoke() replaces the deprecated get_relevant_documents().
retrieved_docs = retriever.invoke(query)

# invoke() replaces the deprecated Chain.run(); the answer is under "result".
res = qa.invoke({"query": query})["result"]


print(query)
print(res)
print(retrieved_docs)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
I like to run and swim in my free time. I am based in London. What activities are recommended for me based on the context provided - give the specific name of the activity? Give a reason why each activity is recommended for me.
1. Swim Marathon: because I can take on a challenge like no other as I swim a marathon or half marathon distance over 12 weeks.
2. Henley Swim Festival: because it is a family-friendly festival, celebrating the joys of being next to, on, or in the river!
3. Thames Marathon: because it is a half marathon challenge (5.5km), or tackle the full 13km from Henley to Marlow!
[Document(page_content="Swim Marathon. Location is Pool, lido or open water. Take on a challenge like no other as you swim a marathon or half marathon distance over 12 weeks. Whether you’re already a swimming fanatic or you're looking to dip your toe in for the first time, take your health, wellbeing and fitness to the next le

In [79]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Re-run of the recommendation query from the previous cell.
# That cell already defines `retriever`, `prompt`, `qa` and `query` with
# exactly the values this cell used to rebuild, so they are reused here
# instead of keeping a second copy that can drift out of sync.

# invoke() replaces the deprecated get_relevant_documents() / Chain.run().
retrieved_docs = retriever.invoke(query)

res = qa.invoke({"query": query})["result"]


print(query)
print(res)
print(retrieved_docs)



[1m> Entering new RetrievalQA chain...[0m


Request failed with: {"errors":[{"code":"token_quota_reached","message":"Request of 1 token(s) from quota was rejected","more_info":"https://cloud.ibm.com/apidocs/watsonx-ai"}],"trace":"7cfd751e8608f72a7dc5ccbf377fb631","status_code":403} (403)


WMLClientError: Request failed with: {"errors":[{"code":"token_quota_reached","message":"Request of 1 token(s) from quota was rejected","more_info":"https://cloud.ibm.com/apidocs/watsonx-ai"}],"trace":"7cfd751e8608f72a7dc5ccbf377fb631","status_code":403} (403)