In [2]:
import ollama
import pandas as pd 
import numpy as np 
from sentence_transformers import SentenceTransformer
import os
import lancedb

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Locations of the raw data sources, relative to this notebook.
# Each path keeps its trailing slash so it can be used directly as a prefix.
data_dir = "../../european-city-data/data-sources/"
wikivoyage_docs_dir = f"{data_dir}wikivoyage/"
wikivoyage_listings_dir = f"{wikivoyage_docs_dir}listings/"

In [4]:
# Loaded SentenceTransformer instances keyed by model name, so repeated queries
# reuse the same model object instead of constructing a new one on every call.
# Re-running this cell resets the cache.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name):
    """Return the SentenceTransformer for `model_name`, constructing it on first use."""
    if model_name not in _EMBEDDING_MODELS:
        _EMBEDDING_MODELS[model_name] = SentenceTransformer(model_name)
    return _EMBEDDING_MODELS[model_name]


def embed_query(query, model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Encode `query` with a sentence-transformers model and return the embedding.

    Parameters
    ----------
    query
        The text to embed; it is passed unchanged to the model's `encode`.
    model_name : str
        Name of the sentence-transformers model to use. The default matches
        the model this notebook has always used.

    Returns
    -------
    The result of `model.encode(query).tolist()`.
    """
    model = _get_embedding_model(model_name)
    embedding = model.encode(query).tolist()
    print("Finished encoding the query.")
    return embedding

def get_context(
    query,
    limit=5,
    uri="../database/wikivoyage-listings-db",
    table_name="wikivoyage_listings",
):
    """Retrieve the listings most similar to `query` and format them as strings.

    Parameters
    ----------
    query
        The user question; it is embedded with `embed_query` before searching.
    limit : int
        Maximum number of search results to request (default 5).
    uri : str
        LanceDB location passed to `lancedb.connect`.
    table_name : str
        Name of the table to open and search.

    Returns
    -------
    list of str
        One entry per result, in the form
        "city: ..., country: ..., name: ..., description: ...".
    """
    db = lancedb.connect(uri)
    print("Connected to DB.")

    query_embedding = embed_query(query)
    table = db.open_table(table_name)
    results = table.search(query_embedding).limit(limit).to_list()
    print("Found the most relevant documents.")

    # The listing's `title` field is surfaced to the LLM under the label "name".
    context = [
        f"city: {r['city']}, country: {r['country']}, name: {r['title']}, description: {r['description']}"
        for r in results
    ]

    return context

In [24]:
def rag_qa(question, context = None, model = "gemma:2b"):
    """Answer `question` with an Ollama chat model, grounded in retrieved context.

    Parameters
    ----------
    question
        The user question. It is inserted into the prompt and, when no
        context is supplied, also used to retrieve context via `get_context`.
    context
        Optional context. A string is inserted into the prompt as-is; any
        other iterable is rendered as one numbered line per item
        ("[1] ...", "[2] ...") so the "[position]" citations requested by the
        prompt have something to refer to. When falsy (None or empty),
        context is fetched with `get_context(question)`.
    model : str
        Name of the Ollama model to query (default "gemma:2b").

    Returns
    -------
    The `["message"]["content"]` field of the Ollama chat response.
    """
    base_prompt = """You are an AI assistant. Your task is to understand the user question, and provide an answer using the provided contexts. Every answer you generate should have citations in this pattern  "Answer [position].", for example: "Earth is round [1][2].," if it's relevant.
    Your answers are correct, high-quality, and written by an domain expert. If the provided context does not contain the answer, simply state, "The provided context does not have the answer."

    User question: {}

    Contexts:
    {}
    """

    # Fall back to retrieval when the caller supplies no (or empty) context.
    if not context:
        context = get_context(question)

    if isinstance(context, str):
        context_block = context
    else:
        # Number the passages instead of dumping the Python list repr into the
        # prompt; the join string keeps continuation lines aligned with the
        # template's four-space indent.
        context_block = "\n    ".join(
            f"[{position}] {passage}" for position, passage in enumerate(context, start=1)
        )

    prompt = base_prompt.format(question, context_block)

    # NOTE(review): the whole prompt is sent as a single "system" message with
    # no separate user turn — confirm the chosen model handles that as intended.
    response = ollama.chat(
        model=model,
        messages=[
            {
                "role": "system",
                "content": prompt,
            },
        ],
    )
    print("Finished generating response.")

    return response["message"]["content"]

In [6]:
# Alternative test question, kept commented out for quick switching:
# question = "What cities should I visit in Europe that have modern art museums?"
question = "I like hiking and nature. What cities do you recommend I visit?"

# No context is passed, so rag_qa retrieves it itself via get_context.
answer = rag_qa(question)

Connected to DB.
Finished encoding the query.
Found the most relevant documents.
Finished generating response.


In [7]:
# Display the answer string returned by rag_qa for `question`.
answer

'The context does not mention any cities that would be suitable for hiking and nature, so I cannot generate an answer from the context.'

In [25]:
# Run retrieval on its own so the retrieved passages can be inspected
# and then passed to rag_qa explicitly.
c = get_context(question)
print(c)

Connected to DB.
Finished encoding the query.
Found the most relevant documents.
["city: Tampere, country: Finland, name: Hiking, description: In Finland, everyone has a right of public access to the wilderness provided that you don't cause damage. In Tampere, good hiking/jogging/cycling/skiing grounds with marked paths can be found in the Pyynikki and Kauppi forests. In Hervanta, there's a popular 4km loop trail around lake Suolijärvi (take bus number 30 to get there).", 'city: Ivano-Frankivsk, country: Ukraine, name: Yaremche village and Waterfall "Probiy", description: Many outdoor option. Good base for a hike to Peak Hoverla the highest place in Ukraine. Also there is the Vorokhta ski resort', 'city: Kaliningrad, country: Russia, name: Trek in the forests, description: Soak up nature in the untouched forests', 'city: Trabzon, country: Turkey, name: Pontic Mountains, description: Many trekking, hiking, possibility. Zil Castle, Palovit Waterfall', 'city: Minsk, country: Belarus, name

In [26]:
# Ask the same question with the pre-fetched context `c`; as long as `c` is
# non-empty, rag_qa uses it instead of calling get_context again.
rag_qa(question, c)

Finished generating response.


'The context does not provide any recommendations for cities to hike in, so I cannot generate an answer from the context.'

In [12]:
# Second test question; context is retrieved inside rag_qa.
example = "Which are the best restaurants in Germany to eat Italian food?"
rag_qa(example)

Connected to DB.
Finished encoding the query.
Found the most relevant documents.
Finished generating response.


'The context does not provide any information about the best restaurants in Germany to eat Italian food, so I cannot generate an answer to this question from the provided context.'

In [16]:
# Same question, with the context fetched explicitly and passed in.
# NOTE(review): the saved output of this cell contains a "Prompt to LLM:" line
# that rag_qa does not currently print, so it appears to come from an earlier
# version of the function — re-run the notebook to refresh it.
rag_qa(example, get_context(example))

Connected to DB.
Finished encoding the query.
Found the most relevant documents.
Prompt to LLM: You are an AI assistant. Your task is to understand the user question, and provide an answer using the provided contexts. Every answer you generate should have citations in this pattern  "Answer [position].", for example: "Earth is round [1][2].," if it's relevant.
    Your answers are correct, high-quality, and written by an domain expert. If the provided context does not contain the answer, simply state, "The provided context does not have the answer."

    User question: Which are the best restaurants in Germany to eat Italian food?

    Contexts:
    ['city: Stuttgart, country: Germany, name: Ristorante Italiani, description: Good Italian food, friendly efficient service.', 'city: Aalborg, country: Denmark, name: San Giovanni, description: Good Italian restaurant', 'city: Dresden, country: Germany, name: Vecchia Napoli, description: A good Italian restaurant, with a wood fired pizza oven

'The provided context does not mention the best restaurants in Germany to eat Italian food, so I cannot answer this question from the provided context.'

In [18]:
# Third test question. Note that this rebinds `answer`, which an earlier cell
# used for the hiking question.
# NOTE(review): the saved output of this cell also shows a "Prompt to LLM:"
# line that rag_qa does not currently print — likely stale; re-run to refresh.
q = "Tell me some things to do in Croatia"
answer = rag_qa(q)

Connected to DB.
Finished encoding the query.
Found the most relevant documents.
Prompt to LLM: You are an AI assistant. Your task is to understand the user question, and provide an answer using the provided contexts. Every answer you generate should have citations in this pattern  "Answer [position].", for example: "Earth is round [1][2].," if it's relevant.
    Your answers are correct, high-quality, and written by an domain expert. If the provided context does not contain the answer, simply state, "The provided context does not have the answer."

    User question: Tell me some things to do in Croatia

    Contexts:
    ['city: Rijeka, country: Croatia, name: Parks and promenades, description: Nature lovers and more active visitors can discover the city in a different way – by researching Rijeka’s promenades by the sea, urban or forest areas, and for those less active, parks are the best option for taking a break during a walk around Rijeka.', 'city: Rijeka, country: Croatia, name: 

In [27]:
# Print the question `q` followed by the answer stored for it; print() is used
# so the newlines in the answer render as line breaks.
print(f"Prompt: {q}\n\n")
print(answer)

Prompt: Tell me some things to do in Croatia


**Answer 1.**
Rijeka has a beautiful coastline with several stunning beaches, each catering to different tastes. From the lively and bustling beaches of Rijeka to the secluded and peaceful beaches of the nearby islands, there is a perfect beach for everyone.

**Answer 2.**
Rijeka is a vibrant city with a rich history and culture. Hotel Neboder is a luxurious hotel with stunning views of the city, making it an ideal choice for travelers seeking a luxurious stay.

**Answer 3.**
Rijeka offers a wide array of dining options to suit all tastes and budgets. From traditional Croatian cuisine to international dishes, there is something to tantalize your taste buds.

**Answer 4.**
The Adriatic Sea boasts some of the most beautiful diving spots in Europe, with breathtaking underwater cliffs and caves that offer a plethora of diving opportunities for divers of all levels.


In [30]:
# Test question that mixes a destination country (France), a month and a
# departure city (Munich); context is retrieved inside rag_qa.
query = "Suggest some places to visit in France for the month of September for a user who is travelling from Munich"
rag_qa(query)

Connected to DB.
Finished encoding the query.
Found the most relevant documents.
Finished generating response.


'The provided context does not mention any places to visit in France for the month of September for a user travelling from Munich, so I cannot generate an answer from the context.'

In [31]:
# Same query string as the previous cell; call get_context directly to
# inspect which passages retrieval returns for it.
query = "Suggest some places to visit in France for the month of September for a user who is travelling from Munich"
get_context(query)

Connected to DB.
Finished encoding the query.
Found the most relevant documents.


['city: Munich, country: Germany, name: Tollwood, description: This three week long festival combines ethnic food, souvenir shops, concerts & theater and it is very popular among the locals.',
 "city: Munich, country: Germany, name: River-Surfing, description: In spring, join the locals surfing on the river at the edge of the ''Englischer Garten'', at the bridge near the Lehel U-bahn station.",
 'city: Dresden, country: Germany, name: Tips in English, description: weekly tips in English covering events which might be of particular interest to visitors',
 "city: Rennes, country: France, name: campsite, description: The at Rennes is rather large. To get there, take the number 3 bus, and get off at the ''Piscine/Gayeulles'' stop. The neighbouring park has much to offer, including an ice rink, a pool and sports facilities.",
 'city: Stuttgart, country: Germany, name: Mercedes-Benz Museum, description: Built in 2006 in an astonishing architecture, the Mercedes-Benz museum houses a collectio