In [1]:
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

### CONFIGURATION ###
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "argentic"  




1. Connect to Neo4j and Retrieve Nodes

In [2]:

try:
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    # Test connection
    with driver.session() as session:
        session.run("RETURN 1")
    print("Connected to Neo4j successfully.")
except ServiceUnavailable as e:
    print("Neo4j connection error:", e)
    exit(1)

def get_nodes(label):
    with driver.session() as session:
        result = session.run(f"MATCH (n:{label}) RETURN n")
        nodes = [record["n"] for record in result]
    return nodes

# Retrieve nodes from the graph.
cities = get_nodes("City")
flights = get_nodes("Flight")
hotels = get_nodes("Hotel")
restaurants = get_nodes("Restaurant")
preferences = get_nodes("Preference")
users = get_nodes("User")

Connected to Neo4j successfully.


# ----------------------------
# 2. Create Clean Text Representations for Each Node
# ----------------------------

In [3]:


def build_representation(props, fields):
    parts = []
    for field, label in fields.items():
        value = props.get(field)
        if value is not None and str(value).strip() != "":
            parts.append(f"{label}: {value}")
    return "; ".join(parts)

def represent_city(node):
    fields = {
        "City": "City",
        "Country": "Country",
        "weather": "Weather",
        "avg_flight_cost": "Avg Flight Cost",
        "avg_hotel_cost": "Avg Hotel Cost"
    }
    return build_representation(node._properties, fields)

def represent_flight(node):
    fields = {
        "Flight": "Flight",
        "Departure": "Departure",
        "Arrival": "Arrival",
        "avg_cost": "Avg Cost"
    }
    return build_representation(node._properties, fields)

def represent_hotel(node):
    fields = {
        "Hotel": "Hotel",
        "City": "City",
        "Country": "Country",
        "avg_cost": "Avg Cost"
    }
    return build_representation(node._properties, fields)

def represent_restaurant(node):
    fields = {
        "Restaurant": "Restaurant",
        "City": "City",
        "Country": "Country",
        "rating": "Rating"
    }
    return build_representation(node._properties, fields)

def represent_preference(node):
    fields = {
        "user_id": "User ID",
        "preferences": "Preferences"
    }
    return build_representation(node._properties, fields)

def represent_user(node):
    fields = {
        "user_id": "User ID",
        "name": "Name"
    }
    return build_representation(node._properties, fields)

def get_clean_representations(nodes, represent_func):
    reps = []
    for node in nodes:
        rep = represent_func(node)
        if rep and "Unknown" not in rep:
            reps.append(rep)
    return reps

representations = []
representations += get_clean_representations(cities, represent_city)
representations += get_clean_representations(flights, represent_flight)
representations += get_clean_representations(hotels, represent_hotel)
representations += get_clean_representations(restaurants, represent_restaurant)
representations += get_clean_representations(preferences, represent_preference)
representations += get_clean_representations(users, represent_user)

if not representations:
    print("Warning: No clean representations found; using all available representations.")
    for node in cities:
        representations.append(represent_city(node))
    for node in flights:
        representations.append(represent_flight(node))
    for node in hotels:
        representations.append(represent_hotel(node))
    for node in restaurants:
        representations.append(represent_restaurant(node))
    for node in preferences:
        representations.append(represent_preference(node))
    for node in users:
        representations.append(represent_user(node))

representations = list(set(representations))
print(f"Total representations for retrieval: {len(representations)}")



Total representations for retrieval: 1000


# ----------------------------
# 3. Compute Embeddings for All Representations
# ----------------------------

In [4]:
print("Computing embeddings...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = embedder.encode(representations, convert_to_tensor=True)

Computing embeddings...


# ----------------------------
# 4. Define a Retrieval Function
# ----------------------------

In [5]:
def retrieve_documents(query, top_k=3):
    query_embedding = embedder.encode([query], convert_to_tensor=True)
    cos_scores = cosine_similarity(query_embedding.cpu().numpy(), doc_embeddings.cpu().numpy())[0]
    top_indices = np.argsort(cos_scores)[::-1][:top_k]
    retrieved_docs = [representations[i] for i in top_indices]
    return retrieved_docs

# ----------------------------
# 5. LLM for Final Answer Generation (Streamlined Prompt)
# ----------------------------

In [6]:
generator = pipeline("text-generation", model="gpt2", max_length=150)

def generate_final_answer(query):
    retrieved_docs = retrieve_documents(query, top_k=3)
    # Build a clean prompt without repeating extraneous information.
    prompt = "Travel Data:\n" + "\n".join(retrieved_docs) + "\n\n"
    prompt += f"Question: {query}\nAnswer:"
    result = generator(prompt, max_length=150, num_return_sequences=1)
    generated_text = result[0]["generated_text"]
    # Remove the prompt from the output.
    answer = generated_text.replace(prompt, "").strip()
    # Optionally, split and take the first line if it contains extra information.
    answer = answer.split("\n")[0].strip()
    return answer


Device set to use cpu


# ----------------------------
# 6. Process Sample Questions and Save to CSV
# ----------------------------

In [7]:
sample_questions = [
    "What country is Miami in and what are the average travel costs there?",
    "List cities with sunny weather and affordable flight and hotel costs.",
    "What is the average flight cost to Paris?",
    "Which city offers last-minute travel deals?",
    "Find restaurants in New York with high ratings.",
    "What is the average hotel cost in Tokyo?",
    "Which flight provides the best balance of cost and travel time?",
    "List cities with rainy weather.",
    "What are common user travel preferences?",
    "Which city has a high cultural score and low hotel cost?",
    "Find flights departing from New York with low average cost.",
    "Which hotel in Paris has the best reviews?",
    "List restaurants in San Francisco with a rating above 4.5.",
    "Which city in Europe is known for its sunny weather?",
    "Find travel options for budget-conscious travelers.",
    "Which user has expressed interest in cultural attractions?",
    "List cities with a high average flight cost.",
    "Which flight has the shortest travel time?",
    "Find hotels with an average cost below $100.",
    "Which restaurant in Rome is best known for its authentic cuisine?",
    "List all cities offering last-minute deals.",
    "What is the average hotel cost in New York?",
    "Which city has the highest cultural score?",
    "Find flights with the best customer reviews.",
    "Which hotel offers the best value for money in London?",
    "List restaurants with diverse cuisine options in Los Angeles.",
    "Which flight is most popular among users?",
    "Find cities with both low flight and hotel costs.",
    "Which restaurant has the highest rating overall?",
    "List travel options for luxury seekers.",
    "Which city is known for its diverse cultural heritage?",
    "Find budget hotels in Miami.",
    "Which flight has the best on-time performance?",
    "List restaurants that are popular among locals.",
    "Which city offers the best combination of weather and cultural attractions?",
    "Find flights that operate internationally with low cost.",
    "Which hotel in Tokyo is recommended for business travelers?",
    "List cities with high tourist satisfaction ratings.",
    "Which restaurant offers the best outdoor dining experience?",
    "Find travel options that emphasize eco-friendly practices.",
    "Which flight offers flexible cancellation policies?",
    "List hotels that have received awards for service excellence.",
    "Which city has the best public transportation system for tourists?",
    "Find restaurants that offer vegan options.",
    "Which hotel in Paris offers the most affordable luxury experience?",
    "List cities with a growing number of international flights.",
    "Which flight offers the best connectivity between major hubs?",
    "Find travel packages that include both flight and hotel discounts.",
    "Which city has the best nightlife for travelers?",
    "Find budget-friendly restaurants in Chicago."
]

output_rows = []
for q in sample_questions:
    answer = generate_final_answer(q)
    output_rows.append({"Question": q, "Answer": answer})
    print("Q:", q)
    print("A:", answer)
    print("-----")

results_df = pd.DataFrame(output_rows)
results_df.to_csv("generated_travel_queries.csv", index=False)
print("CSV file 'generated_travel_queries.csv' created with question-answer pairs.")

# Close the Neo4j driver
driver.close()


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What country is Miami in and what are the average travel costs there?
A: In Miami, $1.2 billion a year, and in other major cities like Boston, Philadelphia, New York, Chicago (including those
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with sunny weather and affordable flight and hotel costs.
A: Answer #2: Sunny days are not generally associated with better flight times for passengers than daytime on average.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What is the average flight cost to Paris?
A: $60,000 (including the cost of a plane ticket to Paris from Sydney to Sydney airport), $40,000 for a ticket to Boston, $9,000 with a flight
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city offers last-minute travel deals?
A: First, the city of New London is often called the city of big-name athletes — a city where athletes and their sponsors tend to live, where
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find restaurants in New York with high ratings.
A: I found their Yelp search results the fastest. They had 30,000 reviews. If we put a number of reviews into the list,
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What is the average hotel cost in Tokyo?
A: Hotels on average cost 3.5 times as much as hotels in any other region.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight provides the best balance of cost and travel time?
A: The same thing as the Customer Service. The best balance of travel time could mean different benefits to each flight. (For example, a JetBlue
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with rainy weather.
A: I live in Perth and Brisbane. For Australia's driest part of the month, I live in Perth and the west. My question about rain in Perth
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What are common user travel preferences?
A: Use this as a starting point as you look at some of the other options below.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city has a high cultural score and low hotel cost?
A: The number of foreign nationals in Mexico is about a hundred thousand higher than the highest cities in the nation, though that is probably due to the higher
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find flights departing from New York with low average cost.
A: Find on the internet, you can find more detailed information on it. You will receive a list of flights for your trip for a fee (which you will
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which hotel in Paris has the best reviews?
A: The Michelin starren of Paris Hilton, Nicole Henry, writes the most frequently and enjoys the best reviews of most restaurants in the Parisian town. She also points out
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List restaurants in San Francisco with a rating above 4.5.
A: Restaurant in San Francisco with rated above 3 is actually the Golden Gate Bridge in San Francisco. If you do
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city in Europe is known for its sunny weather?
A: Paris (the Netherlands); the French capital of Paris (France); and the German city of Dresden. Which cities in Europe do you think there
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find travel options for budget-conscious travelers.
A: A map and user interface for budget travelers that are open to the public is available at https://www.travelfinance.com/guide/
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which user has expressed interest in cultural attractions?
A: The following UserID may represent: 1. a. p.m. in Ottawa; b. l. p.m. in Toronto and Vancouver
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with a high average flight cost.
A: City: Oakland
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight has the shortest travel time?
A: No flight leaves the airport at 35 minutes or less.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find hotels with an average cost below $100.
A: We are unable to offer this price on the website or at the hotels. We do not want to discourage you from taking this journey.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which restaurant in Rome is best known for its authentic cuisine?
A: Question: What are some of the best Italian restaurants in Rome? Answer:
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List all cities offering last-minute deals.
A: You may try listing each in the order on which you want to list.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What is the average hotel cost in New York?
A: This is based on the best experience. The average hotel cost in New York includes everything from general admission ticket prices to reservations and rooms. The average hotel costs in
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city has the highest cultural score?
A: Chicago
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find flights with the best customer reviews.
A: We only get a good few on Yelp, but they offer a lot of reviews that are good for you, just because you're on a business trip. Also
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which hotel offers the best value for money in London?
A: On top of the free hotel stay which is not available in New York or anywhere else, if you have a valid ticket for London, check the ticket
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List restaurants with diverse cuisine options in Los Angeles.
A: In all our restaurants throughout the Southern California area we only require a very simple kitchen.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight is most popular among users?
A: Flight Name:
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find cities with both low flight and hotel costs.
A: Example:
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which restaurant has the highest rating overall?
A: Question: How many times did I receive your email requesting an autograph in my shop?
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List travel options for luxury seekers.
A: All options are listed.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city is known for its diverse cultural heritage?
A: Miami, FL
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find budget hotels in Miami.
A: Look into the reports.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight has the best on-time performance?
A: Boeing 737-800 for flight 3 at 1137 am and Boeing 777-200 for flight 16 at 909 am
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List restaurants that are popular among locals.
A: The list was originally developed from a data set about the restaurants visited by locals and not of the locals who are served. We now know that it was compiled with information on
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city offers the best combination of weather and cultural attractions?
A: Chicago is in the North-Central region with the best amount of traffic in the city.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find flights that operate internationally with low cost.
A: See my question below
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which hotel in Tokyo is recommended for business travelers?
A: I am traveling mainly for business. A good place is the Hyena Hotel in Osaka. For the latest version of the database
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with high tourist satisfaction ratings.
A: City: San Antonio
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which restaurant offers the best outdoor dining experience?
A: The following are our top five choices for restaurant-themed outdoor dining.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find travel options that emphasize eco-friendly practices.
A: Many people use the hashtag #visitorcare to share interesting information and information to help people get the most from the different programs.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight offers flexible cancellation policies?
A: From the Customer Data Center:
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List hotels that have received awards for service excellence.
A: Yes!
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city has the best public transportation system for tourists?
A: Toronto, BC's (Merrill, NJ), Toronto West, Toronto West West, Toronto West East are both public transit systems along the shore of Lake
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find restaurants that offer vegan options.
A: To avoid confusion with this question, we have included a link to our database of restaurants that have vegan menus.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which hotel in Paris offers the most affordable luxury experience?
A: Not sure I'm going to answer that one, though I wouldn't say that I'm too old to be able to offer those. What I
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with a growing number of international flights.
A: As for US cities, this includes NYC, Minneapolis, San Francisco, Raleigh-Durham, Newark-Saras
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight offers the best connectivity between major hubs?
A: Seattle (2nd most congested, 2nd most landlocked) versus Dallas and Portland (3rd least congested, 3rd least landlocked).
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find travel packages that include both flight and hotel discounts.
A: We have several, but we aren't looking at flights you can get or hotel discounts.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city has the best nightlife for travelers?
A: Minneapolis Minneapolis Minneapolis
-----
Q: Find budget-friendly restaurants in Chicago.
A: Chicago is known for a lot of high quality food: it has a lot of good restaurants (like the $27.99 Chicken Fried Chicken and $36.
-----
CSV file 'generated_travel_queries.csv' created with question-answer pairs.
