In [8]:
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

### CONFIGURATION ###
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "argentic"  




# ----------------------------
# 1. Connect to Neo4j and Retrieve Nodes
# ----------------------------

In [9]:

try:
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    # Test connection
    with driver.session() as session:
        session.run("RETURN 1")
    print("Connected to Neo4j successfully.")
except ServiceUnavailable as e:
    print("Neo4j connection error:", e)
    exit(1)

def get_nodes(label):
    with driver.session() as session:
        result = session.run(f"MATCH (n:{label}) RETURN n")
        nodes = [record["n"] for record in result]
    return nodes

# Retrieve nodes from the graph.
cities = get_nodes("City")
flights = get_nodes("Flight")
hotels = get_nodes("Hotel")
restaurants = get_nodes("Restaurant")
preferences = get_nodes("Preference")
users = get_nodes("User")

Connected to Neo4j successfully.


# ----------------------------
# 2. Create Clean Text Representations for Each Node
# ----------------------------

In [10]:


def build_representation(props, fields):
    parts = []
    for field, label in fields.items():
        value = props.get(field)
        if value is not None and str(value).strip() != "":
            parts.append(f"{label}: {value}")
    return "; ".join(parts)

def represent_city(node):
    fields = {
        "City": "City",
        "Country": "Country",
        "weather": "Weather",
        "avg_flight_cost": "Avg Flight Cost",
        "avg_hotel_cost": "Avg Hotel Cost"
    }
    return build_representation(node._properties, fields)

def represent_flight(node):
    fields = {
        "Flight": "Flight",
        "Departure": "Departure",
        "Arrival": "Arrival",
        "avg_cost": "Avg Cost"
    }
    return build_representation(node._properties, fields)

def represent_hotel(node):
    fields = {
        "Hotel": "Hotel",
        "City": "City",
        "Country": "Country",
        "avg_cost": "Avg Cost"
    }
    return build_representation(node._properties, fields)

def represent_restaurant(node):
    fields = {
        "Restaurant": "Restaurant",
        "City": "City",
        "Country": "Country",
        "rating": "Rating"
    }
    return build_representation(node._properties, fields)

def represent_preference(node):
    fields = {
        "user_id": "User ID",
        "preferences": "Preferences"
    }
    return build_representation(node._properties, fields)

def represent_user(node):
    fields = {
        "user_id": "User ID",
        "name": "Name"
    }
    return build_representation(node._properties, fields)

def get_clean_representations(nodes, represent_func):
    reps = []
    for node in nodes:
        rep = represent_func(node)
        if rep and "Unknown" not in rep:
            reps.append(rep)
    return reps

representations = []
representations += get_clean_representations(cities, represent_city)
representations += get_clean_representations(flights, represent_flight)
representations += get_clean_representations(hotels, represent_hotel)
representations += get_clean_representations(restaurants, represent_restaurant)
representations += get_clean_representations(preferences, represent_preference)
representations += get_clean_representations(users, represent_user)

if not representations:
    print("Warning: No clean representations found; using all available representations.")
    for node in cities:
        representations.append(represent_city(node))
    for node in flights:
        representations.append(represent_flight(node))
    for node in hotels:
        representations.append(represent_hotel(node))
    for node in restaurants:
        representations.append(represent_restaurant(node))
    for node in preferences:
        representations.append(represent_preference(node))
    for node in users:
        representations.append(represent_user(node))

representations = list(set(representations))
print(f"Total representations for retrieval: {len(representations)}")



Total representations for retrieval: 1000


# ----------------------------
# 3. Compute Embeddings for All Representations
# ----------------------------

In [11]:
print("Computing embeddings...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = embedder.encode(representations, convert_to_tensor=True)

Computing embeddings...


# ----------------------------
# 4. Define a Retrieval Function
# ----------------------------

In [12]:
def retrieve_documents(query, top_k=3):
    query_embedding = embedder.encode([query], convert_to_tensor=True)
    cos_scores = cosine_similarity(query_embedding.cpu().numpy(), doc_embeddings.cpu().numpy())[0]
    top_indices = np.argsort(cos_scores)[::-1][:top_k]
    retrieved_docs = [representations[i] for i in top_indices]
    return retrieved_docs

# ----------------------------
# 5. LLM for Final Answer Generation (Streamlined Prompt)
# ----------------------------

In [13]:
generator = pipeline("text-generation", model="gpt2", max_length=150)

def generate_final_answer(query):
    retrieved_docs = retrieve_documents(query, top_k=3)
    # Build a clean prompt without repeating extraneous information.
    prompt = "Travel Data:\n" + "\n".join(retrieved_docs) + "\n\n"
    prompt += f"Question: {query}\nAnswer:"
    result = generator(prompt, max_length=150, num_return_sequences=1)
    generated_text = result[0]["generated_text"]
    # Remove the prompt from the output.
    answer = generated_text.replace(prompt, "").strip()
    # Optionally, split and take the first line if it contains extra information.
    answer = answer.split("\n")[0].strip()
    return answer


Device set to use cpu


# ----------------------------
# 6. Process Sample Questions and Save to CSV
# ----------------------------

In [20]:
sample_questions = [
    "What country is Miami in and what are the average travel costs there?",
    "List cities with sunny weather and affordable flight and hotel costs.",
    "What is the average flight cost to Paris?",
    "Which city offers last-minute travel deals?",
    "Find restaurants in New York with high ratings.",
    "What is the average hotel cost in Tokyo?",
    "Which flight provides the best balance of cost and travel time?",
    "List cities with rainy weather.",
    "What are common user travel preferences?",
    "Which city has a high cultural score and low hotel cost?",
    "Find flights departing from New York with low average cost.",
    "Which hotel in Paris has the best reviews?",
    "List restaurants in San Francisco with a rating above 4.5.",
    "Which city in Europe is known for its sunny weather?",
    "Find travel options for budget-conscious travelers.",
    "Which user has expressed interest in cultural attractions?",
    "List cities with a high average flight cost.",
    "Which flight has the shortest travel time?",
    "Find hotels with an average cost below $100.",
    "Which restaurant in Rome is best known for its authentic cuisine?",
    "List all cities offering last-minute deals.",
    "What is the average hotel cost in New York?",
    "Which city has the highest cultural score?",
    "Find flights with the best customer reviews.",
    "Which hotel offers the best value for money in London?",
    "List restaurants with diverse cuisine options in Los Angeles.",
    "Which flight is most popular among users?",
    "Find cities with both low flight and hotel costs.",
    "Which restaurant has the highest rating overall?",
    "List travel options for luxury seekers.",
    "Which city is known for its diverse cultural heritage?",
    "Find budget hotels in Miami.",
    "Which flight has the best on-time performance?",
    "List restaurants that are popular among locals.",
    "Which city offers the best combination of weather and cultural attractions?",
    "Find flights that operate internationally with low cost.",
    "Which hotel in Tokyo is recommended for business travelers?",
    "List cities with high tourist satisfaction ratings.",
    "Which restaurant offers the best outdoor dining experience?",
    "Find travel options that emphasize eco-friendly practices.",
    "Which flight offers flexible cancellation policies?",
    "List hotels that have received awards for service excellence.",
    "Which city has the best public transportation system for tourists?",
    "Find restaurants that offer vegan options.",
    "Which hotel in Paris offers the most affordable luxury experience?",
    "List cities with a growing number of international flights.",
    "Which flight offers the best connectivity between major hubs?",
    "Find travel packages that include both flight and hotel discounts.",
    "Which city has the best nightlife for travelers?",
    "Find budget-friendly restaurants in Chicago."
]

output_rows = []
for q in sample_questions:
    answer = generate_final_answer(q)
    output_rows.append({"Question": q, "Answer": answer})
    print("Q:", q)
    print("A:", answer)
    print("-----")

results_df = pd.DataFrame(output_rows)
results_df.to_csv("generated_travel_queries.csv", index=False)
print("CSV file 'generated_travel_queries.csv' created with question-answer pairs.")

# Close the Neo4j driver
driver.close()


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What country is Miami in and what are the average travel costs there?
A: Miami only happens in the US, but has a big population. There are four major cities: Miami-Dade, Miami Beach, Miami
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with sunny weather and affordable flight and hotel costs.
A: The cities that receive most of their services are those with the largest numbers of flights and hotels.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What is the average flight cost to Paris?
A: $16,200 for this flight
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city offers last-minute travel deals?
A: Seattle (USA).
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find restaurants in New York with high ratings.
A: It's all New York City. We get about 5,000 visitors per day and that makes the rest Chicago and Miami to Chicago in
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What is the average hotel cost in Tokyo?
A: We have only been able to find out how much hotel costs, but we do know how much the average tourist spends there.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight provides the best balance of cost and travel time?
A: Answer:
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with rainy weather.
A: City: South Australia
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What are common user travel preferences?
A: 1. The United States. Travel on International Business Travel (UPTC) is regulated by the Travel Information and Travel Book (TIB). Based on that information, travelers can apply for federal health, transportation and housing assistance. These
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city has a high cultural score and low hotel cost?
A: Austin, Texas. On a recent trip to South America, we met some of the country's top leaders, including Vice President Al Gore. With
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find flights departing from New York with low average cost.
A: Find the minimum required cost. Cost is measured by the airport/carrier.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which hotel in Paris has the best reviews?
A: The best reviews are given for hotel-specific reasons, rather than the same ones from other social networking sites.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List restaurants in San Francisco with a rating above 4.5.
A: No, that's not possible. For more information about our rating, visit https://goo.gl/
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city in Europe is known for its sunny weather?
A: The Greek "virno" is "viking" and its "vietnost" is "viking-cynicum
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find travel options for budget-conscious travelers.
A: Traveling data is collected primarily for the purposes of booking and using various travel websites, and it is not necessarily accurate to make specific estimates with no
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which user has expressed interest in cultural attractions?
A: In August 2017, I visited The Great Lakes, the site of the World's Fair. I love watching nature! I like to have a good time
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with a high average flight cost.
A: None
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight has the shortest travel time?
A: A flight with shortest flight times will typically reach its maximum trip time in less than 10 minutes.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find hotels with an average cost below $100.
A: None. Hotel Cost: This category, not $100+ at first, is to be used.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which restaurant in Rome is best known for its authentic cuisine?
A: There are a couple of popular food chains in many European cities, but a real win for those who want to understand Italian dining trends
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List all cities offering last-minute deals.
A: Most businesses do offer this option, but for people at risk or without a plan, a small discount is more than necessary!
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: What is the average hotel cost in New York?
A: $30 a night
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city has the highest cultural score?
A: Honolulu
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find flights with the best customer reviews.
A: We do our best to report our flight delays as follows:
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which hotel offers the best value for money in London?
A: You can go with any hotel in London and you can find out which one has the best value per minute (usually at $40 to $50 each
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List restaurants with diverse cuisine options in Los Angeles.
A: You must provide an official menu, because it is not official.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight is most popular among users?
A: Flight Number: K933, G27-1H, G35-L
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find cities with both low flight and hotel costs.
A: San Diego and San Francisco
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which restaurant has the highest rating overall?
A: From:
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List travel options for luxury seekers.
A: Q: Are you sure travelers are at least 18 years old to enter the United States?
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city is known for its diverse cultural heritage?
A: New Bedford.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find budget hotels in Miami.
A: They're pretty terrible.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight has the best on-time performance?
A: The best way to determine which flight has the fastest on-time performance is to use Flight Quality in the Flight Data. See below. This will determine how
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List restaurants that are popular among locals.
A: Question: The first thing to notice when choosing restaurants in rural and urban settings are the locations of hotels with convenient access (although it should not be so apparent to a
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city offers the best combination of weather and cultural attractions?
A: Dallas, TX
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find flights that operate internationally with low cost.
A: The easiest way to find international flights is to head to a website like Credibility.org. The site has flights to Europe such as the
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which hotel in Tokyo is recommended for business travelers?
A: Chino (K-G 7, Yamanashi-ku 9)
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with high tourist satisfaction ratings.
A: High-end rental city in the state of North Carolina. If you are in one of these cities, you can move in, but you will need the ID
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which restaurant offers the best outdoor dining experience?
A: I had high expectations for my next meal of meat tacos with beef. I was so surprised when I first came into this place. I really dig all
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find travel options that emphasize eco-friendly practices.
A: When it comes to travel, most places have a great combination of low carbon and sustainable living.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight offers flexible cancellation policies?
A: You might be interested to know that in April, Delta Airlines changed its scheduling policy based on factors such as weather, crew availability and availability to customers.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List hotels that have received awards for service excellence.
A: There are over 600 award-winning hotels.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city has the best public transportation system for tourists?
A: San Francisco
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find restaurants that offer vegan options.
A: A food that is prepared using meat, dairy and eggs, is labeled Vegan with no animal derived ingredients. This means the food is not suitable for
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which hotel in Paris offers the most affordable luxury experience?
A: A resort or hostel at the Bernadino (744 L'Auvagne, Paris);
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: List cities with a growing number of international flights.
A: If I am travelling to a city in a jurisdiction that has a large number of international flights and it has not
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which flight offers the best connectivity between major hubs?
A: As above, the majority of airports across the world offer both a "full" and "short" choice for connecting flights. These are usually the hubs
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Find travel packages that include both flight and hotel discounts.
A: Yes, the price shown above is a $20 booking discounts, but it is a more generous discount.
-----


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q: Which city has the best nightlife for travelers?
A: Vancouver, BC
-----
Q: Find budget-friendly restaurants in Chicago.
A: In the last two years, Chicago has hired the largest private research organization in the country, a nonprofit group that takes money from the state of Illinois to create and
-----
CSV file 'generated_travel_queries.csv' created with question-answer pairs.
