In [1]:
import random
import itertools
import json

# --------------------------
# Generate Sample JSON Data
# --------------------------

cities = [f"City{i}" for i in range(1, 11)]

# 1. Attractions JSON
attractions_json = []
for city in cities:
    attractions = []
    for i in range(5):
        attractions.append({
            "name": f"{city} Attraction {i+1}",
            "rating": round(random.uniform(3.5, 5.0), 1),
            "price": random.randint(100, 500)
        })
    attractions_json.append({"city": city, "attractions": attractions})

# 2. Restaurants JSON
restaurants_json = []
for city in cities:
    for i in range(4):
        restaurants_json.append({
            "city": city,
            "restaurant_name": f"{city} Restaurant {i+1}",
            "rating": round(random.uniform(3.0, 5.0), 1),
            "price": random.randint(150, 600)
        })

# 3. Hotels JSON
hotels_json = []
for city in cities:
    hotels = []
    for i in range(3):
        hotels.append({
            "name": f"{city} Hotel {i+1}",
            "price_per_night": random.randint(1000, 3000)
        })
    hotels_json.append({"city": city, "hotels": hotels})

# 4. Flights JSON (2 per city pair)
flights_json = []
city_pairs = list(itertools.permutations(cities, 2))
for (from_city, to_city) in city_pairs:
    for i in range(2):
        flights_json.append({
            "flight_name": f"{from_city[:3]}-{to_city[:3]}-{i+1}",
            "flight_from": from_city,
            "flight_to": to_city,
            "price": random.randint(1500, 5000),
            "duration_hr": round(random.uniform(1.0, 4.0), 1)
        })

# Return everything for next steps
(
    attractions_json,
    restaurants_json,
    hotels_json,
    flights_json
)


([{'city': 'City1',
   'attractions': [{'name': 'City1 Attraction 1', 'rating': 3.5, 'price': 473},
    {'name': 'City1 Attraction 2', 'rating': 4.9, 'price': 120},
    {'name': 'City1 Attraction 3', 'rating': 4.9, 'price': 351},
    {'name': 'City1 Attraction 4', 'rating': 4.2, 'price': 377},
    {'name': 'City1 Attraction 5', 'rating': 3.8, 'price': 332}]},
  {'city': 'City2',
   'attractions': [{'name': 'City2 Attraction 1', 'rating': 4.8, 'price': 110},
    {'name': 'City2 Attraction 2', 'rating': 4.5, 'price': 403},
    {'name': 'City2 Attraction 3', 'rating': 4.7, 'price': 346},
    {'name': 'City2 Attraction 4', 'rating': 3.7, 'price': 367},
    {'name': 'City2 Attraction 5', 'rating': 4.4, 'price': 174}]},
  {'city': 'City3',
   'attractions': [{'name': 'City3 Attraction 1', 'rating': 4.8, 'price': 160},
    {'name': 'City3 Attraction 2', 'rating': 4.8, 'price': 303},
    {'name': 'City3 Attraction 3', 'rating': 4.9, 'price': 482},
    {'name': 'City3 Attraction 4', 'rating': 5

Here’s what we now have ready:

✔️ Generated JSON Structures:
Attractions JSON – 5 attractions per city with name, rating, and price

Restaurants JSON – 4 restaurants per city with name, rating, and price

Hotels JSON – 3 hotels per city with name and price per night

Flights JSON – 2 flights between each city pair (full permutation of 10 cities = 90 pairs → 180 flight options)

🚀 Next Step:
We will now use this data to run the entire end-to-end pipeline, including:

City scoring based on attraction, food, and affordability

Selecting top N cities (e.g., 5)

Route optimization based on flight costs

Day allocation per city

Budget estimation

Day-by-day itinerary creation

Final RAG input structure generation

In [6]:
! pip install ace_tools



In [7]:
import pandas as pd
from collections import defaultdict

# -------------------------------
# Step 1: City Scoring
# -------------------------------

city_scores = {}

for city in cities:
    # Attractions
    city_attractions = next(item['attractions'] for item in attractions_json if item['city'] == city)
    avg_attraction_rating = sum(a['rating'] for a in city_attractions) / len(city_attractions)
    total_attraction_price = sum(a['price'] for a in city_attractions)

    # Restaurants
    city_restaurants = [r for r in restaurants_json if r['city'] == city]
    avg_restaurant_rating = sum(r['rating'] for r in city_restaurants) / len(city_restaurants)
    avg_food_price = sum(r['price'] for r in city_restaurants) / len(city_restaurants)

    # Hotels
    city_hotels = next(item['hotels'] for item in hotels_json if item['city'] == city)
    avg_hotel_price = sum(h['price_per_night'] for h in city_hotels) / len(city_hotels)

    # Composite Score
    score = (
        0.4 * avg_attraction_rating +
        0.3 * avg_restaurant_rating +
        0.3 * (1 / (avg_hotel_price + avg_food_price + total_attraction_price)) * 10000
    )
    city_scores[city] = {
        "score": score,
        "avg_attraction_rating": avg_attraction_rating,
        "avg_restaurant_rating": avg_restaurant_rating,
        "avg_hotel_price": avg_hotel_price,
        "avg_food_price": avg_food_price,
        "total_attraction_price": total_attraction_price
    }

# Select top 5 cities
top_cities = sorted(city_scores.items(), key=lambda x: x[1]['score'], reverse=True)[:5]
selected_cities = [city for city, _ in top_cities]

# -------------------------------
# Step 2: Route Optimization (Greedy)
# -------------------------------

# Build city-to-city cost map
flight_map = defaultdict(lambda: defaultdict(list))
for flight in flights_json:
    if flight['flight_from'] in selected_cities and flight['flight_to'] in selected_cities:
        flight_map[flight['flight_from']][flight['flight_to']].append(flight)

# Pick cheapest flight between cities
flight_costs = {
    (src, dst): min(flight_map[src][dst], key=lambda f: f['price'])
    for src in selected_cities for dst in selected_cities if src != dst and dst in flight_map[src]
}

# Simple greedy TSP
start_city = selected_cities[0]
unvisited = set(selected_cities)
route = [start_city]
unvisited.remove(start_city)

current_city = start_city
while unvisited:
    next_city = min(unvisited, key=lambda c: flight_costs[(current_city, c)]['price'] if (current_city, c) in flight_costs else float('inf'))
    route.append(next_city)
    unvisited.remove(next_city)
    current_city = next_city

# -------------------------------
# Step 3: Day Allocation
# -------------------------------

total_days = 8
base_days = {city: 1 for city in route}
remaining_days = total_days - len(route)

# Allocate extra days to cities with higher attraction rating
while remaining_days > 0:
    best_city = max(route, key=lambda c: city_scores[c]["avg_attraction_rating"])
    base_days[best_city] += 1
    remaining_days -= 1

# -------------------------------
# Step 4: Budget Calculation
# -------------------------------

budget = 200000  # INR

itinerary = []
total_hotel_cost = 0
total_food_cost = 0
total_attraction_cost = 0
total_flight_cost = 0

for i, city in enumerate(route):
    days = base_days[city]
    hotel_cost = city_scores[city]["avg_hotel_price"] * days
    food_cost = city_scores[city]["avg_food_price"] * 3 * days
    attraction_cost = city_scores[city]["total_attraction_price"]

    total_hotel_cost += hotel_cost
    total_food_cost += food_cost
    total_attraction_cost += attraction_cost

    # Flights between cities
    if i < len(route) - 1:
        f = flight_costs[(city, route[i + 1])]
        total_flight_cost += f['price']

    # Day-wise itinerary
    city_attractions = next(item['attractions'] for item in attractions_json if item['city'] == city)
    city_restaurants = [r for r in restaurants_json if r['city'] == city]
    for day in range(days):
        attractions_today = random.sample(city_attractions, 3)
        restaurant = random.choice(city_restaurants)
        itinerary.append({
            "day": len(itinerary) + 1,
            "city": city,
            "attractions": [a['name'] for a in attractions_today],
            "restaurant": restaurant['restaurant_name'],
            "notes": f"Enjoy dinner at {restaurant['restaurant_name']} rated {restaurant['rating']}"
        })

# -------------------------------
# Step 5: RAG Input Preparation
# -------------------------------

rag_input = {
    "user_info": {
        "destination": "India",
        "budget_in_inr": budget,
        "from_date": "2025-05-20",
        "to_date": "2025-05-28"
    },
    "selected_cities": route,
    "days_allocation": base_days,
    "daily_itinerary": itinerary,
    "costs": {
        "hotel": total_hotel_cost,
        "food": total_food_cost,
        "attraction": total_attraction_cost,
        "flights": total_flight_cost,
        "total_estimated": total_hotel_cost + total_food_cost + total_attraction_cost + total_flight_cost
    }
}

with open("final_rag_input.json", "w") as f:
    json.dump(rag_input, f, indent=4)

print("✔️ RAG input saved to final_rag_input.json")

✔️ RAG input saved to final_rag_input.json
