In [1]:
import asyncio
import aiohttp
import json

from attraction_api import fetch_attractions
from hotel_api import fetch_hotels
from flight_api import fetch_flights  # make sure this signature is (session, city, date, airport)


In [2]:
# Location of the itinerary JSON file; adjust the path if needed.
itinerary_path = '/Users/dhruv/Desktop/LLM_project/TravelGenie/itinerary_USA.json'

# JSON is UTF-8 by specification, so request that encoding explicitly instead
# of relying on the platform's default text encoding.
with open(itinerary_path, "r", encoding="utf-8") as f:
    itinerary = json.load(f)

# itinerary is now a dict: { city: {"Date": "...", "Airport": "..."} }
print("Loaded cities and parameters:")
for city, info in itinerary.items():
    print(f" • {city}: Date={info['Date']}, Airport={info['Airport']}")


Loaded cities and parameters:
 • Austin: Date=2025-06-01, Airport=AUS
 • Frisco: Date=2025-06-04, Airport=DDJ
 • San Antonio: Date=2025-06-07, Airport=SAT
 • Dallas: Date=2025-06-10, Airport=DFW
 • Houston: Date=2025-06-12, Airport=IAH


In [3]:
async def gather_all(session, itinerary):
    """Run the attraction and hotel lookups for every itinerary city concurrently.

    Args:
        session: Object handed through unchanged as the first argument of
            ``fetch_attractions`` and ``fetch_hotels``.
        itinerary: Mapping of city name -> parameter dict. Every parameter
            dict must contain the keys ``"Date"`` and ``"Airport"``.

    Returns:
        The list produced by ``asyncio.gather``: for each city, in itinerary
        iteration order, the attractions result followed by the hotels result.

    Raises:
        KeyError: If a city's parameter dict lacks ``"Date"`` or ``"Airport"``.
    """
    pending = []
    for city_name, params in itinerary.items():
        # Only the (currently disabled) flight lookup needs these two values;
        # they are still read so a malformed itinerary entry fails early.
        date, airport = params["Date"], params["Airport"]
        pending.extend((
            fetch_attractions(session, city_name),
            fetch_hotels(session, city_name),
        ))
        # pending.append(fetch_flights(session, city_name, date, airport))
    # Await every queued call together; results come back in queue order.
    return await asyncio.gather(*pending)

async def run_all(itinerary):
    """Open one ``aiohttp.ClientSession`` and run ``gather_all`` inside it.

    Args:
        itinerary: Mapping of city name -> parameter dict, forwarded unchanged
            to ``gather_all``.

    Returns:
        Whatever ``gather_all`` returns for this itinerary.
    """
    async with aiohttp.ClientSession() as http_session:
        gathered = await gather_all(http_session, itinerary)
    # The session is closed by the time the results are handed back.
    return gathered


In [4]:
import json

# 1) Run all API + cleaning tasks
#    (using top‑level await since we're in Jupyter)
results = await run_all(itinerary)

# 2) Debug: print raw module outputs for each city
print("🔍 Raw module outputs:")
for entry in results:
    city = entry["city"]
    # pretty‑print each key
    print(f"\n=== {city} ===")
    # attraction_api returns key "attractions"
    print("Attractions raw:")
    print(json.dumps(entry.get("attractions", entry), indent=2, default=str))
    # hotel_api returns key "hotels"
    print("Hotels raw:")
    print(json.dumps(entry.get("hotels", entry), indent=2, default=str))
    # flight_api returns key "flights"
    print("Flights raw:")
    print(json.dumps(entry.get("flights", entry), indent=2, default=str))

# 3) Now merge as before
combined = {}
for entry in results:
    city = entry["city"]
    combined.setdefault(city, {}).update(entry)

final_list = [
    {
        "city":        city,
        "attractions": combined[city].get("attractions", []),
        "hotels":      combined[city].get("hotels", []),
        "flights":     combined[city].get("flights", [])
    }
    for city in itinerary.keys()
]

print("\n✅ After merging, sample output for first city:")
print(json.dumps(final_list[0], indent=2))

[Attraction] 🔍 Fetching location_id for: Austin
[Attraction] 🔍 Fetching location_id for: Frisco
[Attraction] 🔍 Fetching location_id for: San Antonio
[Attraction] 🔍 Fetching location_id for: Dallas
[Attraction] 🔍 Fetching location_id for: Houston
[Attraction] ✅ location_id for Dallas = 55711
[Attraction] 🔍 Fetching attractions for Dallas
[Attraction] 🌐 Raw results count for Dallas: 20
[Attraction] 🎯 Returning 10 attractions for Dallas
[Attraction] ✅ location_id for Austin = 30196
[Attraction] 🔍 Fetching attractions for Austin
[Attraction] ✅ location_id for San Antonio = 60956
[Attraction] 🔍 Fetching attractions for San Antonio
[Attraction] ✅ location_id for Houston = 56003
[Attraction] 🔍 Fetching attractions for Houston
[Attraction] 🌐 Raw results count for San Antonio: 20
[Attraction] 🎯 Returning 10 attractions for San Antonio
[Attraction] ✅ location_id for Frisco = 55870
[Attraction] 🔍 Fetching attractions for Frisco
[Attraction] 🌐 Raw results count for Houston: 20
[Attraction] 🎯 Retur

In [5]:
# # Cell 1: Imports
# import json, aiohttp, nest_asyncio
# from pprint import pprint
# from attraction_api import fetch_attractions

# # allow nested event loop
# nest_asyncio.apply()

# # Cell 2: Load itinerary
# with open("/Users/dhruv/Desktop/LLM_project/TravelGenie/itinerary_india.json","r") as f:
#     itinerary = json.load(f)
# cities = list(itinerary.keys())
# print("Scheduling attractions for:", cities)

# # This block stays the same
# async with aiohttp.ClientSession() as session:
#     tasks = [fetch_attractions(session, city) for city in cities]
#     results = await asyncio.gather(*tasks)

# # Print results
# for r in results:
#     print(f"{r['city']}: {len(r['attractions'])} attractions")


In [6]:
import pandas as pd
from IPython.display import display

# Flatten into a DataFrame (one row per city)
df = pd.json_normalize(final_list, sep="_")
display(df)

# Save JSON.
# Serialize to a string first: if any value cannot be serialized, the error is
# raised before the output file is opened, so an existing file is never
# replaced by a truncated one. default=str matches the debug dumps and keeps
# non-JSON-native values from aborting the save.
out_path = "sample_combined_data_hotel_attraction_Texas_US.json"
payload = json.dumps(final_list, indent=2, default=str)
with open(out_path, "w", encoding="utf-8") as f:
    f.write(payload)

print("✅ Saved combined results to", out_path)

Unnamed: 0,city,attractions,hotels,flights
0,Austin,"[{'name': 'Texas State Capitol', 'num_reviews'...",[],[]
1,Frisco,"[{'name': 'The Star', 'num_reviews': '211', 'r...",[],[]
2,San Antonio,"[{'name': 'San Antonio River Walk', 'num_revie...",[],[]
3,Dallas,[{'name': 'The Sixth Floor Museum at Dealey Pl...,[],[]
4,Houston,[{'name': 'The Houston Museum of Natural Scien...,[],[]


✅ Saved combined results to sample_combined_data_hotel_attraction_Texas_US.json
