In [12]:
# Standard library imports
import os
import json
import requests
import getpass
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple

# Data processing
import pandas as pd

# Data validation
from pydantic import BaseModel, Field

# LangChain imports
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Amadeus API client
from amadeus import Client, ResponseError

In [27]:
# Credentials are never stored in the notebook itself: each value is taken from
# the environment when it is already set, and prompted for (without echo)
# otherwise.
# NOTE(review): the Amadeus client ID/secret that were previously written in
# this cell should be treated as leaked and rotated.
for _env_name, _prompt in (
    ("OPENAI_API_KEY", "OpenAI API Key:"),
    ("AMADEUS_CLIENT_ID", "Amadeus Client ID:"),
    ("AMADEUS_CLIENT_SECRET", "Amadeus Client Secret:"),
):
    if not os.environ.get(_env_name):
        os.environ[_env_name] = getpass.getpass(_prompt)

In [28]:
def get_amadeus_access_token(timeout: float = 10.0) -> Optional[str]:
    """Request an OAuth2 access token from the Amadeus test environment.

    The client credentials are read from the ``AMADEUS_CLIENT_ID`` and
    ``AMADEUS_CLIENT_SECRET`` environment variables.

    Args:
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when the
        request fails, times out, or returns a non-200 status.

    Raises:
        KeyError: If either credential environment variable is not set.
    """
    auth_url = "https://test.api.amadeus.com/v1/security/oauth2/token"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {
        "grant_type": "client_credentials",
        "client_id": os.environ["AMADEUS_CLIENT_ID"],
        "client_secret": os.environ["AMADEUS_CLIENT_SECRET"]
    }

    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.post(auth_url, headers=headers, data=data, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to get access token: {error}")
        return None

    try:
        payload = response.json()
    except ValueError:
        # Error pages are not always JSON; fall back to the raw body text.
        payload = None

    if response.status_code == 200 and isinstance(payload, dict):
        return payload.get("access_token")

    print(f"Failed to get access token: {payload if payload is not None else response.text}")
    return None

In [29]:
class DestinationEnricher:
    """Static lookup helpers that supply canned background text for a city."""

    # City name -> cultural/historical blurb.
    _CULTURAL_CONTEXT = {
        "Paris": """
            Paris's cultural heritage spans over two millennia. Founded in the 3rd century BC 
            by the Parisii, a Celtic tribe, the city evolved from a Roman settlement to the 
            cultural capital of Europe. The city's golden age during the Belle Époque 
            (1871-1914) saw the construction of iconic landmarks and the emergence of 
            artistic movements that would influence global culture.
            
            Contemporary Paris maintains its position as a global trendsetter in fashion, 
            cuisine, and the arts, while preserving its historical character through 
            protected architectural zones and cultural preservation initiatives.
            """,
        "Tokyo": """
            Tokyo's cultural narrative is a fascinating blend of ancient Edo period traditions 
            and cutting-edge modernity. Originally a small fishing village named Edo, the city 
            became the seat of the Tokugawa shogunate in 1603, marking the beginning of its 
            transformation into a major political and cultural center.
            
            Modern Tokyo represents the perfect synthesis of tradition and innovation, where 
            centuries-old temples stand alongside futuristic skyscrapers, and traditional tea 
            ceremonies coexist with robot restaurants and digital art exhibitions.
            """
        # Further cities can be appended here
    }

    # City name -> season-by-season events listing.
    _SEASONAL_EVENTS = {
        "Paris": """
            Spring (March-May):
            - Paris Art Fair (April)
            - French Open Tennis Tournament (May-June)
            - Saint-Germain-des-Prés Jazz Festival (May)
            
            Summer (June-August):
            - Fête de la Musique (June 21)
            - Bastille Day Celebrations (July 14)
            - Paris Plages (July-August)
            
            Autumn (September-November):
            - European Heritage Days (September)
            - Nuit Blanche Art Festival (October)
            - Paris Photo Fair (November)
            
            Winter (December-February):
            - Christmas Markets (December)
            - Paris Fashion Week (January/February)
            - Chinese New Year Parade (January/February)
            """,
        # Further cities can be appended here
    }

    @staticmethod
    def add_cultural_context(city: str, country: str) -> str:
        """Return the cultural blurb stored for ``city``.

        When the city has no entry, a one-line generic sentence naming the
        city and ``country`` is returned instead.
        """
        generic_blurb = f"Detailed cultural and historical information about {city}, {country}."
        return DestinationEnricher._CULTURAL_CONTEXT.get(city, generic_blurb)

    @staticmethod
    def add_seasonal_events(city: str) -> str:
        """Return the seasonal events listing stored for ``city``.

        Cities without an entry get a fixed generic sentence.
        """
        generic_events = "Annual events and festivals information."
        return DestinationEnricher._SEASONAL_EVENTS.get(city, generic_events)

In [30]:
def fetch_popular_destinations(city_code: str) -> List[Document]:
    """Fetch flight destinations from ``city_code`` and wrap each one as a Document.

    Each destination returned by the Amadeus flight-destinations endpoint is
    rendered into a text template (flight dates, price, cultural background and
    seasonal events) and stored with its key fields as metadata.

    Args:
        city_code: Origin code passed to Amadeus as ``origin``.

    Returns:
        One Document per destination that carries a ``destination`` code; an
        empty list when Amadeus reports a ``ResponseError``.

    Raises:
        KeyError: If the Amadeus credential environment variables are not set.
    """
    # Amadeus identifies destinations by IATA code, while DestinationEnricher is
    # keyed by city name, so translate the codes that have enrichment text.
    # NOTE(review): extend this mapping whenever DestinationEnricher gains cities.
    iata_to_city = {
        "PAR": "Paris", "CDG": "Paris", "ORY": "Paris",
        "TYO": "Tokyo", "NRT": "Tokyo", "HND": "Tokyo",
    }

    try:
        amadeus = Client(
            client_id=os.environ["AMADEUS_CLIENT_ID"],
            client_secret=os.environ["AMADEUS_CLIENT_SECRET"]
        )
        response = amadeus.shopping.flight_destinations.get(origin=city_code)
        destinations = response.data or []
    except ResponseError as error:
        print(f"Error fetching popular destinations: {error}")
        return []

    documents = []
    for dest in destinations:
        destination_code = dest.get('destination')
        if not destination_code:
            # A record without a destination code cannot be identified; skip it
            # rather than abort the whole batch with a KeyError.
            continue

        # Unmapped codes are passed through unchanged, so the enricher falls
        # back to its generic text exactly as before.
        city_name = iata_to_city.get(destination_code, destination_code)
        price = dest.get('price') or {}
        price_total = price.get('total', 'N/A')
        departure_date = dest.get('departureDate', 'N/A')
        return_date = dest.get('returnDate', 'N/A')

        dest_info = f"""
            Destination Analysis: {destination_code}
            
            Flight Information:
            Departure Date: {departure_date}
            Return Date: {return_date}
            Price: {price_total} {price.get('currency', 'USD')}
            
            Travel Insights:
            - Peak vs. Off-peak season analysis
            - Price trends and historical data
            - Popular connecting routes
            - Typical flight durations and time zones
            
            Destination Background:
            {DestinationEnricher.add_cultural_context(city_name, '')}
            
            Seasonal Events and Festivals:
            {DestinationEnricher.add_seasonal_events(city_name)}
            
            Travel Planning Tips:
            - Recommended booking windows
            - Alternative airports and routes
            - Airline alliance benefits
            - Visa and entry requirements
            """

        documents.append(
            Document(
                page_content=dest_info,
                metadata={
                    "type": "popular_destination",
                    "destination_code": destination_code,
                    "price": price_total,
                    "departure_date": departure_date,
                    "return_date": return_date
                }
            )
        )
    return documents

In [31]:
def fetch_hotel_information(city_code: str) -> List[Document]:
    """Fetch the hotels Amadeus lists for ``city_code`` and wrap each as a Document.

    Every hotel is rendered into a profile template combining the fields
    returned by Amadeus (name, coordinates, country, chain, last update) with
    the generic neighborhood, amenities and attractions text produced by the
    helper functions in this notebook.

    Args:
        city_code: City code passed to Amadeus as ``cityCode``.

    Returns:
        One Document per hotel; an empty list when Amadeus reports a
        ``ResponseError``.

    Raises:
        KeyError: If the Amadeus credential environment variables are not set.
    """
    try:
        amadeus = Client(
            client_id=os.environ["AMADEUS_CLIENT_ID"],
            client_secret=os.environ["AMADEUS_CLIENT_SECRET"]
        )
        response = amadeus.reference_data.locations.hotels.by_city.get(cityCode=city_code)
        hotels = response.data or []
    except ResponseError as error:
        print(f"Error fetching hotel information: {error}")
        return []

    # The attractions text depends only on the city, so build it once instead
    # of once per hotel.
    attractions_info = generate_nearby_attractions(city_code)

    documents = []
    for hotel in hotels:
        # `or {}` also covers keys that are present but null in the payload,
        # which a plain .get(key, {}) default would not.
        geo_code = hotel.get('geoCode') or {}
        address = hotel.get('address') or {}
        latitude = geo_code.get('latitude')
        longitude = geo_code.get('longitude')

        # Generate neighborhood context
        neighborhood_info = generate_neighborhood_context(latitude, longitude)

        # Generate amenities description
        amenities_info = generate_detailed_amenities(hotel.get('amenities') or [])

        content = f"""
            Hotel Comprehensive Profile: {hotel.get('name', 'N/A')}
            
            Location Analysis:
            City: {city_code}
            Precise Location: {geo_code.get('latitude', 'N/A')}, 
                            {geo_code.get('longitude', 'N/A')}
            Country: {address.get('countryCode', 'N/A')}
            
            Property Details:
            Chain: {hotel.get('chainCode', 'Independent Property')}
            Category: {determine_hotel_category(hotel)}
            Last Updated: {hotel.get('lastUpdate', 'N/A')}
            
            Neighborhood Overview:
            {neighborhood_info}
            
            Detailed Amenities:
            {amenities_info}
            
            Transportation Access:
            - Distance from major airports
            - Public transit options
            - Parking facilities
            - Local transportation services
            
            Guest Services:
            - Check-in/out policies
            - Room service availability
            - Business facilities
            - Wellness options
            
            Area Attractions:
            {attractions_info}
            
            Additional Information:
            - Seasonal considerations
            - Business travel amenities
            - Family-friendly features
            - Accessibility information
            """

        documents.append(
            Document(
                page_content=content,
                metadata={
                    "type": "hotel_information",
                    "hotel_id": hotel.get('hotelId', 'unknown'),
                    "city_code": city_code,
                    "chain_code": hotel.get('chainCode', 'N/A'),
                    "location": {
                        "latitude": latitude,
                        "longitude": longitude
                    }
                }
            )
        )
    return documents

In [32]:
def _extract_booking_class(flight: Dict[str, Any]) -> str:
    """Return the first booking class found on a flight offer, else 'N/A'."""
    booking_class = flight.get('bookingClass')
    if booking_class:
        return booking_class
    # NOTE(review): Amadeus flight offers appear to carry the booking class per
    # segment under travelerPricings[].fareDetailsBySegment[].class rather than
    # at the top level — confirm against the API reference.
    for pricing in flight.get('travelerPricings') or []:
        for fare_detail in pricing.get('fareDetailsBySegment') or []:
            if fare_detail.get('class'):
                return fare_detail['class']
    return 'N/A'


def fetch_flight_offers(origin: str, destination: str, departure_date: str) -> List[Document]:
    """Search flight offers for one route and date and wrap each offer as a Document.

    Up to 10 offers for a single adult are requested. Each offer is rendered
    into a template containing its price, a per-segment breakdown (carrier,
    aircraft, departure/arrival airport, terminal and time) and the generic
    route and airport text produced by the helper functions in this notebook.

    Args:
        origin: Code passed to Amadeus as ``originLocationCode``.
        destination: Code passed to Amadeus as ``destinationLocationCode``.
        departure_date: Date string passed to Amadeus as ``departureDate``.

    Returns:
        One Document per offer; an empty list when Amadeus reports a
        ``ResponseError``.

    Raises:
        KeyError: If the Amadeus credential environment variables are not set.
    """
    try:
        amadeus = Client(
            client_id=os.environ["AMADEUS_CLIENT_ID"],
            client_secret=os.environ["AMADEUS_CLIENT_SECRET"]
        )
        response = amadeus.shopping.flight_offers_search.get(
            originLocationCode=origin,
            destinationLocationCode=destination,
            departureDate=departure_date,
            adults=1,
            max=10
        )
        flights = response.data or []
    except ResponseError as error:
        print(f"Error fetching flight offers: {error}")
        return []

    # Route and airport text depends only on the arguments, so build it once
    # rather than once per offer.
    route_analysis = generate_route_analysis(origin, destination)
    origin_airport_info = generate_airport_information(origin)
    destination_airport_info = generate_airport_information(destination)

    documents = []
    for flight in flights:
        # `or` fallbacks also cover keys that are present but null.
        itineraries = flight.get('itineraries') or []
        price_info = flight.get('price') or {}

        # Generate detailed flight information, one entry per segment
        flight_details = []
        for itinerary in itineraries:
            for segment in itinerary.get('segments') or []:
                departure = segment.get('departure') or {}
                arrival = segment.get('arrival') or {}
                aircraft = segment.get('aircraft') or {}
                carrier = segment.get('carrierCode', 'Unknown')

                segment_info = f"""
                    Flight Segment Analysis:
                    Carrier: {carrier} {segment.get('number', 'N/A')}
                    Equipment: {aircraft.get('code', 'N/A')}
                    
                    Departure Details:
                    Airport: {departure.get('iataCode', 'N/A')}
                    Terminal: {departure.get('terminal', 'N/A')}
                    Time: {departure.get('at', 'N/A')}
                    
                    Arrival Details:
                    Airport: {arrival.get('iataCode', 'N/A')}
                    Terminal: {arrival.get('terminal', 'N/A')}
                    Time: {arrival.get('at', 'N/A')}
                    
                    Operational Information:
                    - Aircraft specifications
                    - Typical on-time performance
                    - Seasonal reliability metrics
                    """
                flight_details.append(segment_info)

        segments_text = "".join(flight_details)

        content = f"""
            Comprehensive Flight Analysis: {origin} to {destination}
            
            Route Overview:
            {route_analysis}
            
            Pricing Information:
            Base Fare: {price_info.get('base', 'N/A')}
            Total Price: {price_info.get('total', 'N/A')} {price_info.get('currency', 'EUR')}
            
            Detailed Flight Information:
            {segments_text}
            
            Route Market Analysis:
            - Historical price trends
            - Peak travel periods
            - Alternative routing options
            - Alliance and codeshare details
            
            Airport Information:
            {origin_airport_info}
            {destination_airport_info}
            
            Travel Planning Guidelines:
            - Optimal booking windows
            - Fare class benefits
            - Baggage policies
            - Transit visa requirements
            - Connection considerations
            
            Additional Services:
            - Available ancillary services
            - Lounge access details
            - Special assistance services
            - Meal and seat selection options
            """

        documents.append(
            Document(
                page_content=content,
                metadata={
                    "type": "flight_offer",
                    "origin": origin,
                    "destination": destination,
                    "departure_date": departure_date,
                    "price": price_info.get('total', 'N/A'),
                    "currency": price_info.get('currency', 'EUR'),
                    "booking_class": _extract_booking_class(flight)
                }
            )
        )
    return documents

In [33]:
# Helper functions for generating rich context
def generate_neighborhood_context(lat: float, lon: float) -> str:
    """Return the generic neighborhood checklist used in hotel profiles.

    ``lat`` and ``lon`` are accepted but not used: the same template text is
    returned for every location.
    """
    neighborhood_template = """
    Neighborhood Characteristics:
    - Local atmosphere and vibe
    - Safety and security assessment
    - Proximity to business districts
    - Entertainment and dining options
    - Cultural attractions nearby
    - Shopping facilities
    - Green spaces and recreation
    
    Transportation Hub Analysis:
    - Major transit stations
    - Bus and tram routes
    - Taxi availability
    - Bike-sharing stations
    
    Local Life:
    - Popular local venues
    - Markets and shopping areas
    - Cultural institutions
    - Sports facilities
    """
    return neighborhood_template

In [43]:
def determine_hotel_category(hotel: Dict) -> str:
    """Classify a hotel record into a human-readable category.

    The ``chainCode`` is checked against a fixed table first. When it is
    missing or unknown, the lower-cased ``name`` is scanned for keywords, in
    this priority order: resort, boutique, apartment/residence,
    palace/luxury, business.

    Args:
        hotel: Hotel record; only the ``chainCode`` and ``name`` keys are read.
            Either may be missing or ``None``.

    Returns:
        The matching category label, or ``'Standard Hotel'`` when nothing matches.
    """
    # Use chain code to determine category if available
    chain_categories = {
        'HL': 'Luxury Hotel',
        'HB': 'Business Hotel',
        'HR': 'Resort Hotel',
        'HH': 'Historic Hotel',
        'BA': 'Boutique Accommodation',
        'AP': 'Apartment Hotel'
    }

    chain_code = hotel.get('chainCode')
    # The isinstance guard keeps unexpected (e.g. unhashable) values from
    # raising on the dict membership test.
    if isinstance(chain_code, str) and chain_code in chain_categories:
        return chain_categories[chain_code]

    # If no chain code or unknown, fall back to keywords in the hotel name.
    # `or ''` covers a name that is present but None.
    name = str(hotel.get('name') or '').lower()
    name_keywords = (
        (('resort',), 'Resort Hotel'),
        (('boutique',), 'Boutique Hotel'),
        (('apartment', 'residence'), 'Apartment Hotel'),
        (('palace', 'luxury'), 'Luxury Hotel'),
        (('business',), 'Business Hotel'),
    )
    for keywords, category in name_keywords:
        if any(keyword in name for keyword in keywords):
            return category

    # Default category
    return 'Standard Hotel'

In [34]:
def generate_detailed_amenities(amenities: List[str]) -> str:
    """Return the generic amenities checklist used in hotel profiles.

    ``amenities`` is accepted but not used: the same template text is
    returned regardless of its contents.
    """
    amenities_template = """
    Room Features:
    - Climate control systems
    - Entertainment options
    - Work space configuration
    - Connectivity solutions
    
    Property Facilities:
    - Dining venues
    - Meeting spaces
    - Wellness facilities
    - Recreation options
    
    Business Services:
    - Conference facilities
    - Technical support
    - Business center
    - Translation services
    
    Guest Services:
    - Concierge assistance
    - Room service hours
    - Laundry facilities
    - Airport transfers
    """
    return amenities_template

In [35]:
def generate_route_analysis(origin: str, destination: str) -> str:
    """Return the generic route-analysis checklist used in flight documents.

    ``origin`` and ``destination`` are accepted but not used: the template
    contains no placeholders, so the same text is returned for every route.
    """
    route_template = """
    Route Characteristics:
    - Distance and typical duration
    - Common connection points
    - Seasonal weather impact
    - Time zone considerations
    
    Market Analysis:
    - Popular travel periods
    - Price fluctuation patterns
    - Competing airlines
    - Alternative routes
    
    Operational Considerations:
    - Aircraft types commonly used
    - Typical delays and causes
    - Seasonal performance metrics
    - Airport congestion analysis
    """
    return route_template

In [36]:
def generate_airport_information(airport_code: str) -> str:
    """Return the generic airport checklist, headed by ``airport_code``.

    Only the "Airport:" line varies with the argument; the terminal, ground
    transportation and amenities sections are fixed template text.
    """
    airport_template = """
    Airport: {airport_code}
    
    Terminal Information:
    - Layout and facilities
    - Transfer processes
    - Security procedures
    - Lounges and services
    
    Ground Transportation:
    - Public transit options
    - Taxi and ride-share
    - Car rental facilities
    - Parking services
    
    Amenities:
    - Dining options
    - Shopping facilities
    - Business services
    - Medical facilities
    """
    return airport_template.format(airport_code=airport_code)

In [37]:
def generate_nearby_attractions(city_code: str) -> str:
    """Return the attractions listing stored for ``city_code``.

    Listings exist for 'PAR', 'LON' and 'NYC'; any other code yields a fixed
    generic sentence.
    """
    generic_text = "Information about local attractions and points of interest."
    attractions_by_city = {
        'PAR': """
        Major Landmarks:
        - Eiffel Tower (Historic iron lattice tower, symbol of Paris)
        - Louvre Museum (World's largest art museum, home to Mona Lisa)
        - Notre-Dame Cathedral (Medieval Catholic cathedral, Gothic architecture)
        - Arc de Triomphe (Historic monument, honors those who fought for France)
        
        Cultural Districts:
        - Le Marais (Historic district, medieval architecture, trendy shops)
        - Montmartre (Artistic neighborhood, Sacré-Cœur Basilica)
        - Latin Quarter (Academic district, historic universities)
        
        Entertainment Areas:
        - Champs-Élysées (Luxury shopping, dining, entertainment)
        - Canal Saint-Martin (Trendy area, cafes, boutiques)
        """,
        'LON': """
        Historic Sites:
        - Tower of London (Historic castle and fortress)
        - Westminster Abbey (Gothic church, royal coronations)
        - Buckingham Palace (Official residence of British monarch)
        - St. Paul's Cathedral (Anglican cathedral, iconic dome)
        
        Cultural Venues:
        - British Museum (World artifacts and art)
        - Tate Modern (Modern and contemporary art)
        - National Gallery (European paintings)
        
        Entertainment Districts:
        - Covent Garden (Shopping, street performers, dining)
        - Soho (Entertainment, theaters, restaurants)
        """,
        'NYC': """
        Iconic Landmarks:
        - Statue of Liberty (Symbol of freedom and democracy)
        - Empire State Building (Art Deco skyscraper)
        - Times Square (Entertainment and commercial intersection)
        - Central Park (Urban park, recreational activities)
        
        Cultural Centers:
        - Metropolitan Museum of Art (Extensive art collection)
        - Broadway Theater District (Live theater performances)
        - Lincoln Center (Performing arts complex)
        
        Neighborhoods:
        - Greenwich Village (Artistic community, music venues)
        - SoHo (Shopping, galleries, architecture)
        """
    }
    if city_code in attractions_by_city:
        return attractions_by_city[city_code]
    return generic_text

In [38]:
def prepare_destination_info() -> List[Document]:
    """Build one travel-guide Document per hand-written city profile.

    Two profiles (Paris and Tokyo) are defined inline. Each is rendered into a
    long guide template that also pulls in DestinationEnricher text and the
    output of ``format_attractions`` / ``format_cuisine``. The resulting
    Documents carry the city, country, attractions and cuisine as metadata.
    """
    city_profiles = [
        {
            "city": "Paris",
            "country": "France",
            "description": """
            Paris, the City of Light, stands as a global epicenter of art, fashion, gastronomy, and culture. 
            Founded in the 3rd century BC, the city has evolved through centuries of rich history to become 
            one of the world's most influential and visited capitals. Its urban landscape marries historic 
            architecture with contemporary innovation, while its cultural scene spans from classical arts 
            to cutting-edge trends.
            """,
            "attractions": [
                "Eiffel Tower",
                "Louvre Museum",
                "Notre-Dame Cathedral",
                "Arc de Triomphe",
                "Champs-Élysées",
                "Montmartre",
                "Palace of Versailles",
                "Seine River",
                "Musée d'Orsay",
                "Centre Pompidou"
            ],
            "cuisine": [
                "Croissants and French pastries",
                "Coq au Vin",
                "Beef Bourguignon",
                "French cheese varieties",
                "Escargots",
                "Macarons",
                "French wines",
                "Haute cuisine restaurants",
                "Traditional bistros",
                "Michelin-starred establishments"
            ],
            "transportation": """
            Paris boasts one of Europe's most comprehensive public transportation networks. The Metro system 
            features 16 lines covering most of the city, complemented by the RER regional trains. The bus 
            network provides extensive surface coverage, while trams serve the city's periphery. The Paris 
            Visite pass offers unlimited travel across all transport modes, making it ideal for tourists. 
            Bike-sharing systems like Vélib' provide eco-friendly alternatives, and dedicated bus lanes 
            ensure efficient public transit flow during peak hours.
            """,
            "weather": """
            Paris experiences a temperate climate with mild conditions year-round. Summers (June-August) 
            are warm and lively, with temperatures averaging 20-25°C (68-77°F). Winters (December-February) 
            are cool and gray, with temperatures around 3-8°C (37-46°F). Spring (March-May) brings mild 
            temperatures and beautiful blooms, while autumn (September-November) offers comfortable 
            conditions and stunning foliage. Rainfall is moderate and distributed throughout the year, 
            with occasional summer showers and winter drizzles.
            """
        },
        {
            "city": "Tokyo",
            "country": "Japan",
            "description": """
            Tokyo, Japan's bustling capital, is a city where ultramodern lives harmoniously with tradition. 
            From ancient temples to neon-lit skyscrapers, the city offers a fascinating blend of old and new. 
            As a global financial center and technology hub, Tokyo drives innovation while preserving its rich 
            cultural heritage. The city's 23 special wards each offer unique experiences, from the pop culture 
            of Harajuku to the traditional atmosphere of Asakusa.
            """,
            "attractions": [
                "Senso-ji Temple",
                "Tokyo Skytree",
                "Meiji Shrine",
                "Shibuya Crossing",
                "Imperial Palace",
                "Tsukiji Outer Market",
                "Akihabara Electric Town",
                "Ueno Park",
                "Tokyo Tower",
                "Ginza District"
            ],
            "cuisine": [
                "Sushi and sashimi",
                "Ramen varieties",
                "Tempura",
                "Yakitori",
                "Tonkatsu",
                "Monjayaki",
                "Japanese wagyu beef",
                "Izakaya dining",
                "Traditional kaiseki",
                "Street food specialties"
            ],
            "transportation": """
            Tokyo's transportation system is renowned for its efficiency and punctuality. The extensive 
            rail network includes JR lines, subway systems, and private railways, connecting all parts 
            of the metropolitan area. The Yamanote Line loops around central Tokyo, providing access to 
            major districts. IC cards like Pasmo and Suica offer convenient payment for all public 
            transportation. Buses complement the rail network, while taxis provide reliable door-to-door 
            service. The system handles over 40 million passengers daily with remarkable precision.
            """,
            "weather": """
            Tokyo experiences four distinct seasons. Spring (March-May) brings mild temperatures and 
            famous cherry blossoms, with temperatures ranging from 10-20°C (50-68°F). Summers 
            (June-August) are hot and humid, often exceeding 30°C (86°F), with a rainy season in 
            June. Autumn (September-November) offers comfortable temperatures and colorful foliage. 
            Winters (December-February) are cool and dry, with temperatures rarely falling below 
            0°C (32°F). The city averages 1,528mm of annual rainfall.
            """
        }
    ]

    def _render_guide(profile):
        """Fill the travel-guide template with one profile's fields."""
        return f"""
        {profile['city']}, {profile['country']} - Comprehensive Travel Guide
        
        Overview and History:
        {profile['description']}
        
        Cultural Significance:
        {DestinationEnricher.add_cultural_context(profile['city'], profile['country'])}
        
        Detailed Attractions Guide:
        {format_attractions(profile['attractions'])}
        
        Culinary Landscape:
        Traditional Cuisine: {format_cuisine(profile['cuisine'])}
        
        Dining Scene:
        - Michelin-starred restaurants
        - Local food markets
        - Street food culture
        - Traditional dining etiquette
        - Popular food districts
        - Contemporary fusion cuisine
        
        Transportation Network:
        {profile['transportation']}
        
        Climate and Seasonal Considerations:
        {profile['weather']}
        
        Seasonal Events and Festivals:
        {DestinationEnricher.add_seasonal_events(profile['city'])}
        
        Practical Travel Information:
        - Visa requirements and entry procedures
        - Currency and banking facilities
        - Emergency services and healthcare
        - Communication and internet access
        - Shopping districts and local markets
        - Entertainment venues and nightlife
        - Cultural etiquette and customs
        - Language essentials for travelers
        
        Neighborhood Guide:
        - Popular residential areas
        - Business districts
        - Tourist hotspots
        - Historical quarters
        - Modern developments
        - Entertainment zones
        
        Accommodation Options:
        - Luxury hotels
        - Boutique properties
        - Business hotels
        - Budget accommodations
        - Traditional lodging
        - Serviced apartments
        
        Safety and Security:
        - General safety tips
        - Emergency contacts
        - Tourist police information
        - Health and medical facilities
        - Natural disaster preparedness
        """

    # One Document per profile, in the order the profiles are listed above.
    return [
        Document(
            page_content=_render_guide(profile),
            metadata={
                "type": "destination_info",
                "city": profile['city'],
                "country": profile['country'],
                "attractions": profile['attractions'],
                "cuisine": profile['cuisine']
            }
        )
        for profile in city_profiles
    ]

In [39]:
def format_attractions(attractions: List[str]) -> str:
    """Render each attraction name as a headed checklist block.

    Every name is followed by the same seven generic bullet points. Blocks are
    joined with a newline; an empty list yields an empty string.
    """
    return "\n".join(
        f"""
        {attraction_name}:
        - Historical significance and background
        - Visitor information and access
        - Best times to visit
        - Cultural importance
        - Photography opportunities
        - Guided tour options
        - Nearby facilities
        """
        for attraction_name in attractions
    )

In [40]:
def format_cuisine(cuisine: List[str]) -> str:
    """Render each dish name as a headed checklist block.

    Every dish is followed by the same six generic bullet points. Blocks are
    joined with a newline; an empty list yields an empty string.
    """
    return "\n".join(
        f"""
        {dish_name}:
        - Traditional preparation methods
        - Cultural significance
        - Best places to try
        - Seasonal variations
        - Typical ingredients
        - Pairing recommendations
        """
        for dish_name in cuisine
    )

In [41]:
def build_travel_knowledge_base(
    origin_city: str = 'NYC',
    major_cities: Optional[List[str]] = None,
    popular_routes: Optional[List[Tuple[str, str]]] = None,
    days_ahead: int = 30,
    chunk_size: int = 3000,
    chunk_overlap: int = 300,
) -> FAISS:
    """Collect static and Amadeus-sourced documents and index them in FAISS.

    The pipeline gathers the static destination guides, the popular
    destinations from ``origin_city``, hotel listings for ``major_cities`` and
    flight offers for ``popular_routes``, splits everything into chunks and
    embeds the chunks with ``OpenAIEmbeddings``. Calling it with no arguments
    reproduces the original hard-coded configuration.

    Args:
        origin_city: Origin code for the popular-destinations lookup.
        major_cities: City codes to fetch hotels for. Defaults to
            PAR, LON, NYC, TYO and ROM.
        popular_routes: ``(origin, destination)`` pairs to fetch flight offers
            for. Defaults to the six routes among NYC, LON, PAR and ROM.
        days_ahead: Days from today used as the flight departure date.
        chunk_size: Maximum characters per chunk.
        chunk_overlap: Characters shared between consecutive chunks.

    Returns:
        The FAISS vector store built from the split documents.
    """
    # None sentinels avoid sharing mutable default lists between calls.
    if major_cities is None:
        major_cities = ['PAR', 'LON', 'NYC', 'TYO', 'ROM']
    if popular_routes is None:
        popular_routes = [
            ('NYC', 'LON'), ('NYC', 'PAR'), ('NYC', 'ROM'),
            ('LON', 'PAR'), ('LON', 'ROM'), ('PAR', 'ROM')
        ]

    all_documents = []

    # Start with static destination information
    print("Collecting static destination information...")
    all_documents.extend(prepare_destination_info())

    # Fetch dynamic information from Amadeus API
    print(f"Fetching popular destinations from {origin_city}...")
    all_documents.extend(fetch_popular_destinations(origin_city))

    # Fetch hotel information for major cities
    print("Fetching hotel information for major cities...")
    for city_code in major_cities:
        print(f"Processing {city_code}...")
        all_documents.extend(fetch_hotel_information(city_code))

    # Fetch flight offers for popular routes
    print("Fetching flight offers for popular routes...")
    departure_date = (datetime.now() + timedelta(days=days_ahead)).strftime('%Y-%m-%d')
    for origin, destination in popular_routes:
        print(f"Processing route {origin} to {destination}...")
        all_documents.extend(fetch_flight_offers(origin, destination, departure_date))

    # Create text splitter with larger chunk size for RAGAS
    print("Splitting documents into chunks...")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", " ", ""]
    )
    split_documents = text_splitter.split_documents(all_documents)

    # Create and return vector store
    print(f"Building vector store with {len(split_documents)} documents...")
    vector_store = FAISS.from_documents(
        documents=split_documents,
        embedding=OpenAIEmbeddings()
    )

    print("Knowledge base built successfully!")
    return vector_store

In [44]:
# Assemble the FAISS-backed knowledge base, then report how many entries it indexed
travel_db = build_travel_knowledge_base()
print("Knowledge base built with {} documents".format(len(travel_db.index_to_docstore_id)))

Collecting static destination information...
Fetching popular destinations from NYC...
Fetching hotel information for major cities...
Processing PAR...
Processing LON...
Processing NYC...
Processing TYO...
Processing ROM...
Fetching flight offers for popular routes...
Processing route NYC to LON...
Processing route NYC to PAR...
Processing route NYC to ROM...
Processing route LON to PAR...
Processing route LON to ROM...
Processing route PAR to ROM...
Error fetching flight offers: [---]
Splitting documents into chunks...
Building vector store with 2663 documents...
Knowledge base built successfully!
Knowledge base built with 2663 documents


In [48]:
# Preview a handful of stored documents rather than printing the entire vector
# store: dumping every entry floods the notebook output and bloats the file.
PREVIEW_LIMIT = 5  # set to None to print every document
PREVIEW_CHARS = 500  # characters of content shown per document

preview_items = list(travel_db.index_to_docstore_id.items())[:PREVIEW_LIMIT]
for index, doc_id in preview_items:
    doc = travel_db.docstore.search(doc_id)
    content = doc.page_content
    char_length = len(content)
    word_length = len(content.split())
    # Only mark the content as truncated when something was actually cut off.
    ellipsis = "..." if char_length > PREVIEW_CHARS else ""
    print(f"Index: {index}")
    print(f"Document ID: {doc_id}")
    print(f"Length: {char_length} characters, {word_length} words")
    print(f"Content: {content[:PREVIEW_CHARS]}{ellipsis}")
    print(f"Metadata: {doc.metadata}\n")

Index: 0
Document ID: da7c5a0f-2a45-4a96-bc2c-6d83d93761d9
Length: 2774 characters, 312 words
Content: Paris, France - Comprehensive Travel Guide
        
        Overview and History:
        
            Paris, the City of Light, stands as a global epicenter of art, fashion, gastronomy, and culture. 
            Founded in the 3rd century BC, the city has evolved through centuries of rich history to become 
            one of the world's most influential and visited capitals. Its urban landscape marries historic 
            architecture with contemporary innovation, while its cultural scene sp...
Metadata: {'type': 'destination_info', 'city': 'Paris', 'country': 'France', 'attractions': ['Eiffel Tower', 'Louvre Museum', 'Notre-Dame Cathedral', 'Arc de Triomphe', 'Champs-Élysées', 'Montmartre', 'Palace of Versailles', 'Seine River', "Musée d'Orsay", 'Centre Pompidou'], 'cuisine': ['Croissants and French pastries', 'Coq au Vin', 'Beef Bourguignon', 'French cheese varieties', 'Escargot

In [49]:
import numpy as np
from statistics import median

# Fetch every stored document's text once; the character and word statistics
# below both reuse it instead of searching the docstore a second time.
page_contents = [
    travel_db.docstore.search(doc_id).page_content
    for doc_id in travel_db.index_to_docstore_id.values()
]

# Get all document lengths
doc_lengths = [len(text) for text in page_contents]

# Calculate statistics
max_len = max(doc_lengths)
min_len = min(doc_lengths)
mean_len = np.mean(doc_lengths)
median_len = median(doc_lengths)

print("Document Length Statistics:")
print(f"Maximum length: {max_len:,} characters")
print(f"Minimum length: {min_len:,} characters")
print(f"Mean length: {mean_len:,.2f} characters")
print(f"Median length: {median_len:,} characters")

# If you want word counts too (words are whitespace-separated tokens)
word_lengths = [len(text.split()) for text in page_contents]

print("\nWord Count Statistics:")
print(f"Maximum words: {max(word_lengths):,}")
print(f"Minimum words: {min(word_lengths):,}")
print(f"Mean words: {np.mean(word_lengths):,.2f}")
print(f"Median words: {median(word_lengths):,}")

Document Length Statistics:
Maximum length: 2,987 characters
Minimum length: 267 characters
Mean length: 2,670.62 characters
Median length: 2,857 characters

Word Count Statistics:
Maximum words: 313
Minimum words: 22
Mean words: 259.28
Median words: 278


In [50]:
import pickle

# Persist the vector store under the "travel_db_faiss" folder
travel_db.save_local("travel_db_faiss")

# Also pickle the docstore object (the stored documents) into its own file.
# NOTE(review): FAISS.save_local may already persist the docstore next to the
# index — confirm whether this extra file is still needed.
# NOTE(review): only unpickle this file from a trusted source; pickle.load can
# execute arbitrary code.
with open('travel_db_docstore.pkl', 'wb') as docstore_file:
    pickle.dump(travel_db.docstore, docstore_file)