<a href="https://colab.research.google.com/github/ashwin-yedte/visual-intelligence-travel-finance/blob/main/notebooks/VL_Encoding_Framework/GEO_LOCATION_%26_OFFERS_ENRICHMENT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**GEO-LOCATION & OFFERS ENRICHMENT**

Adds geocoding and fake offers to metadata.json

FEATURES:
1. Auto-geocode using Nominatim (OpenStreetMap)
2. Generate realistic fake offers (hotels, activities, flights, packages)
3. Update metadata.json in place


**IMPORT Libraries**

In [1]:
from google.colab import drive
import os
import json
import numpy as np
from PIL import Image
import torch
from transformers import CLIPModel, CLIPProcessor
from tqdm import tqdm
from datetime import datetime
import pickle
import warnings


# Mount Google Drive so the metadata file under MyDrive is reachable
drive.mount('/content/drive')


# Configuration
# Path of the metadata file loaded below
METADATA_PATH = '/content/drive/MyDrive/visual-intelligence-travel-finance/data/landmarks/metadata.json'
# Nominatim search endpoint and the User-Agent value used for geocoding requests
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
USER_AGENT = "indian-travel-destinations/1.0"

# Load metadata
print("\nLoading metadata...")
# JSON text is UTF-8; be explicit instead of relying on the platform default encoding
with open(METADATA_PATH, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

print(f"Loaded: {metadata['total_destinations']} destinations")
print("="*80)




Mounted at /content/drive

Loading metadata...
Loaded: 168 destinations


# GEOCODING FUNCTIONS

    Geocode a destination using Nominatim API.
    
    Args:
        destination_name: Name of destination (e.g., "Agonda Beach")
        state: State name (e.g., "goa")
        country: Country name (default: "India")
    
    Returns:
        Dict with latitude, longitude, city, region, or None if geocoding fails
    

In [2]:
import json
import random
import time
from datetime import datetime
from typing import Any, Dict, List, Optional

import requests

def geocode_destination(
    destination_name: str,
    state: str,
    country: str = "India",
    timeout: float = 10.0,
) -> Optional[Dict[str, Any]]:
    """Geocode a destination using the Nominatim search API.

    Args:
        destination_name: Name of destination (e.g., "Agonda Beach").
        state: State name (e.g., "goa").
        country: Country name (default: "India").
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        Dict with ``latitude``, ``longitude``, ``city`` and ``region``, or
        ``None`` when the request fails, the response cannot be parsed, or
        the search returns no results. ``city`` is ``None`` when the address
        has no city/town/village/state_district field.
    """
    query = f"{destination_name}, {state}, {country}"

    params = {
        'q': query,
        'format': 'json',
        'limit': 1,
        'addressdetails': 1
    }

    headers = {
        'User-Agent': USER_AGENT
    }

    try:
        # Without a timeout a single stalled connection would hang the whole run.
        response = requests.get(
            NOMINATIM_URL, params=params, headers=headers, timeout=timeout
        )
        response.raise_for_status()
        results = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network/HTTP failures and undecodable JSON bodies are reported, not raised,
        # so that one bad lookup does not abort the caller's loop.
        print(f"  Error geocoding {query}: {e}")
        return None

    if not results:
        print(f"  No results for: {query}")
        return None

    try:
        result = results[0]
        address = result.get('address', {})

        return {
            'latitude': float(result['lat']),
            'longitude': float(result['lon']),
            # Use the most specific locality field that is present.
            'city': (
                address.get('city')
                or address.get('town')
                or address.get('village')
                or address.get('state_district')
            ),
            'region': address.get('state') or state.title()
        }
    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
        # The response did not have the expected shape (missing/invalid lat/lon, etc.).
        print(f"  Error geocoding {query}: {e}")
        return None

# FAKE OFFERS GENERATORS

In [3]:
def generate_hotel_offers(destination_name: str, theme: str) -> List[Dict]:
    """Generate 3-5 fake hotel offers, sorted by price per night (ascending).

    Args:
        destination_name: Destination name; its first word is appended to each
            hotel name. A blank name yields hotel names without that suffix.
        theme: Theme key ('Beach', 'Temple', 'Hill'); any other value uses the
            'Default' hotel types.

    Returns:
        List of hotel dicts with keys ``name``, ``price_per_night``, ``rating``,
        ``reviews``, ``amenities``, ``distance_km`` and ``availability``.
    """

    hotel_types = {
        'Beach': ['Beach Resort', 'Coastal Hotel', 'Beachfront Villa', 'Seaside Inn', 'Ocean View Resort'],
        'Temple': ['Heritage Hotel', 'Temple View Lodge', 'Pilgrim Rest House', 'Cultural Resort', 'Traditional Inn'],
        'Hill': ['Mountain Resort', 'Hill Station Hotel', 'Valley View Lodge', 'Highland Retreat', 'Peak Resort'],
        'Default': ['Deluxe Hotel', 'Boutique Resort', 'Comfort Inn', 'Premium Lodge', 'Luxury Resort']
    }
    amenity_pool = [
        'Free WiFi', 'Pool', 'Restaurant', 'Bar',
        'Gym', 'Spa', 'Beach Access', 'Parking',
        'Room Service', 'Air Conditioning', 'Sea View'
    ]

    types = hotel_types.get(theme, hotel_types['Default'])

    # Computed once; an empty or whitespace-only name has no first word, so the
    # suffix is dropped instead of raising IndexError.
    name_words = destination_name.split()
    name_suffix = f" - {name_words[0]}" if name_words else ""

    num_hotels = random.randint(3, 5)

    hotels = []
    for _ in range(num_hotels):
        base_price = random.randint(1500, 8000)
        rating = round(random.uniform(3.5, 4.9), 1)

        hotel = {
            'name': f"{random.choice(types)}{name_suffix}",
            'price_per_night': base_price,
            'rating': rating,
            'reviews': random.randint(50, 500),
            'amenities': random.sample(amenity_pool, k=random.randint(4, 7)),
            'distance_km': round(random.uniform(0.1, 5.0), 1),
            'availability': random.choice(['Available', 'Limited', 'Book Now'])
        }
        hotels.append(hotel)

    return sorted(hotels, key=lambda x: x['price_per_night'])


def generate_activity_offers(destination_name: str, theme: str) -> List[Dict]:
    """Build 2-4 fake activity offers drawn from the catalogue for ``theme``.

    Themes other than 'Beach', 'Temple' and 'Hill' use the 'Default' catalogue.
    ``destination_name`` is accepted but not used by the generator.

    Returns:
        List of activity dicts with keys ``name``, ``price``, ``duration_hours``,
        ``rating``, ``reviews``, ``difficulty`` and ``group_size``.
    """

    # Each entry is (activity name, base price, duration in hours).
    catalogue = {
        'Beach': [
            ('Scuba Diving', 2500, 3.0),
            ('Parasailing', 1500, 0.5),
            ('Jet Ski Ride', 1000, 0.25),
            ('Banana Boat Ride', 500, 0.5),
            ('Sunset Cruise', 2000, 2.0),
            ('Snorkeling Tour', 1200, 2.0),
            ('Beach Volleyball', 300, 1.0)
        ],
        'Temple': [
            ('Guided Temple Tour', 500, 2.0),
            ('Cultural Performance', 800, 1.5),
            ('Heritage Walk', 400, 3.0),
            ('Photography Tour', 600, 2.0),
            ('Spiritual Retreat', 1500, 4.0)
        ],
        'Hill': [
            ('Trekking Expedition', 1500, 4.0),
            ('Cable Car Ride', 800, 0.5),
            ('Nature Walk', 500, 2.0),
            ('Mountain Biking', 1200, 3.0),
            ('Camping Experience', 2000, 8.0)
        ],
        'Default': [
            ('City Tour', 800, 3.0),
            ('Food Tour', 1200, 2.0),
            ('Photography Walk', 600, 2.0),
            ('Cultural Experience', 1000, 3.0)
        ]
    }

    if theme in catalogue:
        candidates = catalogue[theme]
    else:
        candidates = catalogue['Default']

    wanted = random.randint(2, 4)
    picks = random.sample(candidates, min(wanted, len(candidates)))

    def build_offer(entry):
        # Turn one catalogue entry into an offer with randomised details.
        title, list_price, hours = entry
        return {
            'name': title,
            'price': list_price + random.randint(-200, 500),
            'duration_hours': hours,
            'rating': round(random.uniform(4.0, 5.0), 1),
            'reviews': random.randint(20, 200),
            'difficulty': random.choice(['Easy', 'Moderate', 'Challenging']),
            'group_size': f"{random.randint(2, 6)}-{random.randint(8, 15)} people"
        }

    return [build_offer(entry) for entry in picks]


def generate_flight_info(state: str, city: str) -> List[Dict]:
    """Return a one-element list describing the nearest airport for ``state``.

    The state is matched case-insensitively against a small built-in table;
    unknown states get a generic 'Regional Airport' entry. ``city`` is accepted
    but not used. Distance, price and airlines are randomised.
    """

    fallback_airport = {'name': 'Regional Airport', 'code': 'XXX', 'distance': 50}
    known_airports = {
        'goa': {'name': 'Dabolim Airport', 'code': 'GOI', 'distance': 30},
        'kerala': {'name': 'Trivandrum International', 'code': 'TRV', 'distance': 45},
        'karnataka': {'name': 'Kempegowda International', 'code': 'BLR', 'distance': 50},
        'tamil nadu': {'name': 'Chennai International', 'code': 'MAA', 'distance': 40},
        'maharashtra': {'name': 'Chhatrapati Shivaji', 'code': 'BOM', 'distance': 35}
    }

    state_key = state.lower()
    if state_key in known_airports:
        hub = known_airports[state_key]
    else:
        hub = fallback_airport

    # Random draws happen in the same order as the fields appear in the result.
    distance = hub['distance'] + random.randint(-10, 20)
    fare = random.randint(3000, 8000)
    carriers = random.sample(['IndiGo', 'Air India', 'SpiceJet', 'Vistara', 'GoAir'], k=3)

    flight = {
        'nearest_airport': hub['name'],
        'airport_code': hub['code'],
        'distance_km': distance,
        'avg_flight_price': fare,
        'airlines': carriers
    }
    return [flight]


def generate_package_offers(destination_name: str, theme: str) -> List[Dict]:
    """Generate 2-3 fake package deals, sorted by price per person (ascending).

    Args:
        destination_name: Destination name; its first word is appended to each
            package name. A blank name yields package names without that suffix.
        theme: Accepted for symmetry with the other generators; not used.

    Returns:
        List of package dicts with keys ``name``, ``duration_days``,
        ``price_per_person``, ``includes``, ``rating``, ``reviews`` and
        ``availability``.
    """

    # An empty or whitespace-only name has no first word; drop the suffix
    # instead of raising IndexError.
    name_words = destination_name.split()
    name_suffix = f" - {name_words[0]}" if name_words else ""

    package_types = [
        {
            'name': f'Weekend Getaway{name_suffix}',
            'duration_days': 2,
            'base_price': 8000
        },
        {
            'name': f'Extended Holiday{name_suffix}',
            'duration_days': 4,
            'base_price': 15000
        },
        {
            'name': f'Luxury Escape{name_suffix}',
            'duration_days': 3,
            'base_price': 25000
        }
    ]
    inclusion_pool = [
        'Accommodation', 'Meals', 'Transfers', 'Sightseeing',
        'Activities', 'Guide', 'Airport Pickup', 'Travel Insurance'
    ]

    num_packages = random.randint(2, 3)
    selected = random.sample(package_types, num_packages)

    packages = []
    for pkg in selected:
        package = {
            'name': pkg['name'],
            'duration_days': pkg['duration_days'],
            'price_per_person': pkg['base_price'] + random.randint(-2000, 5000),
            'includes': random.sample(inclusion_pool, k=random.randint(4, 6)),
            'rating': round(random.uniform(4.2, 4.9), 1),
            'reviews': random.randint(30, 150),
            'availability': random.choice(['Available', 'Limited Slots', 'Booking Fast'])
        }
        packages.append(package)

    return sorted(packages, key=lambda x: x['price_per_person'])

In [4]:
# Walk every theme -> state -> destination in the loaded metadata, attach a
# geocoding result (when available) and a freshly generated set of fake offers.
print("\n" + "="*80)
print("PROCESSING DESTINATIONS")
print("="*80)

total_destinations = 0
geocoded_count = 0
failed_geocoding = []  # "<destination>, <state>" strings for the summary

for theme in metadata['themes']:
    theme_name = theme['theme_name']
    print(f"\nProcessing theme: {theme_name}")

    for state in theme['states']:
        state_name = state['state_name']
        print(f"  State: {state_name}")

        for destination in state['destinations']:
            total_destinations += 1
            dest_name = destination['destination_name']
            dest_id = destination['destination_id']

            print(f"    {dest_name}...", end=" ")

            # GEOCODING
            geo_data = geocode_destination(dest_name, state_name)

            if geo_data:
                destination['geo_location'] = geo_data
                destination['geo_tagged'] = True
                geocoded_count += 1
                print(" Geo", end=" ")
            else:
                failed_geocoding.append(f"{dest_name}, {state_name}")
                # Distinct marker so a failed lookup is not logged like a success.
                print(" Geo failed", end=" ")

            # GENERATE OFFERS
            # Offers are generated whether or not geocoding succeeded.
            destination['offers'] = {
                'hotels': generate_hotel_offers(dest_name, theme_name),
                'activities': generate_activity_offers(dest_name, theme_name),
                'flights': generate_flight_info(state_name, geo_data['city'] if geo_data else None),
                'packages': generate_package_offers(dest_name, theme_name)
            }

            print(" Offers")

            # Rate limiting for Nominatim (1 request per second)
            time.sleep(1.1)

# Report how many destinations were processed and which ones could not be geocoded.
summary_lines = [
    "\n" + "="*80,
    "SUMMARY",
    "="*80,
    f"Total destinations: {total_destinations}",
    f"Successfully geocoded: {geocoded_count}",
    f"Failed geocoding: {len(failed_geocoding)}",
]

# The failure listing is only shown when there is something to list.
if len(failed_geocoding) > 0:
    summary_lines.append("\nFailed locations:")
    summary_lines.extend(f"  - {loc}" for loc in failed_geocoding)

for line in summary_lines:
    print(line)

# UPDATE METADATA
# Take the timestamp once so 'last_updated' and 'enrichment_date' always agree.
enrichment_timestamp = datetime.now().isoformat()
metadata['last_updated'] = enrichment_timestamp
metadata['enrichment_status'] = {
    'geo_tagged': geocoded_count,
    'offers_generated': total_destinations,
    'enrichment_date': enrichment_timestamp
}

print("\n" + "="*80)
print("SAVING UPDATED METADATA")
print("="*80)

# Serialize before opening the file: opening with 'w' truncates it, so a
# serialization error raised mid-dump would otherwise destroy the existing metadata.
serialized_metadata = json.dumps(metadata, indent=2)

with open(METADATA_PATH, 'w', encoding='utf-8') as f:
    f.write(serialized_metadata)

print(f" Saved to: {METADATA_PATH}")

# Print the first destination as a quick sanity check of the enriched structure.
print("\n" + "="*80)
print("ENRICHMENT COMPLETE!")
print("="*80)
print("\nSample destination check:")
first_dest = metadata['themes'][0]['states'][0]['destinations'][0]
print(f"Destination: {first_dest['destination_name']}")
# 'geo_location' is only set when geocoding succeeded, so it may be absent here.
print(f"Geo-location: {first_dest.get('geo_location', 'not available')}")
print(f"Hotels: {len(first_dest['offers']['hotels'])} offers")
print(f"Activities: {len(first_dest['offers']['activities'])} offers")
print(f"Packages: {len(first_dest['offers']['packages'])} offers")
print("="*80)


PROCESSING DESTINATIONS

Processing theme: Beach
  State: goa
    Agonda Beach...  Geo  Offers
    Anjuna Beach...  Geo  Offers
    Arambol Beach...  Geo  Offers
    Ashwem Beach...  Geo  Offers
    Baga Beach...  Geo  Offers
    Betalbatim Beach...  Geo  Offers
    Butterfly Beach...  Geo  Offers
    Calangute Beach...  Geo  Offers
    Candolim Beach...  Geo  Offers
    Colva Beach...  Geo  Offers
    Kakolem Beach...  Geo  Offers
    Mandrem Beach...  Geo  Offers
    Miramar Beach...  Geo  Offers
    Morjim Beach...  Geo  Offers
    Palolem Beach...  Geo  Offers
    Sinquerim Beach...  Geo  Offers
    Vagator Beach...  Geo  Offers
  State: kerala
    Alappuzha Beach...  Geo  Offers
    Bekal Beach...  Geo  Offers
    Cherai Beach...  Geo  Offers
    Kappad Beach...  Geo  Offers
    Kappil Beach...  Geo  Offers
    Kollam Beach...  Geo  Offers
    Kovalam Beach...  Geo  Offers
    Kozhikode Beach...  Geo  Offers
    Muzhappilangad Beach...  Geo  Offers
    Shanghumukham Beach...   No