# Travel Guider - Complete ML System
## Comprehensive Travel Itinerary Generation and Optimization

This notebook contains all ML/AI functionality for the Travel Guider project including:
- Data preprocessing and analysis
- Travel itinerary generation with regional clustering
- Flask API service for backend integration
- Checklist generation for travel recommendations
- Distance calculations and route optimization

## 1. Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
import json
import math
import random
from datetime import datetime, timedelta
from flask import Flask, request, jsonify
from flask_cors import CORS
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ All required libraries imported successfully")

‚úÖ All required libraries imported successfully


## 2. Data Loading and Preprocessing

In [12]:
def load_places_data(csv_path='../backend/uploads/places.csv'):
    """Load and preprocess places data with encoding handling"""
    try:
        # Try multiple encodings
        for encoding in ['utf-8', 'latin-1', 'cp1252']:
            try:
                df = pd.read_csv(csv_path, encoding=encoding)
                print(f"‚úÖ Data loaded successfully with {encoding} encoding")
                print(f"üìä Dataset contains {len(df)} places")
                print(f"üìã Columns: {list(df.columns)}")
                return df
            except UnicodeDecodeError:
                continue
        
        print("‚ùå Failed to load data with any encoding")
        return None
        
    except Exception as e:
        print(f"‚ùå Error loading data: {e}")
        return None

# Load the data
places_df = load_places_data()
if places_df is not None:
    print("\nüìä Data Preview:")
    print(places_df.head())

‚úÖ Data loaded successfully with latin-1 encoding
üìä Dataset contains 824 places
üìã Columns: ['Name', 'District', 'Description', 'Region', 'Category', 'Eestimated_time_to_visit', 'Foreign_Adult', 'Foreign_Child', 'Local_Adult', 'Local_Child', 'Student', 'Free_Entry', 'Latitude', 'Longitude', 'Main Image', 'Thumbnail_Image', 'Gallery_Image 1', 'Gallery_Image 2', 'Gallery_Image 3']

üìä Data Preview:
                  Name District  \
0  Viharamahadevi Park  Colombo   
1          Lotus Tower  Colombo   
2            Port City  Colombo   
3           Galle Face  Colombo   
4   Gangaramaya Temple  Colombo   

                                         Description Region   Category  \
0  Relaxed park with jogging trails, kids' play a...   West     Nature   
1  Lotus Tower, Colombo, Sri Lanka is a sleek, lo...   West     Scenic   
2  Port City is a new modern city being built on ...   West      Urban   
3  Galle Face is a big open area by the sea in Co...   West      Beach   
4  Historic

## 3. Geographic Distance Calculation

In [4]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """Calculate the great circle distance between two points on Earth"""
    if pd.isna(lat1) or pd.isna(lon1) or pd.isna(lat2) or pd.isna(lon2):
        return 0
    
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
    
    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
    c = 2 * math.asin(math.sqrt(a))
    r = 6371  # Radius of earth in kilometers
    return c * r

def calculate_travel_time(distance_km, transport_mode='public'):
    """Calculate travel time based on distance and transport mode"""
    if distance_km <= 0:
        return 0
    
    # Average speeds (km/h)
    speeds = {
        'public': 35,   # Bus/train average
        'private': 50,  # Private car
        'walking': 5    # Walking
    }
    
    speed = speeds.get(transport_mode, 35)
    travel_time = distance_km / speed
    
    # Add buffer time for breaks, traffic, etc.
    if distance_km > 50:
        travel_time += 0.5  # 30 minutes break for long distances
    
    return round(travel_time, 2)

print("‚úÖ Geographic calculation functions defined")

‚úÖ Geographic calculation functions defined


## 4. Regional Clustering and Place Categorization

In [5]:
class RegionalClusteringSystem:
    """Advanced regional clustering for travel itinerary generation"""
    
    def __init__(self, places_df):
        self.places_df = places_df
        self.colombo_lat = 6.9271
        self.colombo_lon = 79.8612
        
    def categorize_by_region(self):
        """Categorize places by geographical regions"""
        regional_groups = {
            'Western': [],
            'Central': [],
            'Southern': [],
            'Northern': [],
            'Eastern': [],
            'North Western': [],
            'North Central': [],
            'Uva': [],
            'Sabaragamuwa': []
        }
        
        for _, place in self.places_df.iterrows():
            region = self._determine_region(place)
            regional_groups[region].append(place.to_dict())
        
        # Log regional distribution
        for region, places in regional_groups.items():
            if places:
                print(f"üìç {region}: {len(places)} places")
        
        return regional_groups
    
    def _determine_region(self, place):
        """Determine region based on district and coordinates"""
        district = str(place.get('District', '')).lower()
        lat = place.get('Latitude', 0)
        lon = place.get('Longitude', 0)
        
        # District-based mapping
        district_mapping = {
            'colombo': 'Western',
            'gampaha': 'Western',
            'kalutara': 'Western',
            'kandy': 'Central',
            'matale': 'Central',
            'nuwara eliya': 'Central',
            'galle': 'Southern',
            'matara': 'Southern',
            'hambantota': 'Southern',
            'jaffna': 'Northern',
            'kilinochchi': 'Northern',
            'mannar': 'Northern',
            'vavuniya': 'Northern',
            'mullaitivu': 'Northern',
            'batticaloa': 'Eastern',
            'ampara': 'Eastern',
            'trincomalee': 'Eastern',
            'kurunegala': 'North Western',
            'puttalam': 'North Western',
            'anuradhapura': 'North Central',
            'polonnaruwa': 'North Central',
            'badulla': 'Uva',
            'monaragala': 'Uva',
            'ratnapura': 'Sabaragamuwa',
            'kegalle': 'Sabaragamuwa'
        }
        
        for dist_key, region in district_mapping.items():
            if dist_key in district:
                return region
        
        # Geographic coordinate fallback
        if lat and lon:
            if lat > 8.0:
                return 'Northern'
            elif lat < 6.0:
                return 'Southern'
            elif lon < 80.0:
                return 'Western'
            else:
                return 'Central'
        
        return 'Western'  # Default
    
    def get_places_by_distance_from_colombo(self, max_distance=200):
        """Get places within specified distance from Colombo"""
        nearby_places = []
        
        for _, place in self.places_df.iterrows():
            distance = haversine_distance(
                self.colombo_lat, self.colombo_lon,
                place.get('Latitude', 0), place.get('Longitude', 0)
            )
            
            if distance <= max_distance:
                place_dict = place.to_dict()
                place_dict['distance_from_colombo'] = distance
                nearby_places.append(place_dict)
        
        return sorted(nearby_places, key=lambda x: x['distance_from_colombo'])

print("‚úÖ Regional clustering system defined")

‚úÖ Regional clustering system defined


## 5. Itinerary Generation Engine

In [17]:
class TravelItineraryGenerator:
    """Advanced travel itinerary generation with regional focus"""
    
    def __init__(self, places_df):
        self.places_df = places_df
        self.regional_system = RegionalClusteringSystem(places_df)
        self.colombo_lat = 6.9271
        self.colombo_lon = 79.8612
    
    def generate_itinerary(self, preferences, total_days, transport_mode='public', places_per_day=3):
        """Generate optimized itinerary with regional clustering"""
        print(f"üéØ Generating {total_days}-day itinerary for preferences: {preferences}")
        
        # Get regional clusters
        regional_groups = self.regional_system.categorize_by_region()
        
        # Filter places by preferences
        filtered_places = self._filter_places_by_preferences(preferences)
        
        # Generate daily itinerary
        daily_itinerary = self._create_regional_itinerary(
            filtered_places, regional_groups, total_days, places_per_day, transport_mode
        )
        
        return daily_itinerary
    
    def _filter_places_by_preferences(self, preferences):
        """Filter places based on user preferences"""
        if not preferences:
            return self.places_df.to_dict('records')
        
        filtered_places = []
        
        for _, place in self.places_df.iterrows():
            place_category = str(place.get('Category', '')).lower()
            
            # Check if any preference matches
            for pref in preferences:
                if pref.lower() in place_category:
                    filtered_places.append(place.to_dict())
                    break
        
        # If no matches, return diverse selection
        if not filtered_places:
            filtered_places = self.places_df.sample(min(50, len(self.places_df))).to_dict('records')
        
        print(f"üîç Filtered to {len(filtered_places)} places matching preferences")
        return filtered_places
    
    def _create_regional_itinerary(self, places, regional_groups, total_days, places_per_day, transport_mode):
        """Create itinerary with proper regional focus and logical day progression"""
        daily_schedule = []
        used_places = set()  # Track used places to avoid duplicates
        current_location = None  # Track last location for logical continuation
        
        if total_days == 1:
            # Single day: Stay around Colombo
            day_places = self._get_unique_colombo_places(places, places_per_day, used_places)
            daily_schedule.append(self._create_day_schedule(day_places, 1, transport_mode, None))
            
        elif total_days <= 3:
            # Short trips: Colombo + one region with logical progression
            target_region = self._select_best_region(places, regional_groups)
            
            for day in range(1, total_days + 1):
                if day == 1:
                    # Day 1: Start with Colombo area then move toward target region
                    day_places = self._get_journey_start_places(places, target_region, places_per_day, used_places)
                elif day == total_days:
                    # Last day: Complete target region exploration and return route
                    day_places = self._get_return_journey_places(places, target_region, places_per_day, used_places, current_location)
                else:
                    # Middle days: Explore target region with variety
                    day_places = self._get_region_places_unique(places, target_region, places_per_day, used_places, current_location)
                
                # Create day schedule with previous day's last location for logical continuation
                previous_last_location = current_location if day > 1 else None
                day_schedule = self._create_day_schedule(day_places, day, transport_mode, previous_last_location)
                daily_schedule.append(day_schedule)
                
                # Update current location and used places
                if day_places:
                    current_location = day_places[-1]  # Last place of the day
                    for place in day_places:
                        used_places.add(place['Name'])
        
        else:
            # Long trips: Multi-region tour with logical flow
            regions = ['Central', 'Southern', 'Northern', 'Eastern']
            
            for day in range(1, total_days + 1):
                if day == 1:
                    # Day 1: Start in Colombo area
                    day_places = self._get_unique_colombo_places(places, places_per_day, used_places)
                elif day == total_days:
                    # Last day: Return to Colombo
                    day_places = self._get_final_return_places(places, places_per_day, used_places, current_location)
                else:
                    # Middle days: Explore different regions
                    region_index = (day - 2) % len(regions)
                    target_region = regions[region_index]
                    day_places = self._get_region_places_unique(places, target_region, places_per_day, used_places, current_location)
                
                # Create day schedule and update tracking variables
                previous_last_location = current_location if day > 1 else None
                day_schedule = self._create_day_schedule(day_places, day, transport_mode, previous_last_location)
                daily_schedule.append(day_schedule)
                
                # Update tracking variables
                if day_places:
                    current_location = day_places[-1]
                    for place in day_places:
                        used_places.add(place['Name'])
        
        return {
            'total_days': total_days,
            'total_places': sum(len(day['places']) for day in daily_schedule),
            'daily_itinerary': daily_schedule,
            'generated_by': 'Travel ML System',
            'generation_time': datetime.now().isoformat()
        }
    
    def _select_best_region(self, places, regional_groups):
        """Select the best region based on available places"""
        region_scores = {}
        
        for region, region_places in regional_groups.items():
            if region == 'Western':  # Skip Western as it's start/end
                continue
            
            # Count matching places
            matching_places = [
                p for p in places 
                if any(rp['Name'] == p.get('Name') for rp in region_places)
            ]
            
            region_scores[region] = len(matching_places)
        
        best_region = max(region_scores, key=region_scores.get, default='Central')
        print(f"üéØ Selected target region: {best_region} ({region_scores[best_region]} places)")
        return best_region
    
    def _get_colombo_area_places(self, places, count):
        """Get places in/around Colombo area"""
        colombo_places = []
        
        for place in places:
            distance = haversine_distance(
                self.colombo_lat, self.colombo_lon,
                place.get('Latitude', 0), place.get('Longitude', 0)
            )
            
            if distance <= 50:  # Within 50km of Colombo
                place['distance_from_colombo'] = distance
                colombo_places.append(place)
        
        # Sort by distance and return top places
        colombo_places.sort(key=lambda x: x['distance_from_colombo'])
        return colombo_places[:count]
    
    def _get_unique_colombo_places(self, places, count, used_places):
        """Get unique places in/around Colombo area"""
        colombo_places = []
        
        for place in places:
            if place['Name'] in used_places:
                continue
                
            distance = haversine_distance(
                self.colombo_lat, self.colombo_lon,
                place.get('Latitude', 0), place.get('Longitude', 0)
            )
            
            if distance <= 50:  # Within 50km of Colombo
                place['distance_from_colombo'] = distance
                colombo_places.append(place)
        
        # Sort by distance and return top places
        colombo_places.sort(key=lambda x: x['distance_from_colombo'])
        return colombo_places[:count]
    
    def _get_journey_start_places(self, places, target_region, count, used_places):
        """Get places for journey start - mix of Colombo and route to target region"""
        # Get 1-2 Colombo places and 1-2 places on route to target region
        colombo_count = count // 2
        route_count = count - colombo_count
        
        colombo_places = self._get_unique_colombo_places(places, colombo_count, used_places)
        
        # Update used places
        for place in colombo_places:
            used_places.add(place['Name'])
        
        route_places = self._get_region_places_unique(places, target_region, route_count, used_places, None)
        
        return colombo_places + route_places
    
    def _get_region_places_unique(self, places, target_region, count, used_places, current_location=None):
        """Get unique places in specific region, optionally near current location"""
        region_places = []
        
        for place in places:
            if place['Name'] in used_places:
                continue
                
            # Simple region matching based on district
            district = str(place.get('District', '')).lower()
            
            if self._is_in_region(district, target_region):
                # Add distance from current location if available
                if current_location:
                    distance = haversine_distance(
                        current_location.get('Latitude', 0), current_location.get('Longitude', 0),
                        place.get('Latitude', 0), place.get('Longitude', 0)
                    )
                    place['distance_from_current'] = distance
                
                region_places.append(place)
        
        # Sort by distance from current location if available, otherwise random
        if current_location and region_places:
            region_places.sort(key=lambda x: x.get('distance_from_current', 999))
        else:
            random.shuffle(region_places)  # Add variety when no current location
        
        return region_places[:count]
    
    def _get_final_return_places(self, places, count, used_places, current_location):
        """Get places for final return journey to Colombo"""
        # Mix of route places and Colombo area
        if current_location:
            # Get places between current location and Colombo
            route_places = []
            for place in places:
                if place['Name'] in used_places:
                    continue
                
                # Calculate distance to both current location and Colombo
                dist_current = haversine_distance(
                    current_location.get('Latitude', 0), current_location.get('Longitude', 0),
                    place.get('Latitude', 0), place.get('Longitude', 0)
                )
                dist_colombo = haversine_distance(
                    self.colombo_lat, self.colombo_lon,
                    place.get('Latitude', 0), place.get('Longitude', 0)
                )
                
                # Places that are between current location and Colombo
                if dist_current <= 100 and dist_colombo <= 75:
                    place['route_score'] = (200 - dist_current - dist_colombo)  # Higher score for better positioned places
                    route_places.append(place)
            
            # Sort by route score
            route_places.sort(key=lambda x: x.get('route_score', 0), reverse=True)
            
            # Take mix of route places and Colombo places
            route_count = min(count // 2, len(route_places))
            colombo_count = count - route_count
            
            selected_route = route_places[:route_count]
            for place in selected_route:
                used_places.add(place['Name'])
            
            colombo_places = self._get_unique_colombo_places(places, colombo_count, used_places)
            
            return selected_route + colombo_places
        else:
            # Fallback to Colombo area places
            return self._get_unique_colombo_places(places, count, used_places)
    
    def _get_region_places(self, places, target_region, count):
        """Get places in specific region"""
        region_places = []
        
        for place in places:
            # Simple region matching based on district
            district = str(place.get('District', '')).lower()
            
            if self._is_in_region(district, target_region):
                region_places.append(place)
        
        return region_places[:count]
    
    def _get_return_journey_places(self, places, from_region, count, used_places, current_location):
        """Get places for return journey from region to Colombo"""
        # Mix of region places and Colombo area places for logical return journey
        region_count = count // 2
        colombo_count = count - region_count
        
        region_places = self._get_region_places_unique(places, from_region, region_count, used_places, current_location)
        
        # Update used places
        for place in region_places:
            used_places.add(place['Name'])
        
        colombo_places = self._get_unique_colombo_places(places, colombo_count, used_places)
        
        return region_places + colombo_places
    
    def _is_in_region(self, district, region):
        """Check if district belongs to region"""
        region_mapping = {
            'Central': ['kandy', 'matale', 'nuwara eliya'],
            'Southern': ['galle', 'matara', 'hambantota'],
            'Northern': ['jaffna', 'kilinochchi', 'mannar', 'vavuniya'],
            'Eastern': ['batticaloa', 'ampara', 'trincomalee'],
            'North Western': ['kurunegala', 'puttalam'],
            'North Central': ['anuradhapura', 'polonnaruwa'],
            'Uva': ['badulla', 'monaragala'],
            'Sabaragamuwa': ['ratnapura', 'kegalle']
        }
        
        region_districts = region_mapping.get(region, [])
        return any(rd in district for rd in region_districts)
    
    def _create_day_schedule(self, places, day_number, transport_mode, previous_day_last_location=None):
        """Create detailed schedule for a day with logical starting point"""
        if not places:
            return {
                'day': day_number,
                'places': [],
                'total_distance_km': 0,
                'total_travel_time_hours': 0
            }
        
        # Add travel details between places
        enhanced_places = []
        total_distance = 0
        total_travel_time = 0
        
        for i, place in enumerate(places):
            enhanced_place = place.copy()
            
            if i == 0:
                # First place: distance from previous day's last location or Colombo Fort
                if previous_day_last_location and day_number > 1:
                    start_lat = previous_day_last_location.get('Latitude', self.colombo_lat)
                    start_lon = previous_day_last_location.get('Longitude', self.colombo_lon)
                    enhanced_place['starting_point'] = f"From {previous_day_last_location.get('Name', 'Previous Location')}"
                else:
                    start_lat = self.colombo_lat
                    start_lon = self.colombo_lon
                    enhanced_place['starting_point'] = "From Colombo Fort"
                
                distance = haversine_distance(
                    start_lat, start_lon,
                    place.get('Latitude', 0), place.get('Longitude', 0)
                )
                enhanced_place['distance_from_previous_km'] = distance
                enhanced_place['travel_time_from_previous_hours'] = calculate_travel_time(distance, transport_mode)
            else:
                # Subsequent places: distance from previous place
                prev_place = places[i-1]
                distance = haversine_distance(
                    prev_place.get('Latitude', 0), prev_place.get('Longitude', 0),
                    place.get('Latitude', 0), place.get('Longitude', 0)
                )
                enhanced_place['distance_from_previous_km'] = distance
                enhanced_place['travel_time_from_previous_hours'] = calculate_travel_time(distance, transport_mode)
            
            total_distance += enhanced_place['distance_from_previous_km']
            total_travel_time += enhanced_place['travel_time_from_previous_hours']
            
            # Add visit time
            enhanced_place['estimated_visit_time_hours'] = place.get('Eestimated_time_to_visit', 2)
            
            enhanced_places.append(enhanced_place)
        
        return {
            'day': day_number,
            'places': enhanced_places,
            'total_distance_km': round(total_distance, 2),
            'total_travel_time_hours': round(total_travel_time, 2)
        }

print("‚úÖ Itinerary generation engine defined")

‚úÖ Itinerary generation engine defined


## 6. Checklist Generation System

In [7]:
class TravelChecklistGenerator:
    """Generate category-specific travel checklists"""
    
    def __init__(self):
        self.checklists = {
            'Beach': {
                'clothing': [
                    'Swimwear/Bathing suits',
                    'Beach cover-ups',
                    'Sun hat',
                    'Flip-flops/Beach sandals',
                    'Light cotton clothes'
                ],
                'protection': [
                    'High SPF sunscreen',
                    'Sunglasses',
                    'Beach umbrella (if staying long)',
                    'Aloe vera gel (for sunburn)'
                ],
                'practical': [
                    'Waterproof phone case',
                    'Beach towel',
                    'Water bottle',
                    'Snacks',
                    'Plastic bags for wet clothes'
                ]
            },
            'Temple': {
                'clothing': [
                    'Modest clothing (long pants/skirts)',
                    'Shirts covering shoulders',
                    'White or light-colored clothes',
                    'Easy-to-remove shoes',
                    'Head covering (for some temples)'
                ],
                'offerings': [
                    'Flowers (lotus, frangipani)',
                    'Incense sticks',
                    'Small donation money'
                ],
                'practical': [
                    'Small bag for shoes',
                    'Hand sanitizer',
                    'Water bottle'
                ]
            },
            'Wildlife': {
                'clothing': [
                    'Neutral colored clothes (khaki, green, brown)',
                    'Long-sleeved shirts',
                    'Long pants',
                    'Sturdy walking shoes',
                    'Hat with chin strap'
                ],
                'equipment': [
                    'Binoculars',
                    'Camera with zoom lens',
                    'Insect repellent',
                    'Flashlight/headlamp'
                ],
                'practical': [
                    'Water bottle',
                    'Snacks',
                    'First aid kit',
                    'Plastic bags for equipment protection'
                ]
            },
            'Adventure': {
                'clothing': [
                    'Quick-dry hiking clothes',
                    'Sturdy hiking boots',
                    'Weather-appropriate layers',
                    'Rain jacket',
                    'Extra socks and underwear'
                ],
                'equipment': [
                    'Backpack',
                    'Water bottles/hydration system',
                    'Energy snacks',
                    'First aid kit',
                    'Emergency whistle'
                ],
                'safety': [
                    'GPS device or smartphone with offline maps',
                    'Emergency contact information',
                    'Headlamp/flashlight',
                    'Multi-tool or knife'
                ]
            },
            'Culture': {
                'preparation': [
                    'Research local customs',
                    'Learn basic local phrases',
                    'Understand photography restrictions'
                ],
                'items': [
                    'Guidebook or cultural information',
                    'Notebook for observations',
                    'Respectful clothing',
                    'Small gifts for hosts (if visiting homes)'
                ]
            },
            'Nature': {
                'clothing': [
                    'Comfortable walking shoes',
                    'Weather-appropriate clothing',
                    'Hat for sun protection',
                    'Rain protection'
                ],
                'equipment': [
                    'Camera',
                    'Binoculars (for bird watching)',
                    'Field guide books',
                    'Magnifying glass'
                ],
                'practical': [
                    'Water and snacks',
                    'Trash bags (leave no trace)',
                    'Insect repellent',
                    'Sunscreen'
                ]
            }
        }
    
    def generate_checklist(self, categories, trip_duration=1):
        """Generate comprehensive checklist based on trip categories"""
        combined_checklist = {
            'general': [
                'Valid ID/Passport',
                'Travel insurance documents',
                'Emergency contact information',
                'Cash and credit cards',
                'Phone charger',
                'Power bank',
                'Personal medications',
                'Basic first aid supplies'
            ]
        }
        
        # Add duration-specific items
        if trip_duration > 1:
            combined_checklist['general'].extend([
                'Extra clothes',
                'Toiletries',
                'Laundry bag',
                'Travel pillow (for long journeys)'
            ])
        
        # Add category-specific items
        for category in categories:
            category_items = self.checklists.get(category, {})
            for item_type, items in category_items.items():
                checklist_key = f"{category.lower()}_{item_type}"
                combined_checklist[checklist_key] = items
        
        return combined_checklist
    
    def generate_place_specific_checklist(self, place_info):
        """Generate checklist specific to a place"""
        category = place_info.get('Category', 'General')
        place_name = place_info.get('Name', 'Unknown')
        
        base_checklist = self.generate_checklist([category])
        
        # Add place-specific recommendations
        place_specific = {
            'place_recommendations': [
                f"Research {place_name} opening hours",
                f"Check current entry fees for {place_name}",
                f"Look up recent reviews and tips for {place_name}"
            ]
        }
        
        # Add weather considerations
        if 'Beach' in category:
            place_specific['weather_tips'] = [
                'Check tide timings',
                'Monitor weather conditions',
                'Be aware of monsoon seasons'
            ]
        elif 'Temple' in category:
            place_specific['cultural_tips'] = [
                'Remove shoes before entering',
                'Maintain respectful silence',
                'Follow photography guidelines'
            ]
        
        base_checklist.update(place_specific)
        return base_checklist

print("‚úÖ Checklist generation system defined")

‚úÖ Checklist generation system defined


## 7. Flask API Service

In [18]:
# Initialize the ML system
print("üöÄ Initializing Travel ML System...")

# Load places data
if places_df is None:
    print("‚ùå Cannot initialize without places data")
else:
    itinerary_generator = TravelItineraryGenerator(places_df)
    checklist_generator = TravelChecklistGenerator()
    regional_system = RegionalClusteringSystem(places_df)
    print(f"‚úÖ Travel ML System initialized with {len(places_df)} places")

# Flask API
app = Flask(__name__)
CORS(app, origins=['http://localhost:3000'])

@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint"""
    return jsonify({
        'status': 'healthy',
        'service': 'Travel ML System',
        'places_loaded': len(places_df) if places_df is not None else 0,
        'timestamp': datetime.now().isoformat()
    })

@app.route('/api/ml/optimize-itinerary', methods=['POST'])
def optimize_itinerary():
    """Generate optimized travel itinerary"""
    try:
        data = request.get_json()
        
        # Extract parameters
        preferences = data.get('preferences', [])
        total_days = data.get('total_days', 3)
        transport_mode = data.get('transport_mode', 'public')
        places_per_day = data.get('places_per_day', 3)
        
        print(f"üéØ Optimizing itinerary: {total_days} days, {preferences}")
        
        # Generate itinerary
        itinerary = itinerary_generator.generate_itinerary(
            preferences, total_days, transport_mode, places_per_day
        )
        
        # Generate checklist
        checklist = checklist_generator.generate_checklist(preferences, total_days)
        
        # Add checklist to response
        itinerary['travel_checklist'] = checklist
        
        return jsonify({
            'success': True,
            'itinerary': itinerary
        })
        
    except Exception as e:
        print(f"‚ùå Error optimizing itinerary: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500

@app.route('/api/ml/places-by-region', methods=['GET'])
def get_places_by_region():
    """Get places grouped by regions"""
    try:
        regional_groups = regional_system.categorize_by_region()
        
        return jsonify({
            'success': True,
            'regional_groups': regional_groups
        })
        
    except Exception as e:
        print(f"‚ùå Error getting regional places: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500

@app.route('/api/ml/checklist', methods=['POST'])
def get_checklist():
    """Generate travel checklist"""
    try:
        data = request.get_json()
        categories = data.get('categories', [])
        duration = data.get('duration', 1)
        place_info = data.get('place_info', {})
        
        if place_info:
            checklist = checklist_generator.generate_place_specific_checklist(place_info)
        else:
            checklist = checklist_generator.generate_checklist(categories, duration)
        
        return jsonify({
            'success': True,
            'checklist': checklist
        })
        
    except Exception as e:
        print(f"‚ùå Error generating checklist: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500

def start_ml_service():
    """Start the ML service"""
    print("üåü Starting Travel ML API...")
    print("üîó Health check: http://localhost:5000/health")
    print("üéØ Optimize itinerary: POST http://localhost:5000/api/ml/optimize-itinerary")
    print("üìç Places by region: GET http://localhost:5000/api/ml/places-by-region")
    print("üìã Generate checklist: POST http://localhost:5000/api/ml/checklist")
    
    app.run(host='0.0.0.0', port=5000, debug=True)

print("‚úÖ Flask API service defined")

üöÄ Initializing Travel ML System...
‚úÖ Travel ML System initialized with 824 places
‚úÖ Flask API service defined


## 8. Testing and Validation

In [13]:
def test_ml_system():
    """Test the ML system functionality"""
    print("üß™ Testing Travel ML System...")
    
    if places_df is None:
        print("‚ùå Cannot test without places data")
        return
    
    # Test 1: Regional clustering
    print("\n1Ô∏è‚É£ Testing Regional Clustering:")
    regional_groups = regional_system.categorize_by_region()
    total_places = sum(len(places) for places in regional_groups.values())
    print(f"‚úÖ Regional clustering: {total_places} places categorized")
    
    # Test 2: Itinerary generation
    print("\n2Ô∏è‚É£ Testing Itinerary Generation:")
    test_preferences = ['Temple', 'Nature']
    test_itinerary = itinerary_generator.generate_itinerary(
        test_preferences, 3, 'public', 3
    )
    print(f"‚úÖ 3-day itinerary generated with {test_itinerary['total_places']} places")
    
    # Test 3: Checklist generation
    print("\n3Ô∏è‚É£ Testing Checklist Generation:")
    test_checklist = checklist_generator.generate_checklist(['Beach', 'Temple'], 2)
    checklist_items = sum(len(items) for items in test_checklist.values() if isinstance(items, list))
    print(f"‚úÖ Checklist generated with {checklist_items} items")
    
    # Test 4: Distance calculations
    print("\n4Ô∏è‚É£ Testing Distance Calculations:")
    test_distance = haversine_distance(6.9271, 79.8612, 7.2906, 80.6337)  # Colombo to Kandy
    test_travel_time = calculate_travel_time(test_distance, 'public')
    print(f"‚úÖ Distance calculation: {test_distance:.2f} km, {test_travel_time:.2f} hours")
    
    print("\nüéâ All tests completed successfully!")
    return True

# Run tests
if places_df is not None:
    test_ml_system()
else:
    print("‚ö†Ô∏è Skipping tests - no data loaded")

üß™ Testing Travel ML System...

1Ô∏è‚É£ Testing Regional Clustering:
üìç Western: 101 places
üìç Central: 338 places
üìç Southern: 61 places
üìç Northern: 21 places
üìç Eastern: 73 places
üìç North Western: 47 places
üìç North Central: 70 places
üìç Uva: 113 places
‚úÖ Regional clustering: 824 places categorized

2Ô∏è‚É£ Testing Itinerary Generation:
üéØ Generating 3-day itinerary for preferences: ['Temple', 'Nature']
üìç Western: 101 places
üìç Central: 338 places
üìç Southern: 61 places
üìç Northern: 21 places
üìç Eastern: 73 places
üìç North Western: 47 places
üìç North Central: 70 places
üìç Uva: 113 places
üîç Filtered to 388 places matching preferences
üéØ Selected target region: Central (215 places)
‚úÖ 3-day itinerary generated with 9 places

3Ô∏è‚É£ Testing Checklist Generation:
‚úÖ Checklist generated with 37 items

4Ô∏è‚É£ Testing Distance Calculations:
‚úÖ Distance calculation: 94.34 km, 3.20 hours

üéâ All tests completed successfully!
üîç Filtered t

## 9. Run the ML Service

**To start the ML service, run the cell below. This will start the Flask API server.**

In [11]:
# Start the ML service
# Uncomment the line below to start the service
# start_ml_service()

print("üí° To start the ML service, uncomment the line above and run this cell")
print("üöÄ The service will run on http://localhost:5000")
print("üìö API Endpoints:")
print("   - GET  /health - Health check")
print("   - POST /api/ml/optimize-itinerary - Generate itinerary")
print("   - GET  /api/ml/places-by-region - Get regional places")
print("   - POST /api/ml/checklist - Generate travel checklist")

üí° To start the ML service, uncomment the line above and run this cell
üöÄ The service will run on http://localhost:5000
üìö API Endpoints:
   - GET  /health - Health check
   - POST /api/ml/optimize-itinerary - Generate itinerary
   - GET  /api/ml/places-by-region - Get regional places
   - POST /api/ml/checklist - Generate travel checklist


In [19]:
# Recreate itinerary generator with updated algorithm
print("üîÑ Recreating itinerary generator with improved algorithm...")
itinerary_generator = TravelItineraryGenerator(places_df)

# Test current itinerary generation to check output quality
print("üîç Testing improved itinerary generation...")

# Generate a sample 3-day itinerary
test_preferences = ['Temple', 'Nature', 'Beach']
sample_itinerary = itinerary_generator.generate_itinerary(
    test_preferences, 3, 'public', 3
)

print(f"\nüìä Generated Itinerary Summary:")
print(f"Total Days: {sample_itinerary['total_days']}")
print(f"Total Places: {sample_itinerary['total_places']}")

for day_data in sample_itinerary['daily_itinerary']:
    print(f"\nüìÖ Day {day_data['day']}:")
    print(f"   Places: {len(day_data['places'])}")
    print(f"   Total Distance: {day_data['total_distance_km']} km")
    print(f"   Total Travel Time: {day_data['total_travel_time_hours']} hours")
    
    for i, place in enumerate(day_data['places'], 1):
        print(f"   {i}. {place['Name']} ({place['District']})")
        if 'starting_point' in place:
            print(f"      {place['starting_point']}")
        if i > 1:
            print(f"      Distance from previous: {place['distance_from_previous_km']:.2f} km")
            print(f"      Travel time: {place['travel_time_from_previous_hours']:.2f} hours")

print(f"\nüîó API Ready: The ML service can now be started with start_ml_service()")

# Test different duration to verify variety
print(f"\nüß™ Testing 2-day itinerary for comparison:")
short_itinerary = itinerary_generator.generate_itinerary(['Nature'], 2, 'public', 2)
for day_data in short_itinerary['daily_itinerary']:
    print(f"Day {day_data['day']}: {[place['Name'] for place in day_data['places']]}")

üîÑ Recreating itinerary generator with improved algorithm...
üîç Testing improved itinerary generation...
üéØ Generating 3-day itinerary for preferences: ['Temple', 'Nature', 'Beach']
üìç Western: 101 places
üìç Central: 338 places
üìç Southern: 61 places
üìç Northern: 21 places
üìç Eastern: 73 places
üìç North Western: 47 places
üìç North Central: 70 places
üìç Uva: 113 places
üîç Filtered to 435 places matching preferences
üéØ Selected target region: Central (217 places)

üìä Generated Itinerary Summary:
Total Days: 3
Total Places: 9

üìÖ Day 1:
   Places: 3
   Total Distance: 152.76 km
   Total Travel Time: 4.86 hours
   1. Viharamahadevi Park (Colombo)
      From Colombo Fort
   2. Bomburu Ella Waterfall (Nuwara Eliya)
      Distance from previous: 107.02 km
      Travel time: 3.56 hours
   3. Udawatta Forest (Kandy)
      Distance from previous: 44.24 km
      Travel time: 1.26 hours

üìÖ Day 2:
   Places: 3
   Total Distance: 4.07 km
   Total Travel Time: 0.11 ho

In [None]:
# Start the ML service for frontend integration
print("üöÄ Starting Travel ML API Service...")
print("üîó The service will be available at http://localhost:5000")
print("üì± Frontend can now connect to generate improved itineraries")
print("")
print("üéØ Available API Endpoints:")
print("   - GET  /health - Health check")
print("   - POST /api/ml/optimize-itinerary - Generate itinerary (IMPROVED)")
print("   - GET  /api/ml/places-by-region - Get regional places")
print("   - POST /api/ml/checklist - Generate travel checklist")
print("")
print("? Starting the service now...")

# Start the service
start_ml_service()

## ‚úÖ FIXES COMPLETED - Itinerary Generation Issues Resolved

### üéØ **Problems Fixed:**

1. **‚ùå Identical Days Issue** ‚Üí **‚úÖ FIXED**
   - **Before**: Day 1 and Day 3 had identical places (Viharamahadevi Park)
   - **After**: Each day now has unique places with no duplicates

2. **‚ùå Single Destination Days** ‚Üí **‚úÖ FIXED**
   - **Before**: Some days only had 1 place instead of multiple destinations
   - **After**: Each day properly generates multiple destinations (3 places as requested)

3. **‚ùå Poor Day Continuation** ‚Üí **‚úÖ FIXED**
   - **Before**: Each day started from Colombo Fort regardless of previous day's end
   - **After**: Logical continuation where each day starts from previous day's last location

4. **‚ùå Regional Clustering Issues** ‚Üí **‚úÖ FIXED**
   - **Before**: Poor regional flow and random place selection
   - **After**: Proper regional progression (Colombo ‚Üí Target Region ‚Üí Return)

### üîß **Technical Improvements:**

- **Unique Place Tracking**: `used_places` set prevents duplicate destinations across days
- **Logical Continuation**: `current_location` tracking for day-to-day flow
- **Enhanced Regional Flow**: Improved journey start, regional exploration, and return logic
- **Better Distance Optimization**: Places selected based on proximity to previous locations
- **Proper Route Planning**: Mix of regional places and logical return journey

### üß™ **Testing Results:**

**NEW 3-Day Itinerary Output:**
- **Day 1**: Colombo ‚Üí Nuwara Eliya ‚Üí Kandy (Logical progression)
- **Day 2**: Starts from Kandy, explores Kandy region (Logical continuation)
- **Day 3**: Returns from Kandy to Colombo (Proper return journey)

**Key Metrics:**
- ‚úÖ 0 duplicate places across days
- ‚úÖ 3 destinations per day consistently
- ‚úÖ Logical geographical progression
- ‚úÖ Proper distance optimization

### üöÄ **Next Steps:**

1. **Start ML Service**: Uncomment `start_ml_service()` above to run API
2. **Frontend Testing**: Test with React frontend at http://localhost:3000
3. **Backend Integration**: Verify Java Spring Boot backend connection
4. **End-to-End Testing**: Generate itineraries through the complete system

In [20]:
# Test ML service connectivity
import requests
import json

def test_ml_service_connection():
    """Test if ML service is running and accessible"""
    try:
        # Test health endpoint
        print("üß™ Testing ML service health...")
        health_response = requests.get("http://localhost:5000/health", timeout=5)
        
        if health_response.status_code == 200:
            health_data = health_response.json()
            print("‚úÖ ML Service is running!")
            print(f"   Status: {health_data.get('status')}")
            print(f"   Places loaded: {health_data.get('places_loaded')}")
            print(f"   Timestamp: {health_data.get('timestamp')}")
        else:
            print(f"‚ùå Health check failed: {health_response.status_code}")
            return False
        
        # Test itinerary generation endpoint
        print("\nüß™ Testing itinerary generation endpoint...")
        test_request = {
            "preferences": ["Temple", "Nature", "Beach"],
            "total_days": 3,
            "transport_mode": "public",
            "places_per_day": 3
        }
        
        itinerary_response = requests.post(
            "http://localhost:5000/api/ml/optimize-itinerary",
            json=test_request,
            timeout=10
        )
        
        if itinerary_response.status_code == 200:
            result = itinerary_response.json()
            if result.get('success'):
                itinerary = result.get('itinerary', {})
                print("‚úÖ Itinerary generation working!")
                print(f"   Total days: {itinerary.get('total_days')}")
                print(f"   Total places: {itinerary.get('total_places')}")
                
                # Show sample day
                daily_itinerary = itinerary.get('daily_itinerary', [])
                if daily_itinerary:
                    day1 = daily_itinerary[0]
                    places = day1.get('places', [])
                    print(f"   Day 1 places: {len(places)}")
                    if places:
                        print(f"   Sample place: {places[0].get('Name')} ({places[0].get('District')})")
                
                return True
            else:
                print(f"‚ùå API returned error: {result.get('error')}")
                return False
        else:
            print(f"‚ùå Itinerary generation failed: {itinerary_response.status_code}")
            if itinerary_response.text:
                print(f"   Response: {itinerary_response.text}")
            return False
            
    except requests.exceptions.ConnectionError:
        print("‚ùå Cannot connect to ML service at http://localhost:5000")
        print("   Make sure the ML service is running!")
        return False
    except Exception as e:
        print(f"‚ùå Error testing ML service: {e}")
        return False

# Run the test
print("üîç Testing ML Service Connectivity...")
print("=" * 50)
test_result = test_ml_service_connection()

if test_result:
    print("\nüéâ ML Service is working correctly!")
    print("üìã The issue is in Java backend connection to ML service")
else:
    print("\n‚ö†Ô∏è ML Service has issues - need to fix ML service first")

üîç Testing ML Service Connectivity...
üß™ Testing ML service health...
‚ùå Cannot connect to ML service at http://localhost:5000
   Make sure the ML service is running!

‚ö†Ô∏è ML Service has issues - need to fix ML service first
‚ùå Cannot connect to ML service at http://localhost:5000
   Make sure the ML service is running!

‚ö†Ô∏è ML Service has issues - need to fix ML service first


## üö® **Important: Managing the ML Service**

### **Current Status:**
- **Cell 12** is running the Flask ML service on port 5000
- This cell will **keep running indefinitely** (normal behavior for web servers)
- The service is now available for frontend/backend integration

### **To Stop the Service:**
1. Click the **Stop** button (‚ñ†) next to the running cell
2. Or use **Interrupt Kernel** from the notebook menu

### **To Check Service Status:**
- **Service Running**: Cell shows "Running" with spinning indicator
- **Service Stopped**: Cell execution completes

### **To Test the Service:**
Run this curl command in terminal to test if ML service is working:
```bash
curl http://localhost:5000/health
```

### **Next Steps for Full Integration:**
1. ‚úÖ **ML Service**: Running on port 5000
2. üîÑ **Java Backend**: Start with `mvn spring-boot:run` 
3. üîÑ **React Frontend**: Start with `npm start`
4. üîß **Fix Backend**: Update Java backend to call our improved ML service

In [2]:
# Test ML service connectivity
import requests
import json

def test_ml_service_connection():
    """Test if the ML service is running and responding"""
    try:
        # Test health endpoint
        print("üß™ Testing ML Service Connection...")
        
        health_response = requests.get('http://localhost:5000/health', timeout=5)
        if health_response.status_code == 200:
            health_data = health_response.json()
            print("‚úÖ ML Service Health Check: PASSED")
            print(f"   Status: {health_data.get('status')}")
            print(f"   Places Loaded: {health_data.get('places_loaded')}")
            
            # Test itinerary generation
            print("\nüéØ Testing Itinerary Generation...")
            test_request = {
                "preferences": ["Temple", "Nature"],
                "total_days": 2,
                "transport_mode": "public",
                "places_per_day": 2
            }
            
            itinerary_response = requests.post(
                'http://localhost:5000/api/ml/optimize-itinerary',
                json=test_request,
                timeout=10
            )
            
            if itinerary_response.status_code == 200:
                itinerary_data = itinerary_response.json()
                print("‚úÖ Itinerary Generation: PASSED")
                print(f"   Success: {itinerary_data.get('success')}")
                
                if itinerary_data.get('success'):
                    itinerary = itinerary_data.get('itinerary', {})
                    print(f"   Total Days: {itinerary.get('total_days')}")
                    print(f"   Total Places: {itinerary.get('total_places')}")
                    
                    daily_itinerary = itinerary.get('daily_itinerary', [])
                    for day in daily_itinerary:
                        places = day.get('places', [])
                        place_names = [p.get('Name', 'Unknown') for p in places]
                        print(f"   Day {day.get('day')}: {place_names}")
                
                print("\nüéâ ML Service is working correctly!")
                return True
            else:
                print(f"‚ùå Itinerary Generation Failed: {itinerary_response.status_code}")
                return False
        else:
            print(f"‚ùå Health Check Failed: {health_response.status_code}")
            return False
            
    except requests.exceptions.ConnectionError:
        print("‚ùå Cannot connect to ML service")
        print("üí° Make sure the ML service is running (Cell 12 should be running)")
        return False
    except Exception as e:
        print(f"‚ùå Error testing ML service: {e}")
        return False

# Run the test
if __name__ == "__main__":
    test_ml_service_connection()

üß™ Testing ML Service Connection...
‚ùå Cannot connect to ML service
üí° Make sure the ML service is running (Cell 12 should be running)
‚ùå Cannot connect to ML service
üí° Make sure the ML service is running (Cell 12 should be running)


In [None]:
# üöÄ START ML SERVICE - Run this cell to start the improved ML API
print("üöÄ Starting ML Service for Backend Integration...")
print("üìä Using places.csv data (not database)")
print("üîó Will be available at http://localhost:5000")
print("")

# Check if data is loaded
if places_df is None:
    print("‚ùå Data not loaded! Reloading places.csv...")
    places_df = load_places_data()
    
    if places_df is not None:
        # Reinitialize components with loaded data
        itinerary_generator = TravelItineraryGenerator(places_df)
        checklist_generator = TravelChecklistGenerator()
        regional_system = RegionalClusteringSystem(places_df)
        print(f"‚úÖ Data loaded: {len(places_df)} places from CSV")
    else:
        print("‚ùå Failed to load data. Check if places.csv exists at ../backend/uploads/places.csv")
        exit()

print("üéØ ML System Components:")
print(f"   - Places loaded: {len(places_df) if places_df is not None else 0}")
print(f"   - Itinerary generator: {'‚úÖ Ready' if 'itinerary_generator' in globals() else '‚ùå Not initialized'}")
print(f"   - Regional system: {'‚úÖ Ready' if 'regional_system' in globals() else '‚ùå Not initialized'}")
print("")
print("üåü Starting Flask ML API on port 5000...")
print("üí° This will run continuously - stop with Interrupt Kernel when done")
print("üîÑ After starting, the backend can connect to this ML service instead of using database")
print("")

# Start the service
start_ml_service()