In [23]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import numpy as np
from sklearn.cluster import KMeans
from math import radians, sin, cos, sqrt, atan2
from typing import List, Tuple
import time

class FastClusterRouter:
    """Approximate a delivery tour by clustering stops and chaining greedy sub-routes.

    Stops are ``(label, (latitude, longitude))`` tuples. The first stop passed
    to :meth:`optimize_large_route` is treated as the warehouse, i.e. the start
    and end of the tour.
    """

    def __init__(self):
        # Both are populated by optimize_large_route().
        self.warehouse_address = None
        self.warehouse_coords = None

    def haversine_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Return the great-circle distance in miles between two lat/lon points.

        Args:
            lat1, lon1: Latitude and longitude of the first point, in degrees.
            lat2, lon2: Latitude and longitude of the second point, in degrees.
        """
        R = 3959.87433  # Earth's radius in miles
        lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
        # Floating-point rounding can push `a` marginally outside [0, 1] for
        # (near-)antipodal points, which would make sqrt(1 - a) raise.
        a = min(1.0, max(0.0, a))
        c = 2 * atan2(sqrt(a), sqrt(1 - a))
        return R * c

    def _distance_matrix(self, addresses: List[Tuple[str, Tuple[float, float]]]) -> np.ndarray:
        """Return the symmetric matrix of pairwise haversine distances (miles)."""
        n = len(addresses)
        distances = np.zeros((n, n))
        for i in range(n):
            lat1, lon1 = addresses[i][1]
            for j in range(i + 1, n):
                lat2, lon2 = addresses[j][1]
                dist = self.haversine_distance(lat1, lon1, lat2, lon2)
                distances[i][j] = dist
                distances[j][i] = dist
        return distances

    def _nearest_neighbor_order(self, distances: np.ndarray) -> List[int]:
        """Return a greedy visiting order starting at index 0 (no return leg)."""
        current = 0
        unvisited = set(range(1, len(distances)))
        order = [current]
        while unvisited:
            current = min(unvisited, key=lambda x: distances[current][x])
            unvisited.remove(current)
            order.append(current)
        return order

    def _path_length(self, stops: List[Tuple[str, Tuple[float, float]]]) -> float:
        """Return the summed haversine distance along consecutive stops."""
        total = 0.0
        for (_, (lat1, lon1)), (_, (lat2, lon2)) in zip(stops, stops[1:]):
            total += self.haversine_distance(lat1, lon1, lat2, lon2)
        return total

    def optimize_cluster(self, addresses: List[Tuple[str, Tuple[float, float]]]) -> Tuple[List[str], float]:
        """Build a nearest-neighbor round trip over ``addresses``.

        The tour starts at ``addresses[0]``, repeatedly moves to the closest
        unvisited stop, and finally returns to ``addresses[0]``.

        Args:
            addresses: ``(label, (lat, lon))`` tuples; the first is the start.

        Returns:
            ``(labels, miles)`` where ``labels`` begins and ends with the
            starting label. An empty input yields ``([], 0.0)``.
        """
        if not addresses:
            return [], 0.0

        distances = self._distance_matrix(addresses)
        order = self._nearest_neighbor_order(distances)
        order.append(0)  # Return to start

        total_distance = sum(distances[a][b] for a, b in zip(order, order[1:]))
        return [addresses[i][0] for i in order], float(total_distance)

    def optimize_large_route(self, addresses: List[Tuple[str, Tuple[float, float]]], cluster_size: int = 10) -> Tuple[List[str], float]:
        """Optimize route for large number of addresses using clustering.

        Delivery stops are grouped with KMeans on their raw lat/lon values,
        each group is ordered greedily starting from the warehouse, and the
        groups are concatenated into a single tour that starts and ends at the
        warehouse.

        Args:
            addresses: ``(label, (lat, lon))`` tuples; ``addresses[0]`` is the
                warehouse.
            cluster_size: Target number of stops per cluster; the cluster count
                is ``len(deliveries) // cluster_size`` (at least 1).

        Returns:
            ``(labels, miles)``. ``miles`` is measured along the returned tour,
            including the hops between consecutive clusters, so it matches the
            route that is actually reported.

        Raises:
            IndexError: If ``addresses`` is empty.
        """
        start_time = time.time()
        print("Starting large route optimization...")

        # Separate warehouse (first address)
        self.warehouse_address = addresses[0]
        self.warehouse_coords = addresses[0][1]
        delivery_addresses = addresses[1:]

        if not delivery_addresses:
            # KMeans cannot be fitted on zero samples; the tour is trivial.
            print("No delivery addresses to route.")
            return [self.warehouse_address[0], self.warehouse_address[0]], 0.0

        # Convert to numpy array for clustering
        coords = np.array([addr[1] for addr in delivery_addresses])

        # Determine number of clusters
        n_clusters = max(len(delivery_addresses) // cluster_size, 1)

        # Perform clustering
        print(f"Clustering {len(delivery_addresses)} addresses into {n_clusters} groups...")
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        clusters = kmeans.fit_predict(coords)

        # Organize addresses by cluster
        clustered_addresses = [[] for _ in range(n_clusters)]
        for stop, cluster_id in zip(delivery_addresses, clusters):
            clustered_addresses[cluster_id].append(stop)

        # Order the stops of each cluster and chain them into one tour
        ordered_stops = [self.warehouse_address]

        print("\nOptimizing individual clusters...")
        for i, cluster in enumerate(clustered_addresses):
            if not cluster:
                continue

            print(f"Optimizing cluster {i+1}/{n_clusters} ({len(cluster)} addresses)")

            # The warehouse is only the greedy starting point; it is not
            # revisited between clusters.
            cluster_with_warehouse = [self.warehouse_address] + cluster
            order = self._nearest_neighbor_order(self._distance_matrix(cluster_with_warehouse))
            ordered_stops.extend(cluster_with_warehouse[k] for k in order[1:])

        # Return to warehouse
        ordered_stops.append(self.warehouse_address)

        final_route = [stop[0] for stop in ordered_stops]
        # Measure the tour as returned rather than summing per-cluster round
        # trips, which would count warehouse legs the route never drives.
        total_distance = self._path_length(ordered_stops)

        # Print summary
        elapsed_time = time.time() - start_time
        print(f"\nOptimization completed in {elapsed_time:.2f} seconds")
        print(f"Total stops: {len(final_route)-1}")
        print(f"Estimated total distance: {total_distance:.2f} miles")

        return final_route, total_distance

class DeliveryRouter:
    """Geocode street addresses and hand the valid ones to a cluster router."""

    def __init__(self):
        """Initialize router with rate-limited geocoding"""
        # Initialize Nominatim with rate limiting to avoid timeouts
        self.geolocator = Nominatim(user_agent="delivery_router_v1")
        self.geocode = RateLimiter(self.geolocator.geocode, min_delay_seconds=1)
        self.cluster_router = FastClusterRouter()

    def validate_address(self, address: str) -> Tuple[bool, tuple]:
        """Validate a Dallas address and return its coordinates.

        Args:
            address: Free-form address string passed to the geocoder.

        Returns:
            ``(True, (latitude, longitude))`` when the geocoder returns a
            location whose resolved address contains ``"Dallas"``; otherwise
            ``(False, None)``. Geocoder errors are printed and also reported
            as ``(False, None)`` rather than raised.
        """
        try:
            location = self.geocode(address, timeout=30)
            if location and "Dallas" in location.address:
                return True, (location.latitude, location.longitude)
            return False, None
        except Exception as e:
            # Best effort: one failing lookup must not abort the whole batch.
            print(f"Error validating address {address}: {str(e)}")
            return False, None

    def optimize_route_clustered(self, addresses: List[str], cluster_size: int = 10) -> Tuple[List[str], float]:
        """Optimize route for large number of addresses.

        Args:
            addresses: Address strings; the first one is the warehouse.
            cluster_size: Forwarded to the cluster router.

        Returns:
            Whatever ``self.cluster_router.optimize_large_route`` returns for
            the validated ``(address, coords)`` pairs.

        Raises:
            ValueError: If the warehouse (first) address cannot be validated,
                or if no address validates at all.
        """
        print("Validating addresses...")

        # Convert addresses to coordinates with progress tracking
        valid_addresses = []
        total = len(addresses)
        for i, addr in enumerate(addresses, 1):
            print(f"Validating address {i}/{total}: {addr}")
            valid, coords = self.validate_address(addr)
            if valid:
                valid_addresses.append((addr, coords))
            elif i == 1:
                # The cluster router treats the first entry as the warehouse.
                # Skipping it would silently promote a delivery stop to that
                # role, so stop before spending time on the remaining lookups.
                raise ValueError(f"Warehouse address could not be validated: {addr}")
            else:
                print(f"⚠️ Skipping invalid address: {addr}")

        if not valid_addresses:
            raise ValueError("No valid addresses found")

        print(f"\nValidated {len(valid_addresses)} out of {total} addresses")
        return self.cluster_router.optimize_large_route(valid_addresses, cluster_size)


In [24]:
# Build the geocoding router used for this run
router = DeliveryRouter()

# Stops to visit; the first entry is the warehouse (start and end of the tour)
addresses = [
    "5351 Fults Blvd., Dallas, TX",
    "4078 Creekdale Drive, Dallas, TX",
    "7526 Morton Street, Dallas, TX",
    "2418 Montalba Avenue, Dallas, TX",
    "1412 Moran Drive, Dallas, TX",
    "6211 Annapolis Lane, Dallas, TX",
    "8814 Redondo Drive, Dallas, TX",
    "2925 Lenway Street, Dallas, TX",
    "3203 Bertrand Avenue, Dallas, TX",
    "7042 Desco Drive, Dallas, TX",
    "8332 Moorcroft Drive, Dallas, TX",
    "3724 Purdue Street, Dallas, TX",
    "7616 Dandy Lane, Dallas, TX",
    "1249 Olden Street, Dallas, TX",
    "1014 Delaware Avenue, Dallas, TX",
    "7318 Crownrich Lane, Dallas, TX",
    "1634 Engle Avenue, Dallas, TX",
    "6218 Velasco Avenue, Dallas, TX",
    "1500 Marilla Street, Dallas, TX",
    "534 Elkhart Avenue, Dallas, TX",
    "11322 Cactus Lane, Dallas, TX",
    "8855 Liptonshire, Dallas, TX",
    "10223 Clary Drive, Dallas, TX",
    "9731 Champa Drive, Dallas, TX",
    "11635 Sahara Way, Dallas, TX",
    "12167 Ridgelake Drive, Dallas, TX",
    "11008 Ridgemeadow Drive, Dallas, TX",
    "10357 Bel Aire, Dallas, TX",
]


try:
    # Geocode every stop, then cluster and order them (progress is printed)
    print("Starting route optimization...")
    optimized_route, total_miles = router.optimize_route_clustered(
        addresses,
        cluster_size=8,
    )

    # Report the tour stop by stop, followed by the distance estimate
    print("\nFinal Route:")
    for i, addr in enumerate(optimized_route):
        print(f"Stop {i}: {addr}")
    print(f"\nTotal estimated distance: {total_miles:.2f} miles")

except Exception as e:
    # Top-level boundary of the notebook cell: report the failure, don't raise
    print(f"Error during optimization: {str(e)}")

Starting route optimization...
Validating addresses...
Validating address 1/28: 5351 Fults Blvd., Dallas, TX
Validating address 2/28: 4078 Creekdale Drive, Dallas, TX
Validating address 3/28: 7526 Morton Street, Dallas, TX
Validating address 4/28: 2418 Montalba Avenue, Dallas, TX
Validating address 5/28: 1412 Moran Drive, Dallas, TX
Validating address 6/28: 6211 Annapolis Lane, Dallas, TX
Validating address 7/28: 8814 Redondo Drive, Dallas, TX
Validating address 8/28: 2925 Lenway Street, Dallas, TX
Validating address 9/28: 3203 Bertrand Avenue, Dallas, TX
Validating address 10/28: 7042 Desco Drive, Dallas, TX
Validating address 11/28: 8332 Moorcroft Drive, Dallas, TX
Validating address 12/28: 3724 Purdue Street, Dallas, TX
Validating address 13/28: 7616 Dandy Lane, Dallas, TX
Validating address 14/28: 1249 Olden Street, Dallas, TX
Validating address 15/28: 1014 Delaware Avenue, Dallas, TX
Validating address 16/28: 7318 Crownrich Lane, Dallas, TX
Validating address 17/28: 1634 Engle Ave