In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 27 00:40:22 2025

@author: bingh

"""

import pandas as pd
import folium
from folium.plugins import MarkerCluster, Search, MeasureControl
from geopy.geocoders import Nominatim
import time
import re
import os
import random

def super_clean_address(address):
    """Strip unit, level and annotation noise from a raw outlet address.

    The cleaning steps run in this order:

    1. Drop the literal block markers ``BLK`` / ``BIK`` / ``BKL``.
    2. Drop unit tokens that start with ``#`` (e.g. ``#01-23``).
    3. Drop the ``-<digits>`` tail of a number range, keeping a space so the
       surrounding words stay separated.
    4. Drop parenthesised text, ``Level <n>`` and standalone ``L<n>`` /
       ``B<n>`` tokens (case-insensitive).
    5. Collapse runs of whitespace.
    6. If a postal code of the form ``S`` + six digits is present, discard
       everything after it.

    Args:
        address: Raw address string.

    Returns:
        The cleaned address with no leading or trailing whitespace.
    """
    address = address.replace('BLK', '').replace('BIK', '').replace('BKL', '')
    address = re.sub(r'#\S+', '', address)
    # Replace with a space (not ''): the pattern consumes the whitespace after
    # the digits, and deleting it would glue "10-12 Main" into "10Main".
    address = re.sub(r'-\d+\s', ' ', address)
    address = re.sub(r'\(.*?\)', '', address)
    address = re.sub(r'Level\s*\d+', '', address, flags=re.IGNORECASE)
    address = re.sub(r'\b(?:L\d|B\d)\b', '', address, flags=re.IGNORECASE)
    address = re.sub(r'\s+', ' ', address).strip()

    postal_match = re.search(r'S\d{6}', address)
    if postal_match is None:
        return address

    # Keep only the text before the postal code plus the code itself; the
    # final strip avoids a leading space when the code starts the address.
    street_part = address[:postal_match.start()].strip()
    return f"{street_part} {postal_match.group(0)}".strip()

def geocode_address(geolocator, address):
    """Geocode *address* in Singapore, falling back to its postal code.

    The full address (suffixed with ", Singapore") is queried first. If that
    returns nothing and the address contains a postal code of the form ``S``
    + six digits, the postal code alone is queried.

    Args:
        geolocator: Object exposing ``geocode(query)`` that returns a falsy
            value or a result with ``latitude``, ``longitude`` and
            ``address`` attributes.
        address: Address text to look up.

    Returns:
        ``(latitude, longitude, resolved_address)`` on success, otherwise
        ``(None, None, None)``. Any exception raised during the lookup is
        printed and reported as a failed lookup instead of propagating.
    """
    not_found = (None, None, None)

    # A blank address would reduce the query to ", Singapore", which matches
    # the country itself and would be reported as a successful lookup.
    if isinstance(address, str) and not address.strip():
        return not_found

    try:
        query = address + ", Singapore"
        location = geolocator.geocode(query)

        if not location:
            postal_search = re.search(r'(S\d{6})', address)
            if postal_search:
                fallback_query = postal_search.group(1) + ", Singapore"
                # Skip the fallback when it would repeat the exact query
                # that just failed (address is nothing but the postal code).
                if fallback_query != query:
                    location = geolocator.geocode(fallback_query)

        if location:
            return location.latitude, location.longitude, location.address
        return not_found
    except Exception as e:
        # Deliberately broad: one bad lookup must not abort a whole batch.
        print(f"Error geocoding {address}: {e}")
        return not_found

def _build_marker(lat, lon, popup_html, staff, color):
    """Create a fresh folium marker with its own popup, tooltip and icon.

    A folium element records a single parent, so each layer that should
    display a point is given its own marker instance.
    """
    return folium.Marker(
        [lat, lon],
        popup=folium.Popup(popup_html, max_width=300),
        tooltip=staff,
        icon=folium.Icon(color=color, icon="info-sign"),
    )


def plot_and_process_addresses(address_file, cache_file, output_map, failed_output_file):
    """Geocode every outlet in a CSV and write an interactive folium map.

    Reads ``address_file`` (must contain 'Outlet Address' and 'Staff'
    columns), resolves each address through the geocode cache or Nominatim,
    and plots the points that fall inside the Singapore bounding box. Each
    point is added to a per-staff layer, a marker-cluster layer and a layer
    used by the search control.

    Args:
        address_file: Path of the input CSV.
        cache_file: Path of the geocode cache CSV; loaded if it exists and
            always rewritten, even when the run is interrupted.
        output_map: Path of the HTML map to write.
        failed_output_file: Path of the CSV listing rows that could not be
            plotted (missing address, failed lookup, or out of bounds).
            Only written when there is at least one such row.

    Returns:
        None. If the required columns are missing a message is printed and
        nothing is written.
    """
    # Read the full address file
    df = pd.read_csv(address_file)

    if not {'Outlet Address', 'Staff'}.issubset(df.columns):
        print("Input file must have 'Outlet Address' and 'Staff' columns!")
        return

    # Load cache if exists
    cache_columns = ['Original Address', 'Cleaned Address', 'Latitude', 'Longitude', 'Full Address', 'Staff']
    if os.path.exists(cache_file):
        cache_df = pd.read_csv(cache_file)
    else:
        cache_df = pd.DataFrame(columns=cache_columns)

    # Index the cache once instead of filtering the frame for every row.
    # setdefault keeps the first cached entry for an address.
    cached_locations = {}
    for cached_address, cached_lat, cached_lon, cached_full in zip(
        cache_df['Original Address'],
        cache_df['Latitude'],
        cache_df['Longitude'],
        cache_df['Full Address'],
    ):
        cached_locations.setdefault(cached_address, (cached_lat, cached_lon, cached_full))

    geolocator = Nominatim(user_agent="singapore_final_mapper")

    # Initialize Map
    map_sg = folium.Map(location=[1.3521, 103.8198], zoom_start=11)

    # Add Measurement Tool
    map_sg.add_child(MeasureControl(primary_length_unit='kilometers', secondary_length_unit='meters'))

    # Setup groups
    merchandiser_groups = {}
    color_palette = [
        'red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred',
        'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple',
        'white', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray'
    ]
    random.shuffle(color_palette)
    merchandiser_color = {}

    failed_list = []
    new_cache_rows = []
    marker_specs = []  # (lat, lon, popup_html, staff, color) for every plotted point

    try:
        for idx, row in df.iterrows():
            original_address = row['Outlet Address']
            merchandiser = row['Staff']

            # Blank CSV cells arrive as NaN (a float), which cannot be cleaned.
            if not isinstance(original_address, str) or not original_address.strip():
                print(f"Skipping row {idx}: missing outlet address")
                failed_list.append({'Outlet Address': original_address, 'Cleaned Address': '', 'Staff': merchandiser})
                continue

            cleaned_address = super_clean_address(original_address)

            if original_address in cached_locations:
                lat, lon, full_address = cached_locations[original_address]
                print(f"Using cached location: {cleaned_address}")
            else:
                lat, lon, full_address = geocode_address(geolocator, cleaned_address)
                # Pause after every lookup, successful or not, so failed
                # addresses do not hit the geocoding service back-to-back.
                time.sleep(1)

                if lat is None or lon is None:
                    print(f"Failed to geocode: {cleaned_address}")
                    failed_list.append({'Outlet Address': original_address, 'Cleaned Address': cleaned_address, 'Staff': merchandiser})
                    continue

                new_cache_rows.append({
                    'Original Address': original_address,
                    'Cleaned Address': cleaned_address,
                    'Latitude': lat,
                    'Longitude': lon,
                    'Full Address': full_address,
                    'Staff': merchandiser
                })
                cached_locations[original_address] = (lat, lon, full_address)
                print(f"Plotted new: {cleaned_address}")

            # Remove out-of-bounds points. Written as a positive range test so
            # NaN coordinates (e.g. blank cache cells) are rejected as well.
            if not (1.22 <= lat <= 1.47 and 103.6 <= lon <= 104.1):
                print(f"Skipping out-of-bounds point: {full_address}")
                failed_list.append({'Outlet Address': original_address, 'Cleaned Address': cleaned_address, 'Staff': merchandiser})
                continue

            if merchandiser not in merchandiser_color:
                merchandiser_color[merchandiser] = color_palette[len(merchandiser_color) % len(color_palette)]
            if merchandiser not in merchandiser_groups:
                merchandiser_groups[merchandiser] = folium.FeatureGroup(name=merchandiser)

            popup_html = f"""
        <b>Outlet Address:</b> {full_address}<br>
        <b>Merchandiser (Staff):</b> {merchandiser}
        """
            spec = (lat, lon, popup_html, merchandiser, merchandiser_color[merchandiser])
            marker_specs.append(spec)
            _build_marker(*spec).add_to(merchandiser_groups[merchandiser])
    finally:
        # Save cache even if the run is interrupted, so completed lookups
        # are not repeated next time.
        if new_cache_rows:
            new_rows_df = pd.DataFrame(new_cache_rows, columns=cache_columns)
            if cache_df.empty:
                cache_df = new_rows_df
            else:
                cache_df = pd.concat([cache_df, new_rows_df], ignore_index=True)
        cache_df.to_csv(cache_file, index=False)
        print(f"\nCache updated and saved to: {cache_file}")

    # Add all merchandiser groups
    for group in merchandiser_groups.values():
        group.add_to(map_sg)

    # Create Cluster Layer (own marker instances, see _build_marker)
    cluster_layer = MarkerCluster(name="Cluster View")
    for spec in marker_specs:
        _build_marker(*spec).add_to(cluster_layer)
    cluster_layer.add_to(map_sg)

    # Search Bar across all markers (own marker instances as well)
    # NOTE(review): confirm in the rendered map that search_label="tooltip"
    # resolves for folium.Marker; the tooltip is attached as a child element.
    search_layer = folium.FeatureGroup(name="Searchable Layer")
    for spec in marker_specs:
        search_layer.add_child(_build_marker(*spec))
    map_sg.add_child(search_layer)
    Search(layer=search_layer, search_label="tooltip", placeholder="Search Merchandiser...").add_to(map_sg)

    # Add LayerControl last, once every layer it lists is on the map
    folium.LayerControl(collapsed=False).add_to(map_sg)

    # Save failed list
    if failed_list:
        failed_df = pd.DataFrame(failed_list)
        failed_df.to_csv(failed_output_file, index=False)
        print(f"Failed addresses saved to: {failed_output_file}")
    else:
        print("No failed addresses!")

    # Save final map
    map_sg.save(output_map)
    print(f"\nMap successfully saved to: {output_map}")

if __name__ == "__main__":
    # Hard-coded absolute paths for the input CSV and the three outputs,
    # keyed by the parameter names of plot_and_process_addresses.
    run_paths = {
        "address_file": "C:/Users/bingh/Desktop/Address.csv",
        "cache_file": "C:/Users/bingh/Desktop/New folder/geocode_cache_final.csv",
        "output_map": "C:/Users/bingh/Desktop/New folder/singapore_final_map.html",
        "failed_output_file": "C:/Users/bingh/Desktop/New folder/final_failed_addresses.csv",
    }

    plot_and_process_addresses(**run_paths)
