In [1]:
import pandas as pd 
from src.get_data import get_data 
from src.get_data import get_connection
from datetime import datetime 
from routing.routing_optimizer import RouteOptimizer
from datetime import datetime 
import folium
import openrouteservice as ors
import math
import numpy as np
import os 
client = ors.Client(key='5b3ce3597851110001cf62485a415b103df64104ad2680c9210ef936')

### Utils

In [2]:
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score

def evaluate_unsupervised_clustering(df):
    # Usage:
    X = df[['Latitude', 'Longitude']].values
    labels = df['cluster'].values
    scores = {
        "Silhouette Score": silhouette_score(X, labels).round(2),
        "Davies-Bouldin Index": davies_bouldin_score(X, labels).round(2),
        "Calinski-Harabasz Score": calinski_harabasz_score(X, labels).round(2)
    }
    return scores

In [3]:
def clean_invalid_coordinates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Replaces invalid Latitude (< -90 or > 90) and Longitude (< -180 or > 180) values with 0.0.

    Args:
        df (pd.DataFrame): Input DataFrame with 'Latitude' and 'Longitude' columns.

    Returns:
        pd.DataFrame: DataFrame with corrected coordinate values.
    """
    df = df.copy()

    df.loc[(df['Latitude'] < -90) | (df['Latitude'] > 90), 'Latitude'] = 0.0
    df.loc[(df['Longitude'] < -180) | (df['Longitude'] > 180), 'Longitude'] = 0.0

    return df

In [4]:
def export_data(
        selected_trip,
        all_push_recommendation,
        cluster_summary,
        stock_point_name
    ):
    current_date = datetime.today().date()
    dir_path = f'./recommendation_output/{current_date}'
    
    # Ensure directory exists
    os.makedirs(dir_path, exist_ok=True)
    
    file_path = f'{dir_path}/{stock_point_name}_{current_date}.xlsx'

    with pd.ExcelWriter(file_path) as writer:
        selected_trip.to_excel(writer, sheet_name='Selected Trip', index=False)
        all_push_recommendation.to_excel(writer, sheet_name='All Recommendation', index=False)
        cluster_summary.to_excel(writer, sheet_name='Recommendation Cluster Summary', index=False)


#### Map-Utils

In [5]:
import folium
from clustering.plot_cluster import create_enhanced_cluster_map

def vis_and_save(df_routes, 
                 df_stockpoint,   
                 filename=f'./maps/default_clusters.html'):
    depot_location = [df_stockpoint.Latitude[0], df_stockpoint.Longitude[0]]
    depot_name = df_stockpoint.Stock_point_Name[0]
    map_clusters = create_enhanced_cluster_map(
        df_routes,
        popup_cols=['CustomerID', 'LGA', 'LCDA'],
        tooltip_cols=['LGA', 'LCDA'], 
        zoom_start=10, 
        radius=8
    ).add_child(folium.Marker(location=depot_location, 
                            size = 10, 
                            tooltip=depot_name, 
                            icon=folium.Icon(color="green", 
                            icon="home")))  

    map_clusters.save(filename)
    return map_clusters

#### Cluster Summary Route-Function

In [6]:
def cluster_summary_and_selection(push_recommendation,
                                  sel_trip_cluster,
                                  ):
    ### Cluster Summary 
    cluster_summary = (
        push_recommendation
        .groupby('cluster').agg(
            LGA_list = ('LGA', lambda x: x.unique().tolist()),
            LCDA_List = ('LCDA', lambda x: x.unique().tolist()),
            ncustomer = ('CustomerID','nunique'),
            totalQty = ('EstimatedQuantity','sum'), 
            avg_customer_score = ('composite_customer_score','mean'),
        )
        .reset_index()
        .sort_values(['avg_customer_score','ncustomer', 'totalQty'], 
                     ascending=[False, False, False])
        )

    ### Select Trip   
    df_high_value_cluster_summary = (
            cluster_summary
            .query('ncustomer > 4')
            .head(max(10, sel_trip_cluster))
            .reset_index(drop = True)
        )
    sel_cluster_tuple = df_high_value_cluster_summary.cluster[0:sel_trip_cluster].to_list()
    sel_total_customer_count = df_high_value_cluster_summary.head(sel_trip_cluster).ncustomer.sum()
    print(f'''Select ClusterIDs: {sel_cluster_tuple}''')
    print(f'''Total Number of Customers: {sel_total_customer_count}''')
    print(df_high_value_cluster_summary.head(sel_trip_cluster))

    return cluster_summary, df_high_value_cluster_summary, sel_cluster_tuple, sel_total_customer_count

In [7]:
def run_route_optimizer(df_clustering, sel_cluster_tuple, df_stockpoint, 
                        stock_point_name,
                        sel_total_customer_count, capacity_size = 20):
     # Select cluster 37
     df_sel_clust = df_clustering.query(f'cluster in {sel_cluster_tuple}').query('Latitude > 0')

     # Ensure coordinates are in [longitude, latitude] for ORS
     coords = [[lon, lat] for lat, lon in zip(df_sel_clust.Latitude, df_sel_clust.Longitude)]
     # Print number of jobs
     print("Number of customer locations:", len(coords))
     # Convert depot_location to ORS format
     # Assuming depot_location is [lat, lon], flip to [lon, lat]
     vehicle_start = [df_stockpoint.Longitude[0], df_stockpoint.Latitude[0]]
     num_vehicles = math.ceil(sel_total_customer_count / capacity_size)
     vehicles = [
          ors.optimization.Vehicle(
               id=i,
               profile='driving-car',
               start=vehicle_start,
               end=vehicle_start,
               capacity=[capacity_size]
          ) for i in range(num_vehicles)
     ]

     # Define jobs (each customer gets amount=[1])
     jobs = [ors.optimization.Job(id=index, location=coord, amount=[1]) for index, coord in enumerate(coords)]

     # Call ORS optimization API
     optimized = client.optimization(jobs=jobs, vehicles=vehicles, geometry=True)

     #     ------ MAP
     depot_location = [df_stockpoint.Latitude[0], df_stockpoint.Longitude[0]]
     depot_name = df_stockpoint.Stock_point_Name[0]

     map_clusters_route = create_enhanced_cluster_map(
     df_sel_clust,
     popup_cols=['CustomerID', 'LGA', 'LCDA'],
     tooltip_cols=['LGA', 'LCDA'], 
     zoom_start=10, 
     radius=10
     ).add_child(folium.Marker(location=depot_location, 
                         size = 10, 
                         tooltip=depot_name, 
                         icon=folium.Icon(color="green", 
                         icon="home")))

     # line_colors = ['green', 'orange', 'blue', 'yellow']
     separable_colors = [
          "#1f77b4",  # blue
          "#ff7f0e",  # orange
          "#2ca02c",  # green
          "#d62728",  # red
          "#9467bd",  # purple
          "#8c564b",  # brown
          "#e377c2",  # pink
          "#7f7f7f",  # gray
          "#bcbd22",  # yellow-green
          "#17becf",  # cyan
          "#aec7e8",  # light blue
          "#ffbb78",  # light orange
          ]

     line_colors = separable_colors[0:num_vehicles] #['green', 'orange', 'blue', 'yellow']
     for route in optimized['routes']:
          folium.PolyLine(locations=[list(reversed(coords)) for coords in ors.convert.decode_polyline(route['geometry'])['coordinates']], color=line_colors[route['vehicle']]).add_to(map_clusters_route)

     #
     selected_trip_map_path = f'./recommendation_output/selected_trip_map/{stock_point_name}_{datetime.today().date()}.html' 
     map_clusters_route.save(selected_trip_map_path)

In [8]:
def prep_selected_trip(push_recommendation, 
                       df_high_value_cluster,
                       df_master_customer_dim,  
                       df_stockpoint,
                       sel_cluster_tuple):
    
    sel_columns = ['Stock_Point_ID', 
                'StateName', # 'Region', 
                'Latitude', 'Longitude', 'LGA', 'LCDA', 'cluster', 
                'CustomerID', 'SKUID', 'ProductName', 'Output',
                'LastDeliveredDate', 'Recency', 'InventoryCheck', 'ProductTag', 'Medium',
                'EstimatedQuantity', 
                # 'CustomerSKUscoreRank'
                ]

    sel_cols_cust= ['Stock_Point_ID', 'CustomerID', 'ContactName',  'CustomerModeName',   'ContactPhone', 'FullAddress', 
                    'composite_customer_score', 'percentile_rank', 'FullAddress', 'KYC_Capture_Status']

    final_cols = ['Stock_Point_ID', 'Stock_point_Name', 'TripID', 
                'LGA_list', 'LCDA_List', 'ncustomer', 'totalQty','avg_customer_score', 'CustomerID', 'ContactName',  
                'CustomerModeName',   'ContactPhone', 'FullAddress',
                'Latitude', 'Longitude', 'LGA', 'LCDA',   
                'composite_customer_score', 'percentile_rank', 'FullAddress', 'KYC_Capture_Status', 
                'CustomerID', 'SKUID', 'ProductName', 'Output',
                'LastDeliveredDate', 'Recency', 'InventoryCheck', 'ProductTag', 'Medium',
                'EstimatedQuantity',
                ]
    
    df_selected_trip = push_recommendation[push_recommendation['cluster'].isin(sel_cluster_tuple)][sel_columns]
    push_recommendation_trip = (
                                df_selected_trip
                                .merge(df_high_value_cluster, how='left', on = 'cluster' )
                                .merge(df_master_customer_dim[sel_cols_cust], how='left', on = ['Stock_Point_ID', 'CustomerID'])
                                .merge(df_stockpoint[['Stock_Point_ID', 'Stock_point_Name']], how='left', on = ['Stock_Point_ID'])
                                .rename(columns={'cluster':'TripID'})
                                [final_cols]
                                )

    return push_recommendation_trip

In [9]:
def cluster_trip_route(df_sku_rec, 
                       df_customer_dim, 
                       df_stockpoint,
                       stock_point_id,
                       max_customers_per_route, 
                       max_volume_per_route,
                       max_distance_km, 
                       clustering_method='divisive'):
    
    optimizer = RouteOptimizer(
        max_customers_per_route=max_customers_per_route,
        max_volume_per_route=max_volume_per_route,
        max_distance_km = max_distance_km
    )

    optimizer.load_data(df_sku_rec, df_customer_dim, df_stockpoint)
    print("✓ Route optimizer initialized")

    # STEP 3: Generate Routes for Stock Point 1647113
    print("\n3. Generating Optimized Routes...")
    print("-" * 40) 

    stock_point = df_stockpoint[df_stockpoint['Stock_Point_ID'] == stock_point_id].reset_index(drop = True)
    
    stock_point_coords = (stock_point['Latitude'], stock_point['Longitude'])
        
    clustering_customers_df = optimizer.filter_customers_for_stockpoint(stock_point_id)

    df_clustering, n_clusters = optimizer.create_geographic_clusters(clustering_customers_df, 
                                                                     clustering_method = clustering_method)

    routes = optimizer.generate_multi_trip_routes(stock_point_id, 
                                                  max_trips=5, 
                                                  clustering_method=clustering_method)
    df_routes = pd.DataFrame(routes)


    # STEP 4: Analyze Results
    print("\n4. Route Analysis & Results...")
    print("-" * 40)

    push_recommendation = df_sku_rec.merge(df_clustering[['Stock_Point_ID','CustomerID', 'cluster']], 
                                           how='inner', on =['Stock_Point_ID','CustomerID'] )
    
    ### Cluster Evaluation
    evaluate_unsupervised_clustering(df_clustering)

    return push_recommendation, df_clustering, df_routes, stock_point_coords
    

#### Preprocessing Functions

In [10]:
def preprocessing(df_customer_sku_recommendation_raw, 
                      df_customer_dim_with_affinity_score_raw, 
                      df_stockpoint_dim_raw,
                      df_customer_score,
                      df_kyc_customer) :
    
    df_customer_sku_recommendation_raw['Stock_Point_ID'] = df_customer_sku_recommendation_raw['Stock_Point_ID'].astype(int)
    df_customer_dim_with_affinity_score_raw['Stock_Point_ID'] = df_customer_dim_with_affinity_score_raw['Stock_Point_ID'].astype(int)
    df_stockpoint_dim_raw['Stock_Point_ID'] = df_stockpoint_dim_raw['Stock_Point_ID'].astype(int)
    df_customer_score = df_customer_score.rename(columns={'StockPointID':'Stock_Point_ID'})
    df_customer_score['Stock_Point_ID'] = df_customer_score['Stock_Point_ID'].astype(int)


    # ----------------- CUSTOMER DIM TABLE 
    col_sel_affinity = ['Region', 'Stock_Point_ID', 'CustomerID']

    col_sel_kyc = ['CustomerID', 'ContactName', 'BusinessName', 'CustomerModeName',
        'CustomerRef', 'ContactPhone', 'CustomerType', 'FullAddress', 
        'StateName', 'CityName', 'TownName', 'Latitude','Longitude', 
        'DistanceVarianceInMeter', 'IsLocationSubmitted',
        'IsLocationCaptured', 'IsLocationVerified','CustomerStatus',
        'RejectReason',  'KYC_Capture_Status',  'lastDelvDate', 
        # 'hasPOS','hasVAS', 'hasBNPL', 'lastDelvDate', 
        'isActive']

    col_sel_score = ['Stock_Point_ID', 'CustomerID', 'composite_customer_score',
        'percentile_rank', 'active_months_pct', 'avg_orders_per_active_month',
        'avg_qty_per_month', 'avg_revenue_per_month', 'days_since_last_order']

    df_master_customer_dim = (
                df_customer_dim_with_affinity_score_raw[col_sel_affinity]
                .merge(df_kyc_customer[col_sel_kyc], how='inner', on=['CustomerID'])
                .merge(df_customer_score[col_sel_score], how='left', on=['Stock_Point_ID', 'CustomerID'])
                .rename(columns = {'CityName':'LGA',
                                'TownName':'LCDA'
                                })

            )

    df_master_customer_dim['CustomerPurchaseReceny'] =  df_master_customer_dim['lastDelvDate'].apply(lambda x: (datetime.now() - x).days)
    df_master_customer_dim['KYC_Capture_Status'] = df_master_customer_dim['KYC_Capture_Status'].apply(lambda x: 'Yes' if x == 1 else 'No')
    # Fix Missing value -------------------------------------------
    for col in ['BusinessName', 'CustomerModeName', 'FullAddress', 'LGA', 'LCDA']:
        df_master_customer_dim[col] = df_master_customer_dim[col].fillna('')

    for col in ['Latitude',  'Longitude',
                'composite_customer_score',  'percentile_rank',  
                'active_months_pct',  
                'avg_orders_per_active_month',  'avg_qty_per_month',  'avg_revenue_per_month'
                ]:
        df_master_customer_dim[col] = pd.to_numeric(df_master_customer_dim[col], errors='coerce').fillna(0) 

    df_master_customer_dim = clean_invalid_coordinates(df_master_customer_dim)

    # ----------------- RECOMMENDATION
    col2 = ['EstimatedQuantity', 'CustomerSKUscore', 'CustomerSKUscoreStandardize', 'CustomerSKUscoreRank']
    for col in col2: 
        df_customer_sku_recommendation_raw[col] = pd.to_numeric(df_customer_sku_recommendation_raw[col], errors='coerce')

    df_customer_sku_recommendation_raw['LastDeliveredDate'] = pd.to_datetime(df_customer_sku_recommendation_raw['LastDeliveredDate'])
    # Get today's date
    today = pd.Timestamp.today()

    df_customer_sku_recommendation_raw['Recency'] = df_customer_sku_recommendation_raw['LastDeliveredDate'].apply(lambda x: (datetime.now() - x).days)
    df_customer_sku_recommendation_raw['Recency'] = df_customer_sku_recommendation_raw['Recency'].fillna(max(df_customer_sku_recommendation_raw['Recency']))
    
    # ----------------- STOCKPOINT
    df_stockpoint_dim_raw.rename(columns={'lattitude':'Latitude', 'longitude':'Longitude'}, inplace=True) 
    col3 = ['Latitude', 'Longitude']
    for col in col3: 
        df_stockpoint_dim_raw[col] = pd.to_numeric(df_stockpoint_dim_raw[col], errors='coerce').fillna(0)    

    # Replace invalid latitude values with NaN
    df_stockpoint_dim_raw = clean_invalid_coordinates(df_stockpoint_dim_raw)   
    

    return df_customer_sku_recommendation_raw, df_master_customer_dim, df_stockpoint_dim_raw

In [11]:
def data_filter(df_customer_sku_recommendation, df_master_customer_dim, df_stockpoint_dim,
                stockpoint_id,  sku_recency = 7, customer_recency = 90, number_recommendation = 5,
                estimate_qty_scale_factor = .90, max_estimated_qty = 5):
    
    df_customer_sku_recommendation = df_customer_sku_recommendation.copy().query(f'Stock_Point_ID == {stockpoint_id}')
    # Filter Recommendation
    df_customer_sku_recommendation = df_customer_sku_recommendation[df_customer_sku_recommendation['ProductTag'] != 'Standard-Inactive']
    df_customer_sku_recommendation = df_customer_sku_recommendation[df_customer_sku_recommendation['Medium'] != 'Never Purchased']

    # Filter customer base
    df_master_customer_dim['valid_for_push'] = np.where(
                                                    (
                                                        (df_master_customer_dim['IsLocationCaptured'] == 'Yes') |
                                                        (df_master_customer_dim['DistanceVarianceInMeter'] <= 150.0) |
                                                        (df_master_customer_dim['KYC_Capture_Status'] == 1) |
                                                        (df_master_customer_dim['CustomerPurchaseReceny'] <= customer_recency)
                                                    ),
                                                    1,
                                                    0
                                                )
    # df_master_customer_dim = df_master_customer_dim[df_master_customer_dim['CustomerPurchaseReceny'] <= customer_recency]
    df_master_customer_dim = df_master_customer_dim.query('valid_for_push == 1')  
    
    # # Clipping Max Estimated Quantity to 10 qty
    df_customer_sku_recommendation['EstimatedQuantity_bck'] = df_customer_sku_recommendation['EstimatedQuantity']
    df_customer_sku_recommendation['EstimatedQuantity'] = df_customer_sku_recommendation['EstimatedQuantity'].apply(lambda x: max_estimated_qty if int((x*estimate_qty_scale_factor)) > max_estimated_qty else int((x*estimate_qty_scale_factor)) )


    # Select top 10 SKU by SKURank per customer
    df_customer_sku_recommendation = (
        df_customer_sku_recommendation
        .query('EstimatedQuantity > 1')
        .sort_values(['CustomerID','CustomerSKUscoreRank'])
        .groupby('CustomerID', group_keys=False)
        .head(number_recommendation)
        .reset_index(drop=True) 
    )

    df_customer_sku_recommendation_ = df_master_customer_dim.merge(df_customer_sku_recommendation, how='inner', on = ['CustomerID','Stock_Point_ID']) 

    df_stockpoint_dim = df_stockpoint_dim.query(f'Stock_Point_ID == {stockpoint_id}').reset_index(drop=True) 
    

    df_customer_dim = df_master_customer_dim.merge(df_customer_sku_recommendation_['CustomerID'].drop_duplicates(), how='inner', on = 'CustomerID')
    # df_customer_dim = df_customer_dim.merge(df_customer_dim_with_affinity_score[sel_cols], how='inner', on = 'CustomerID').reset_index(drop = True) 
    
    print(f'Total Quantity before filter: {df_customer_sku_recommendation.query(f"Stock_Point_ID == {stockpoint_id}").EstimatedQuantity.sum():,}')
    print(f'Total Quantity: {df_customer_sku_recommendation_.EstimatedQuantity.sum():,}')
    print(f'Total Number of Customers before filter: {df_customer_sku_recommendation.query(f"Stock_Point_ID == {stockpoint_id}").CustomerID.nunique():,}')
    print(f'Total Number of Customers: {df_customer_dim.CustomerID.nunique():,}')

 
    return df_customer_sku_recommendation_, df_customer_dim,   df_stockpoint_dim  

### Main Function

In [12]:
def run_push_recommendation(df_customer_sku_recommendation, 
                            df_master_customer_dim, 
                            df_stockpoint_dim, 
                            stock_point_id,
                            stock_point_name,
                            sku_recency = 7, 
                            customer_recency = 60, number_recommendation = 5, 
                            estimate_qty_scale_factor = 1, max_estimated_qty = 5,
                            max_customers_per_route=20,
                            max_volume_per_route=300,
                            max_distance_km = 40,
                            sel_trip_cluster = 5,
                            clustering_method = 'divisive'):
    """
    Main execution function demonstrating complete route optimization workflow
    """ 

    print("=" * 80)
    print("ROUTE OPTIMIZATION FOR PUSH SALES RECOMMENDATIONS")
    print(f"StockPoint: {stock_point_name}, StockPointID: {stock_point_id},")
    print("=" * 80)

    # STEP 1: Load or Generate Data
    print("\n1. Loading Data...")
    print("-" * 40)

    df_sku_rec, df_customer_dim, df_stockpoint  = data_filter(df_customer_sku_recommendation, 
                                                                df_master_customer_dim, 
                                                                df_stockpoint_dim, 
                                                                stockpoint_id = stock_point_id,  
                                                                sku_recency = sku_recency, 
                                                                customer_recency = customer_recency, 
                                                                number_recommendation = number_recommendation,
                                                                estimate_qty_scale_factor = estimate_qty_scale_factor, 
                                                                max_estimated_qty = max_estimated_qty)


    print(f"✓ Loaded {len(df_sku_rec)} SKU recommendations")
    print(f"✓ Loaded {len(df_customer_dim)} customer records")
    print(f"✓ Loaded {len(df_stockpoint)} stock points")

    push_recommendation, df_clustering, df_routes, stock_point_coords = cluster_trip_route(df_sku_rec, 
                                                                                            df_customer_dim, 
                                                                                            df_stockpoint,
                                                                                            stock_point_id,
                                                                                            max_customers_per_route, 
                                                                                            max_volume_per_route,
                                                                                            max_distance_km,
                                                                                            clustering_method)
        
    ### Cluster Evaluation
    print(evaluate_unsupervised_clustering(df_clustering))

    ### Trip Maps
    try:
        trip_map_path = f'./recommendation_output/trip_map/{stock_point_name}_{datetime.today().date()}.html' 
        map_clusters = vis_and_save(df_routes= (df_routes
                                            .rename(columns={'cluster':'cluster_bck'})
                                            .rename(columns={'TripNumber':'cluster'})
                                            ), 
                                    df_stockpoint=df_stockpoint, 
                                    filename=trip_map_path)
    except Exception as e:
        print(f'Unable to save the generated map image: {e}')

    ### Cluster Summary 
    cluster_summary, df_high_value_cluster_summary, sel_cluster_tuple, sel_total_customer_count = cluster_summary_and_selection(
                                  push_recommendation,
                                  sel_trip_cluster,
                                  )

    ## Route Optimization
    try:
        run_route_optimizer(df_clustering, sel_cluster_tuple, df_stockpoint, 
                        stock_point_name,
                        sel_total_customer_count, 
                        capacity_size = 20)
    except Exception as e:
        print(f'Unable to generate route mapping using orc: {e}')

    ## Trip
    push_recommendation_trip = prep_selected_trip(push_recommendation, 
                                                  df_high_value_cluster_summary, 
                                                  df_master_customer_dim,  
                                                  df_stockpoint,
                                                  sel_cluster_tuple)
    
    ### Export Data
    try:
        export_data(
                selected_trip = push_recommendation_trip,
                all_push_recommendation = push_recommendation,
                cluster_summary = cluster_summary,
                stock_point_name = stock_point_name
            )
    except Exception as e:
        print(f'Unable to generate route mapping using orc: {e}')

    dict_ = {
        'stock_point_name': stock_point_name,
        'selected_trip': push_recommendation_trip,
        'all_push_recommendation': push_recommendation,
        'cluster_summary': cluster_summary
    }

    return dict_
    #push_recommendation, df_clustering, df_routes, trip_summary, stock_point_coords, df_stockpoint

# Usage

In [13]:
## Load Data
df_customer_sku_recommendation_raw = pd.read_feather('./input/customer_sku_recommendation.feather').rename(columns={'FCID':'Stock_Point_ID','CustomerId':'CustomerID'})
df_customer_dim_with_affinity_score_raw = pd.read_feather('./input/customer_dim_with_affinity_score.feather').rename(columns={'FCID':'Stock_Point_ID'})
df_stockpoint_dim_raw = pd.read_feather('./input/stockpoint_dim.feather')
df_kyc_customer = pd.read_feather('./input/all_customer_dim_table.feather')
df_customer_score = pd.read_feather('./input/df_customer_score.feather')

In [14]:
# Preprocessing
df_customer_sku_recommendation, df_master_customer_dim, df_stockpoint_dim = preprocessing(df_customer_sku_recommendation_raw, 
                                                                                                        df_customer_dim_with_affinity_score_raw, 
                                                                                                        df_stockpoint_dim_raw,
                                                                                                        df_customer_score,
                                                                                                        df_kyc_customer)

In [29]:
# # Data Filter - Testing 
# causeway, causeway_customer_dim, causeway_stockpoint, = data_filter(df_customer_sku_recommendation, 
#                                                                     df_master_customer_dim, 
#                                                                     df_stockpoint_dim, stockpoint_id = 1647113,  
#                                                                     sku_recency = 7, customer_recency = 60, number_recommendation = 5,
#                                                                     estimate_qty_scale_factor = 1, max_estimated_qty = 5)

In [65]:
df_stockpoint_dim.query(' Stock_Point_ID == 1647113')

for index, row in df_stockpoint_dim.iterrows():
    if index == 42:
        stock_point_id =  row['Stock_Point_ID']
        stock_point_name = row['Stock_point_Name']
        print(f'Stock Point ID: {stock_point_id} || Stock Point Name: {stock_point_name}')  # Access by column name


Stock Point ID: 1647113 || Stock Point Name: OmniHub Apapa Lagos - CAUSEWAY


In [69]:
ALL_STOCKPOINTS_RESULT = {}
for index, row in df_stockpoint_dim.iterrows():
    if index == 42:
        stock_point_id =  row['Stock_Point_ID']
        stock_point_name = row['Stock_point_Name']
        print(f'Stock Point ID: {stock_point_id} || Stock Point Name: {stock_point_name}')  # Access by column name

        res_dict = run_push_recommendation(df_customer_sku_recommendation, 
                                    df_master_customer_dim, 
                                    df_stockpoint_dim, 
                                    stock_point_id,
                                    stock_point_name,
                                    sku_recency = 7, 
                                    customer_recency = 60, number_recommendation = 5, 
                                    estimate_qty_scale_factor = 1, max_estimated_qty = 5,
                                    max_customers_per_route=20,
                                    max_volume_per_route=300,
                                    max_distance_km = 40,
                                    sel_trip_cluster = 5,
                                    clustering_method = 'divisive')

Stock Point ID: 1647113 || Stock Point Name: OmniHub Apapa Lagos - CAUSEWAY
ROUTE OPTIMIZATION FOR PUSH SALES RECOMMENDATIONS
StockPoint: OmniHub Apapa Lagos - CAUSEWAY, StockPointID: 1647113,

1. Loading Data...
----------------------------------------
Total Quantity before filter: 19,553
Total Quantity: 17,445
Total Number of Customers before filter: 1,038
Total Number of Customers: 926
✓ Loaded 4405 SKU recommendations
✓ Loaded 926 customer records
✓ Loaded 1 stock points
✓ Route optimizer initialized

3. Generating Optimized Routes...
----------------------------------------

4. Route Analysis & Results...
----------------------------------------
{'Silhouette Score': np.float64(0.37), 'Davies-Bouldin Index': np.float64(0.77), 'Calinski-Harabasz Score': np.float64(89875.0)}
Select ClusterIDs: [26, 77, 89, 51, 74]
Total Number of Customers: 38
   cluster                               LGA_list  \
0       26  [Surulere, Ajeromi Ifelodun, , Apapa]   
1       77                         [

In [75]:
print(res_dict.keys())

dict_keys(['stock_point_name', 'selected_trip', 'all_push_recommendation', 'cluster_summary'])


In [80]:
[col  for col in res_dict['all_push_recommendation'].columns if 'Phone' in col]

['ContactPhone']

# Setup

### Get Data

In [None]:
# @contextmanager
import pandas as pd 
from src.get_connection import get_connection

def get_kyc_customers():
    # Get a database connection
    conn = get_connection()
    cursor = conn.cursor()

    # Execute the stored procedure
    cursor.execute("EXEC usp_GetCustomerKYCInfoDetailsV2")

    # Fetch the first result set into a DataFrame
    rows = cursor.fetchall()
    df_kyc_customer = pd.DataFrame.from_records(rows, columns=[column[0] for column in cursor.description])

    cursor.close()
    conn.close()
    return df_kyc_customer

In [None]:
def fetch_customer_score() -> pd.DataFrame:
    """
    Executes the stored procedure 'poc_stockpoint_customer_score' and returns the result as a DataFrame.

    Returns:
        pd.DataFrame: DataFrame containing customer score data.
    """
    conn = get_connection()
    cursor = conn.cursor()

    cursor.execute("SELECT * FROM VConnectMasterDWR..poc_stockpoint_customer_score")
    rows = cursor.fetchall()
    df_customer_score = pd.DataFrame.from_records(rows, columns=[col[0] for col in cursor.description])

    cursor.close()
    conn.close()

    return df_customer_score


In [None]:
df_kyc_customer = get_kyc_customers() # 15mins || 7mins
# print('Saving file to disk ...') 
df_kyc_customer.to_feather('./input/all_customer_dim_table.feather')

In [None]:
df_customer_score = df = fetch_customer_score()
df_customer_score.to_feather('./input/df_customer_score.feather')

In [None]:
df_customer_score.shape

In [None]:

# df_customer_sku_recommendation_raw,  df_customer_dim_with_affinity_score_raw, df_stockpoint_dim_raw = get_data() # 1mins

### Load Data

In [None]:
## Load Data
df_customer_sku_recommendation_raw = pd.read_feather('./input/customer_sku_recommendation.feather').rename(columns={'FCID':'Stock_Point_ID','CustomerId':'CustomerID'})
df_customer_dim_with_affinity_score_raw = pd.read_feather('./input/customer_dim_with_affinity_score.feather').rename(columns={'FCID':'Stock_Point_ID'})
df_stockpoint_dim_raw = pd.read_feather('./input/stockpoint_dim.feather')
df_kyc_customer = pd.read_feather('./input/all_customer_dim_table.feather')
df_customer_score = pd.read_feather('./input/df_customer_score.feather')

In [None]:
# df_stockpoint_dim_raw.columns

### Test

In [None]:
df_customer_score.columns

In [None]:
# df_customer_dim_with_affinity_score_raw.columns

In [None]:
df_customer_dim_with_affinity_score_raw.shape

In [None]:
# df_kyc_customer.columns

In [None]:
# df_customer_score.columns

In [None]:
sel_cols_customer = ['CustomerID', 'ContactName', 'BusinessName', 
                        'CustomerModeName', 'ContactPhone', 'CustomerType', #'Location', 'Address',
                        'FullAddress', 'StateName', 'CityName', 'TownName', 'Latitude',
                        'Longitude', #'status', 
                        'DistanceVarianceInMeter', 'IsLocationSubmitted',
                        'LocationSubmittedDate', 'IsLocationCaptured', 'CustomerStatus',
                        'KYC_Capture_Status', 'lastDelvDate', 'isActive']

In [None]:
col_sel_affinity = ['Region', 'Stock_Point_ID', 'CustomerID']

col_sel_kyc = ['CustomerID', 'ContactName', 'BusinessName', 'CustomerModeName',
       'CustomerRef', 'ContactPhone', 'CustomerType', 'FullAddress', 
       'StateName', 'CityName', 'TownName', 'Latitude','Longitude', 
       'DistanceVarianceInMeter', 'IsLocationSubmitted',
       'IsLocationCaptured', 'IsLocationVerified','CustomerStatus',
       'RejectReason',  'KYC_Capture_Status',  'lastDelvDate', 
       # 'hasPOS','hasVAS', 'hasBNPL', 'lastDelvDate', 
       'isActive']

col_sel_score = ['StockPointID', 'CustomerID', 'composite_customer_score',
       'percentile_rank', 'active_months_pct', 'avg_orders_per_active_month',
       'avg_qty_per_month', 'avg_revenue_per_month', 'days_since_last_order']

In [None]:
df_customer_score[col_sel_score].head(1)

In [None]:
df_customer_dim_with_affinity_score_raw.columns

In [None]:
# Merging All Customer Information and Score

df_master_customer_dim = (
    df_customer_dim_with_affinity_score_raw[col_sel_affinity]
    .merge(df_kyc_customer[col_sel_kyc], how='inner', on=['CustomerID'])
    .merge(df_customer_score[col_sel_score], how='left', on=['Stock_Point_ID', 'CustomerID'])
    .rename(columns = {'CityName':'LGA',
                       'TownName':'LCDA'
                       })

)

df_master_customer_dim['CustomerPurchaseReceny'] =  df_master_customer_dim['lastDelvDate'].apply(lambda x: (datetime.now() - x).days)
print(df_customer_score.CustomerID.nunique())
print(df_kyc_customer.CustomerID.nunique())
print(df_customer_dim_with_affinity_score_raw.CustomerID.nunique())
print(df_master_customer_dim.CustomerID.nunique())

# df_master_customer_dim.isna().sum()

# --------------------------------------------------------- #
# --------------------------------------------------------- #
# Fix Missing value -------------------------------------------
for col in ['BusinessName', 'CustomerModeName', 'FullAddress', 'LGA', 'LCDA']:
    df_master_customer_dim[col] = df_master_customer_dim[col].fillna('')

for col in ['Latitude',  'Longitude',
            'composite_customer_score',  'percentile_rank',  
            'active_months_pct',  
            'avg_orders_per_active_month',  'avg_qty_per_month',  'avg_revenue_per_month'
            ]:
    df_master_customer_dim[col] = pd.to_numeric(df_master_customer_dim[col], errors='coerce').fillna(0) 


# df_master_customer_dim['DistanceVarianceInMeter'] = df_master_customer_dim['DistanceVarianceInMeter'].fillna(9999)
# df_master_customer_dim['days_since_last_order'] = df_master_customer_dim['DistanceVarianceInMeter'].fillna(60*6)


In [None]:
df_master_customer_dim.info()

In [None]:
df_master_customer_dim.isna().sum().reset_index(name='isna').query('isna >0')['index'].to_list()

### Preprocessing Functions

In [None]:
def preprocessing(df_customer_sku_recommendation_raw, 
                      df_customer_dim_with_affinity_score_raw, 
                      df_stockpoint_dim_raw,
                      df_customer_score,
                      df_kyc_customer) :
    
    df_customer_sku_recommendation_raw['Stock_Point_ID'] = df_customer_sku_recommendation_raw['Stock_Point_ID'].astype(int)
    df_customer_dim_with_affinity_score_raw['Stock_Point_ID'] = df_customer_dim_with_affinity_score_raw['Stock_Point_ID'].astype(int)
    df_stockpoint_dim_raw['Stock_Point_ID'] = df_stockpoint_dim_raw['Stock_Point_ID'].astype(int)
    df_customer_score = df_customer_score.rename(columns={'StockPointID':'Stock_Point_ID'})
    df_customer_score['Stock_Point_ID'] = df_customer_score['Stock_Point_ID'].astype(int)


    # ----------------- CUSTOMER DIM TABLE 
    col_sel_affinity = ['Region', 'Stock_Point_ID', 'CustomerID']

    col_sel_kyc = ['CustomerID', 'ContactName', 'BusinessName', 'CustomerModeName',
        'CustomerRef', 'ContactPhone', 'CustomerType', 'FullAddress', 
        'StateName', 'CityName', 'TownName', 'Latitude','Longitude', 
        'DistanceVarianceInMeter', 'IsLocationSubmitted',
        'IsLocationCaptured', 'IsLocationVerified','CustomerStatus',
        'RejectReason',  'KYC_Capture_Status',  'lastDelvDate', 
        # 'hasPOS','hasVAS', 'hasBNPL', 'lastDelvDate', 
        'isActive']

    col_sel_score = ['Stock_Point_ID', 'CustomerID', 'composite_customer_score',
        'percentile_rank', 'active_months_pct', 'avg_orders_per_active_month',
        'avg_qty_per_month', 'avg_revenue_per_month', 'days_since_last_order']

    df_master_customer_dim = (
                df_customer_dim_with_affinity_score_raw[col_sel_affinity]
                .merge(df_kyc_customer[col_sel_kyc], how='inner', on=['CustomerID'])
                .merge(df_customer_score[col_sel_score], how='left', on=['Stock_Point_ID', 'CustomerID'])
                .rename(columns = {'CityName':'LGA',
                                'TownName':'LCDA'
                                })

            )

    df_master_customer_dim['CustomerPurchaseReceny'] =  df_master_customer_dim['lastDelvDate'].apply(lambda x: (datetime.now() - x).days)
    df_master_customer_dim['KYC_Capture_Status'] = df_master_customer_dim['KYC_Capture_Status'].apply(lambda x: 'Yes' if x == 1 else 'No')
    # Fix Missing value -------------------------------------------
    for col in ['BusinessName', 'CustomerModeName', 'FullAddress', 'LGA', 'LCDA']:
        df_master_customer_dim[col] = df_master_customer_dim[col].fillna('')

    for col in ['Latitude',  'Longitude',
                'composite_customer_score',  'percentile_rank',  
                'active_months_pct',  
                'avg_orders_per_active_month',  'avg_qty_per_month',  'avg_revenue_per_month'
                ]:
        df_master_customer_dim[col] = pd.to_numeric(df_master_customer_dim[col], errors='coerce').fillna(0) 

    df_master_customer_dim = clean_invalid_coordinates(df_master_customer_dim)

    # ----------------- RECOMMENDATION
    col2 = ['EstimatedQuantity', 'CustomerSKUscore', 'CustomerSKUscoreStandardize', 'CustomerSKUscoreRank']
    for col in col2: 
        df_customer_sku_recommendation_raw[col] = pd.to_numeric(df_customer_sku_recommendation_raw[col], errors='coerce')

    df_customer_sku_recommendation_raw['LastDeliveredDate'] = pd.to_datetime(df_customer_sku_recommendation_raw['LastDeliveredDate'])
    # Get today's date
    today = pd.Timestamp.today()

    df_customer_sku_recommendation_raw['Recency'] = df_customer_sku_recommendation_raw['LastDeliveredDate'].apply(lambda x: (datetime.now() - x).days)
    df_customer_sku_recommendation_raw['Recency'] = df_customer_sku_recommendation_raw['Recency'].fillna(max(df_customer_sku_recommendation_raw['Recency']))
    
    # ----------------- STOCKPOINT
    df_stockpoint_dim_raw.rename(columns={'lattitude':'Latitude', 'longitude':'Longitude'}, inplace=True) 
    col3 = ['Latitude', 'Longitude']
    for col in col3: 
        df_stockpoint_dim_raw[col] = pd.to_numeric(df_stockpoint_dim_raw[col], errors='coerce').fillna(0)    

    # Replace invalid latitude values with NaN
    df_stockpoint_dim_raw = clean_invalid_coordinates(df_stockpoint_dim_raw)   
    

    return df_customer_sku_recommendation_raw, df_master_customer_dim, df_stockpoint_dim_raw

In [None]:
def data_filter(df_customer_sku_recommendation, df_master_customer_dim, df_stockpoint_dim,
                stockpoint_id,  sku_recency = 7, customer_recency = 90, number_recommendation = 5,
                estimate_qty_scale_factor = .90, max_estimated_qty = 5):
    
    df_customer_sku_recommendation = df_customer_sku_recommendation.copy().query(f'Stock_Point_ID == {stockpoint_id}')
    # Filter Recommendation
    df_customer_sku_recommendation = df_customer_sku_recommendation[df_customer_sku_recommendation['ProductTag'] != 'Standard-Inactive']
    df_customer_sku_recommendation = df_customer_sku_recommendation[df_customer_sku_recommendation['Medium'] != 'Never Purchased']

    
    df_master_customer_dim['valid_for_push'] = np.where(
                                                    ((df_master_customer_dim['IsLocationCaptured'] == 'Yes') + 
                                                    (df_master_customer_dim['DistanceVarianceInMeter'] <= 150.0) + 
                                                    (df_master_customer_dim['KYC_Capture_Status'] == 1) ) > 0
                                                    ,
                                                    1,
                                                    0
                                                )
    df_master_customer_dim = df_master_customer_dim[df_master_customer_dim['CustomerPurchaseReceny'] <= customer_recency]
    df_master_customer_dim = df_master_customer_dim.query('valid_for_push == 1')  
    
    # # Clipping Max Estimated Quantity to 10 qty
    df_customer_sku_recommendation['EstimatedQuantity_bck'] = df_customer_sku_recommendation['EstimatedQuantity']
    df_customer_sku_recommendation['EstimatedQuantity'] = df_customer_sku_recommendation['EstimatedQuantity'].apply(lambda x: max_estimated_qty if int((x*estimate_qty_scale_factor)) > max_estimated_qty else int((x*estimate_qty_scale_factor)) )


    # Select top 10 SKU by SKURank per customer
    df_customer_sku_recommendation = (
        df_customer_sku_recommendation
        .query('EstimatedQuantity > 1')
        .sort_values(['CustomerID','CustomerSKUscoreRank'])
        .groupby('CustomerID', group_keys=False)
        .head(number_recommendation)
        .reset_index(drop=True) 
    )

    df_customer_sku_recommendation_ = df_master_customer_dim.merge(df_customer_sku_recommendation, how='inner', on = ['CustomerID','Stock_Point_ID']) 

    df_stockpoint_dim = df_stockpoint_dim.query(f'Stock_Point_ID == {stockpoint_id}').reset_index(drop=True) 
    

    df_customer_dim = df_master_customer_dim.merge(df_customer_sku_recommendation_['CustomerID'].drop_duplicates(), how='inner', on = 'CustomerID')
    # df_customer_dim = df_customer_dim.merge(df_customer_dim_with_affinity_score[sel_cols], how='inner', on = 'CustomerID').reset_index(drop = True) 
    
    print(f'Total Quantity before filter: {df_customer_sku_recommendation.query(f"Stock_Point_ID == {stockpoint_id}").EstimatedQuantity.sum():,}')
    print(f'Total Quantity: {df_customer_sku_recommendation_.EstimatedQuantity.sum():,}')
    print(f'Total Number of Customers before filter: {df_customer_sku_recommendation.query(f"Stock_Point_ID == {stockpoint_id}").CustomerID.nunique():,}')
    print(f'Total Number of Customers: {df_customer_dim.CustomerID.nunique():,}')

 
    return df_customer_sku_recommendation_, df_customer_dim,   df_stockpoint_dim  

#### Deprecated Functions

In [None]:
def preprocessing_dep(df_customer_sku_recommendation_raw, df_customer_dim_with_affinity_score_raw, df_stockpoint_dim_raw) :
    df_customer_sku_recommendation_raw['Stock_Point_ID'] = df_customer_sku_recommendation_raw['Stock_Point_ID'].astype(int)
    df_customer_dim_with_affinity_score_raw['Stock_Point_ID'] = df_customer_dim_with_affinity_score_raw['Stock_Point_ID'].astype(int)
    df_stockpoint_dim_raw['Stock_Point_ID'] = df_stockpoint_dim_raw['Stock_Point_ID'].astype(int)

    col1 = ['Latitude', 'Longitude','TotalSKUs', 'AvgSKUScore', 'TotalEstimatedVolume', 'RFcount', 'HighValueSKUs', 
            'HighValueAvgScore', 'HighValueTotalScore', 'HighValueEstimatedVolume', 'ExpressSKUs', 
            'CoreSKUs', 'CustomerAffinityScore_Raw', 'CustomerAffinityScore_Standardized', 'CustomerAffinityRank']

    for col in col1: 
        df_customer_dim_with_affinity_score_raw[col] = pd.to_numeric(df_customer_dim_with_affinity_score_raw[col], errors='coerce').fillna(0)

    col2 = ['EstimatedQuantity', 'CustomerSKUscore', 'CustomerSKUscoreStandardize', 'CustomerSKUscoreRank']
    for col in col2: 
        df_customer_sku_recommendation_raw[col] = pd.to_numeric(df_customer_sku_recommendation_raw[col], errors='coerce')

    df_customer_sku_recommendation_raw['LastDeliveredDate'] = pd.to_datetime(df_customer_sku_recommendation_raw['LastDeliveredDate'])
    # Get today's date
    today = pd.Timestamp.today()

    df_customer_sku_recommendation_raw['Recency'] = df_customer_sku_recommendation_raw['LastDeliveredDate'].apply(lambda x: (datetime.now() - x).days)
    df_customer_sku_recommendation_raw['Recency'] = df_customer_sku_recommendation_raw['Recency'].fillna(max(df_customer_sku_recommendation_raw['Recency']))
    df_stockpoint_dim_raw.rename(columns={'lattitude':'Latitude', 'longitude':'Longitude'}, inplace=True) 
    col3 = ['Latitude', 'Longitude']
    for col in col3: 
        df_stockpoint_dim_raw[col] = pd.to_numeric(df_stockpoint_dim_raw[col], errors='coerce').fillna(0)   
        df_kyc_customer[col] = pd.to_numeric(df_kyc_customer[col], errors='coerce').fillna(0)   

    # Replace invalid latitude values with NaN
    df_customer_dim_with_affinity_score_raw.loc[
        (df_customer_dim_with_affinity_score_raw['Latitude'] < -90) |
        (df_customer_dim_with_affinity_score_raw['Latitude'] > 90),
        'Latitude'
    ] = 0.0

    df_customer_dim_with_affinity_score_raw.loc[
        (df_customer_dim_with_affinity_score_raw['Longitude'] < -180) |
        (df_customer_dim_with_affinity_score_raw['Longitude'] > 180),
        'Longitude'
    ] = 0.0   

    # =================================================
    # Replace invalid latitude values with NaN
    df_kyc_customer.loc[
        (df_kyc_customer['Latitude'] < -90) |
        (df_kyc_customer['Latitude'] > 90),
        'Latitude'
    ] = 0.0

    df_kyc_customer.loc[
        (df_kyc_customer['Longitude'] < -180) |
        (df_kyc_customer['Longitude'] > 180),
        'Longitude'
    ] = 0.0   

    # -------------------------
    # Active Customers with at least 1 purchase from 2025
    # Location Captured or Variance check less than 150m or place order in the last 3 months
    sel_cols_customer = ['CustomerID', 'ContactName', 'BusinessName', 
                        'CustomerModeName', 'ContactPhone', 'CustomerType', #'Location', 'Address',
                        'FullAddress', 'StateName', 'CityName', 'TownName', 'Latitude',
                        'Longitude', #'status', 
                        'DistanceVarianceInMeter', 'IsLocationSubmitted',
                        'LocationSubmittedDate', 'IsLocationCaptured', 'CustomerStatus',
                        'KYC_Capture_Status', 'lastDelvDate', 'isActive']

    df_kyc_customer['DistanceVarianceInMeter'] = df_kyc_customer['DistanceVarianceInMeter'].fillna(99999) 

    df_valid_customers = df_kyc_customer[~df_kyc_customer['lastDelvDate'].isnull()][sel_cols_customer].rename(columns={'CityName':'LGA', 'TownName':'LCDA'})
    df_valid_customers['LGA'] = df_valid_customers['LGA'].fillna('')
    df_valid_customers['LCDA'] = df_valid_customers['LCDA'].fillna('')
    df_valid_customers['CustomerPurchaseReceny'] =  df_valid_customers['lastDelvDate'].apply(lambda x: (datetime.now() - x).days)
    

    return df_customer_sku_recommendation_raw, df_customer_dim_with_affinity_score_raw, df_stockpoint_dim_raw, df_valid_customers

In [None]:
def data_filter_def(df_valid_customers, df_customer_sku_recommendation, df_customer_dim_with_affinity_score, df_stockpoint_dim, 
                stockpoint_id,  sku_recency = 7, customer_recency = 90, number_recommendation = 5,
                estimate_qty_scale_factor = .90, max_estimated_qty = 5):
    df_customer_sku_recommendation = df_customer_sku_recommendation.copy().query(f'Stock_Point_ID == {stockpoint_id}')
    # Filter Recommendation
    df_customer_sku_recommendation = df_customer_sku_recommendation[df_customer_sku_recommendation['ProductTag'] != 'Standard-Inactive']
    df_customer_sku_recommendation = df_customer_sku_recommendation[df_customer_sku_recommendation['Medium'] != 'Never Purchased']

    
    df_valid_customers['valid_for_push'] = np.where(
                                                    ((df_valid_customers['IsLocationCaptured'] == 'Yes') + 
                                                    (df_valid_customers['DistanceVarianceInMeter'] <= 150.0) + 
                                                    (df_valid_customers['KYC_Capture_Status'] == 1) ) > 0
                                                    ,
                                                    1,
                                                    0
                                                )
    df_valid_customers = df_valid_customers[df_valid_customers['CustomerPurchaseReceny'] <= customer_recency]
    df_valid_customers = df_valid_customers.query('valid_for_push == 1')
    # print(df_valid_customers.shape)
    # print(df_valid_customers['valid_for_push'] .value_counts()) # 14,715
    # df_valid_customers.head(1)


    # ## Selecting Customers who haven't bought any product in the last 3months
    # df_select_customers = df_customer_sku_recommendation.groupby('CustomerID').Recency.min().reset_index().query(f'Recency <= {customer_recency}')
    # df_customer_sku_recommendation = df_customer_sku_recommendation.merge(df_select_customers[['CustomerID']], how = 'inner')
    
    # ## Filter Product Bought recently
    # df_customer_sku_recommendation = df_customer_sku_recommendation.query(f'Recency > {sku_recency}')
    
    # # Clipping Max Estimated Quantity to 10 qty
    df_customer_sku_recommendation['EstimatedQuantity_bck'] = df_customer_sku_recommendation['EstimatedQuantity']
    df_customer_sku_recommendation['EstimatedQuantity'] = df_customer_sku_recommendation['EstimatedQuantity'].apply(lambda x: max_estimated_qty if int((x*estimate_qty_scale_factor)) > max_estimated_qty else int((x*estimate_qty_scale_factor)) )


    # Select top 10 SKU by SKURank per customer
    df_customer_sku_recommendation = (
        df_customer_sku_recommendation
        .query('EstimatedQuantity > 1')
        .sort_values(['CustomerID','CustomerSKUscoreRank'])
        .groupby('CustomerID', group_keys=False)
        .head(number_recommendation)
        .reset_index(drop=True) 
    )

    df_customer_sku_recommendation_ = df_valid_customers.merge(df_customer_sku_recommendation, how='inner', on = 'CustomerID') 

    df_stockpoint_dim = df_stockpoint_dim.query(f'Stock_Point_ID == {stockpoint_id}').reset_index(drop=True)

    sel_cols = ['Region', 'Stock_Point_ID', 'CustomerID','TotalEstimatedVolume', 'RFcount', 
                'HighValueSKUs', 'HighValueAvgScore', 'HighValueTotalScore', 
                'HighValueEstimatedVolume', 'ExpressSKUs',  'CoreSKUs', 'CustomerAffinityScore_Raw',
                'CustomerAffinityScore_Standardized', 'CustomerAffinityRank']
    

    df_customer_dim = df_valid_customers.merge(df_customer_sku_recommendation_['CustomerID'].drop_duplicates(), how='inner', on = 'CustomerID')
    df_customer_dim = df_customer_dim.merge(df_customer_dim_with_affinity_score[sel_cols], how='inner', on = 'CustomerID').reset_index(drop = True) 
    
    print(f'Total Quantity before filter: {df_customer_sku_recommendation.query(f"Stock_Point_ID == {stockpoint_id}").EstimatedQuantity.sum():,}')
    print(f'Total Quantity: {df_customer_sku_recommendation.EstimatedQuantity.sum():,}')
    print(f'Total Number of Customers before filter: {df_customer_sku_recommendation.query(f"Stock_Point_ID == {stockpoint_id}").CustomerID.nunique():,}')
    print(f'Total Number of Customers: {df_customer_dim.CustomerID.nunique():,}')


    # df_customer_dim = df_customer_dim_with_affinity_score.merge(df_customer_sku_recommendation[['CustomerID']].drop_duplicates(), how='inner', on='CustomerID')
    # df_customer_sku_recommendation = df_customer_sku_recommendation.merge(df_customer_dim[['CustomerID','StateName', 'Region',
    #    'Latitude', 'Longitude', 'LGA', 'LCDA']].drop_duplicates(), how='inner', on='CustomerID')

    return df_customer_sku_recommendation_, df_customer_dim,   df_stockpoint_dim #df_customer_sku_recommendation.reset_index(drop=True), df_customer_dim, df_stockpoint_dim

## **Preprocessing**

In [None]:
# df_kyc_customer.query('CustomerID  == 3899753')#['CustomerCreatedDate']

In [None]:
df_customer_sku_recommendation, df_master_customer_dim, df_stockpoint_dim = preprocessing(df_customer_sku_recommendation_raw, 
                                                                                                        df_customer_dim_with_affinity_score_raw, 
                                                                                                        df_stockpoint_dim_raw,
                                                                                                        df_customer_score,
                                                                                                        df_kyc_customer)

In [None]:
# df_customer_score[['composite_customer_score', 'percentile_rank']].head(4)

In [None]:
# df_customer_dim_with_affinity_score_raw[['CustomerAffinityScore_Standardized', 'CustomerAffinityRank']].head(5)

In [None]:
causeway, causeway_customer_dim, causeway_stockpoint, = data_filter(df_customer_sku_recommendation, 
                                                                    df_master_customer_dim, 
                                                                    df_stockpoint_dim, stockpoint_id = 1647113,  
                                                                    sku_recency = 7, customer_recency = 90, number_recommendation = 5,
                                                                    estimate_qty_scale_factor = 1, max_estimated_qty = 5)

# print('Total Quantity b4 filter: ',df_customer_sku_recommendation.query('Stock_Point_ID == 1647113').EstimatedQuantity.sum()) #51,409 
# print('Total Quantity: ',causeway.EstimatedQuantity.sum()) #15,613
# print('Total Number of Customers b4 filter: ',df_customer_sku_recommendation.query('Stock_Point_ID == 1647113').CustomerID.nunique()) #51,409 
# print('Total Number of Customers: ', causeway_customer_dim.CustomerID.nunique()) #15,613


depot_location = [causeway_stockpoint.Latitude[0], causeway_stockpoint.Longitude[0]]

In [None]:
print(causeway.shape)
print(causeway_customer_dim.shape)
print(causeway_stockpoint.shape)

In [None]:
causeway_customer_dim.columns

## **Routing**

### Testing

In [None]:
stock_point_id =  1647113
sku_recency = 7
customer_recency = 60
number_recommendation = 5
estimate_qty_scale_factor = 1
max_estimated_qty = 5
max_customers_per_route=20
max_volume_per_route=300
max_distance_km = 50


df_sku_rec, df_customer_dim, df_stockpoint  = data_filter(df_customer_sku_recommendation, 
                                                            df_master_customer_dim, 
                                                            df_stockpoint_dim, 
                                                            stockpoint_id = stock_point_id,  
                                                            sku_recency = sku_recency, customer_recency = customer_recency, number_recommendation = number_recommendation,
                                                            estimate_qty_scale_factor = estimate_qty_scale_factor, max_estimated_qty = max_estimated_qty)


print(f"✓ Loaded {len(df_sku_rec)} SKU recommendations")
print(f"✓ Loaded {len(df_customer_dim)} customer records")
print(f"✓ Loaded {len(df_stockpoint)} stock points")

optimizer = RouteOptimizer(
    max_customers_per_route=max_customers_per_route,
    max_volume_per_route=max_volume_per_route,
    max_distance_km = max_distance_km
)

optimizer.load_data(df_sku_rec, df_customer_dim, df_stockpoint)
print("✓ Route optimizer initialized")

# STEP 3: Generate Routes for Stock Point 1647113
print("\n3. Generating Optimized Routes...")
print("-" * 40) 

stock_point = df_stockpoint[
df_stockpoint['Stock_Point_ID'] == stock_point_id
].iloc[0]
stock_point_coords = (stock_point['Latitude'], stock_point['Longitude'])


In [None]:
clustering_customers_df = optimizer.filter_customers_for_stockpoint(stock_point_id)

df_clustering, n_clusters = optimizer.create_geographic_clusters(clustering_customers_df, clustering_method = 'divisive')
print(f'Number of clusters {n_clusters}')

In [None]:
routes = optimizer.generate_multi_trip_routes(stock_point_id, max_trips=5, clustering_method='divisive')
df_routes = pd.DataFrame(routes)

In [None]:
df_routes.columns

### Function

In [None]:
def main(stock_point_id, sku_recency = 7, 
         customer_recency = 60, number_recommendation = 5, 
         estimate_qty_scale_factor = 1, max_estimated_qty = 5,
         max_customers_per_route=20,
        max_volume_per_route=300,
        max_distance_km = 40,
        verbose = False):
    """
    Main execution function demonstrating complete route optimization workflow
    """ 

    print("=" * 80)
    print("ROUTE OPTIMIZATION FOR PUSH SALES RECOMMENDATIONS")
    print("=" * 80)

    # STEP 1: Load or Generate Data
    print("\n1. Loading Data...")
    print("-" * 40)

    # Load data into optimizer 

    # df_sku_rec, df_customer_dim, df_stockpoint = data_filter(df_customer_sku_recommendation, df_customer_dim_with_affinity_score, 
    #                                                                 df_stockpoint_dim, stockpoint_id = stock_point_id,  
    #                                                                 sku_recency = sku_recency, customer_recency = customer_recency, number_recommendation = number_recommendation,
    #                                                                 estimate_qty_scale_factor = estimate_qty_scale_factor, max_estimated_qty = max_estimated_qty)
    
    df_sku_rec, df_customer_dim, df_stockpoint  = data_filter(df_customer_sku_recommendation, 
                                                                df_master_customer_dim, 
                                                                df_stockpoint_dim, 
                                                                stockpoint_id = stock_point_id,  
                                                                sku_recency = sku_recency, customer_recency = customer_recency, number_recommendation = number_recommendation,
                                                                estimate_qty_scale_factor = estimate_qty_scale_factor, max_estimated_qty = max_estimated_qty)


    print(f"✓ Loaded {len(df_sku_rec)} SKU recommendations")
    print(f"✓ Loaded {len(df_customer_dim)} customer records")
    print(f"✓ Loaded {len(df_stockpoint)} stock points")

    optimizer = RouteOptimizer(
        max_customers_per_route=max_customers_per_route,
        max_volume_per_route=max_volume_per_route,
        max_distance_km = max_distance_km
    )

    optimizer.load_data(df_sku_rec, df_customer_dim, df_stockpoint)
    print("✓ Route optimizer initialized")

    # STEP 3: Generate Routes for Stock Point 1647113
    print("\n3. Generating Optimized Routes...")
    print("-" * 40) 

    stock_point = df_stockpoint[
    df_stockpoint['Stock_Point_ID'] == stock_point_id
    ].iloc[0]
    stock_point_coords = (stock_point['Latitude'], stock_point['Longitude'])
        
    clustering_customers_df = optimizer.filter_customers_for_stockpoint(stock_point_id)

    df_clustering, n_clusters = optimizer.create_geographic_clusters(clustering_customers_df, clustering_method = 'divisive')

    routes = optimizer.generate_multi_trip_routes(stock_point_id, max_trips=5, clustering_method='divisive')
    df_routes = pd.DataFrame(routes)


    # STEP 4: Analyze Results
    print("\n4. Route Analysis & Results...")
    print("-" * 40)

    if routes:
        df_routes = pd.DataFrame(routes)
        
        print(f"✓ Generated {len(df_routes)} customer visits")
        print(f"✓ Number of trips: {df_routes['TripNumber'].max()}")
        print(f"✓ Total volume: {df_routes['EstimatedQuantity'].sum()} units")
        print(f"✓ Average priority score: {df_routes['PriorityScore'].mean():.3f}")
        
        # Trip-wise breakdown
        print("\nTrip Breakdown:")
        trip_summary = df_routes.groupby(['PLANID', 'TripNumber']).agg({
            'CustomerID': 'count',
            'EstimatedQuantity': 'sum',
            'percentile_rank': 'mean',
            'PriorityScore': 'mean'
        }).round(3)
        
        trip_summary.columns = ['Customers', 'Volume', 'Avg_Affinity_Rank', 'Avg_Priority_Score']
        print(trip_summary)
        
        # STEP 5: Display Route Details
        print("\n5. Detailed Route Plans...")
        print("-" * 40)

        if verbose == True:
            for trip_num in sorted(df_routes['TripNumber'].unique()):
                trip_data = df_routes[df_routes['TripNumber'] == trip_num]
                print(f"\n*** TRIP {trip_num} - {trip_data.iloc[0]['PLANID']} ***")
                print(f"Customers: {len(trip_data)} | Volume: {trip_data['EstimatedQuantity'].sum()} | "
                        f"Avg Priority: {trip_data['PriorityScore'].mean():.3f}")
                
                print("\nRoute Sequence:")
                for idx, row in trip_data.iterrows():
                    print(f"  {row['Sequence']}. {row['CustomerID']} | "
                            f"{row['CustomerName'][:20]} | "
                            f"Vol: {row['EstimatedQuantity']} | "
                            f"Priority: {row['PriorityScore']} | "
                            f"Rank: {row['percentile_rank']}")
                
                # STEP 6: Export Results
                print("\n6. Exporting Results...")
                print("-" * 40)
                
                # Save detailed route plan
                output_filename = f'route_plan_SP{stock_point_id}_{datetime.today().date()}.csv'
                df_routes.to_csv(output_filename, index=False)
                print(f"✓ Detailed route plan saved to: {output_filename}")
                
                # Save summary
                summary_filename = f'route_summary_SP{stock_point_id}_{datetime.today().date()}.csv'
                trip_summary.to_csv(summary_filename)
                print(f"✓ Route summary saved to: {summary_filename}")
                
                # STEP 7: Key Insights
                print("\n7. Key Insights & Recommendations...")
                print("-" * 40)
                
                total_customers = len(df_routes)
                total_volume = df_routes['EstimatedQuantity'].sum()
                avg_customers_per_trip = total_customers / df_routes['TripNumber'].max()
                
                print(f"• Route Efficiency: {avg_customers_per_trip:.1f} customers per trip")
                print(f"• Volume Utilization: {(total_volume/df_routes['TripNumber'].max()/200)*100:.1f}% of capacity")
                print(f"• Priority Focus: {(df_routes['PriorityScore'] > 0.5).sum()} high-priority customers selected")
                
                # Geographic spread
                lat_range = df_routes['Latitude'].max() - df_routes['Latitude'].min()
                lon_range = df_routes['Longitude'].max() - df_routes['Longitude'].min()
                print(f"• Geographic Coverage: {lat_range:.3f}° lat × {lon_range:.3f}° lon")
            else:
                pass
        
        push_recommendation = df_sku_rec.merge(df_clustering[['Stock_Point_ID','CustomerID', 'cluster']], how='inner', on =['Stock_Point_ID','CustomerID'] )
        return push_recommendation, df_clustering, df_routes, trip_summary, stock_point_coords, df_stockpoint
    
    else:
        print("❌ No routes generated. Check data and constraints.")
        return None, None


### Run

In [None]:
df_customer_sku_recommendation.head(1)

In [None]:
causeway_push_recommendation, df_clustering, df_routes, trip_summary, stock_point_coords, df_stockpoint  = main(stock_point_id = 1647113, sku_recency = 7, 
                                                                            customer_recency = 60, number_recommendation = 5, 
                                                                            estimate_qty_scale_factor = 1, max_estimated_qty = 5,
                                                                            max_customers_per_route=20,
                                                                            max_volume_per_route=300,
                                                                            max_distance_km = 100,
                                                                            verbose = False)

# ETA: 2mins

#

In [None]:
evaluate_unsupervised_clustering(df_clustering)

In [None]:
# causeway_push_recommendation.LGA.unique()
# causeway_customer_dim[causeway_customer_dim['LGA'].isnull()]



In [None]:
# print(causeway_push_recommendation.columns.to_list())
# df_routes.head(3)

#### Maps

In [None]:
# df_routes.head(100)#.cluster.unique

map_name_all_rec = f'./recommendation_output/trip_test_tobedeleted{datetime.today().date()}.html'
map_clusters = vis_and_save(df_routes.rename(columns={'cluster':'cluster_bck'}).rename(columns={'TripNumber':'cluster'}), causeway_stockpoint, filename=map_name_all_rec)

map_clusters

In [None]:
map_name_all_rec = f'./recommendation_output/causeway_push_recommendation_cluster_{datetime.today().date()}.html'
map_clusters = vis_and_save(df_clustering, causeway_stockpoint, filename=map_name_all_rec)

map_clusters

In [None]:
causeway_push_recommendation_summary.head(20)

In [None]:
causeway_push_recommendation_summary.columns

### Select Cluster to Target

In [None]:
causeway_push_recommendation_summary = causeway_push_recommendation.groupby('cluster').agg(
    LGA_list = ('LGA', lambda x: x.unique().tolist()),
    LCDA_List = ('LCDA', lambda x: x.unique().tolist()),
    ncustomer = ('CustomerID','nunique'),
    totalQty = ('EstimatedQuantity','sum'), 
    avg_customer_score = ('composite_customer_score','mean'),
).reset_index().sort_values(['avg_customer_score','ncustomer', 'totalQty'], ascending=[False, False, False])

# for x in causeway_push_recommendation_summary.head(10).LGA:
#     print(x) 


for x in causeway_push_recommendation_summary.head(10).LCDA_List:
    print(x) 


causeway_push_recommendation_summary.head(10)

In [None]:
# # Sample list of LCDA targets
# target_lcdas = [
#     'Ajeromi Ifelodun - Ajegunle',
#     'Surulere - Aguda',
#     'Surulere - Ijesha',
#     'Surulere - Orile Iganmu',
#     'Mushin - Mushin Market',
#     'Apapa - Ijora',
#     'Ajeromi Ifelodun - Sari Iganmu',
#     'Ajeromi Ifelodun - Suru-alaba',
#     'Ajeromi Ifelodun - Olodi',
#     'Ajeromi Ifelodun - Boundary'
# ]
 


# (
#     causeway_push_recommendation.groupby(['cluster','LGA','LCDA'])
#     .agg(CustomerCount = ('CustomerID','nunique'))
#     .reset_index()
#     .query(f'LCDA in {target_lcdas}')
#     .sort_values('CustomerCount', ascending = [False])
# )
num_clus = 5
df_high_value_cluster = causeway_push_recommendation_summary.query('ncustomer > 4').head(10).reset_index(drop = True)
sel_cluster_tuple = df_high_value_cluster.cluster[0:num_clus].to_list()
print(sel_cluster_tuple)
print(df_high_value_cluster.head(num_clus).ncustomer.sum())
df_high_value_cluster.head(10)

In [None]:
# df_sel_clust.shape

In [None]:
 # Select cluster 37
df_sel_clust = df_clustering.query(f'cluster in {sel_cluster_tuple}').query('Latitude > 0')

# Ensure coordinates are in [longitude, latitude] for ORS
coords = [[lon, lat] for lat, lon in zip(df_sel_clust.Latitude, df_sel_clust.Longitude)]

# Convert depot_location to ORS format
# Assuming depot_location is [lat, lon], flip to [lon, lat]
vehicle_start = [depot_location[1], depot_location[0]]

In [None]:
depot_location = [causeway_stockpoint.Latitude[0], causeway_stockpoint.Longitude[0]]
depot_name = causeway_stockpoint.Stock_point_Name[0]

map_clusters_route = create_enhanced_cluster_map(
    df_sel_clust,
    popup_cols=['CustomerID', 'LGA', 'LCDA'],
    tooltip_cols=['LGA', 'LCDA'], 
    zoom_start=10, 
    radius=10
).add_child(folium.Marker(location=depot_location, 
                        size = 10, 
                        tooltip=depot_name, 
                        icon=folium.Icon(color="green", 
                        icon="home"))) 

# Print number of jobs
print("Number of customer locations:", len(coords))

# Define vehicles (capacity=5, starting and ending at depot)
# vehicles = [
#     ors.optimization.Vehicle(id=0, profile='driving-car', start=vehicle_start, end=vehicle_start, capacity=[20]),
#     ors.optimization.Vehicle(id=1, profile='driving-car', start=vehicle_start, end=vehicle_start, capacity=[20]),
#     ors.optimization.Vehicle(id=2, profile='driving-car', start=vehicle_start, end=vehicle_start, capacity=[20]),
#     ors.optimization.Vehicle(id=3, profile='driving-car', start=vehicle_start, end=vehicle_start, capacity=[20]),
#     ors.optimization.Vehicle(id=4, profile='driving-car', start=vehicle_start, end=vehicle_start, capacity=[20]),
# ]
num_vehicles = math.floor(df_high_value_cluster.head(num_clus).ncustomer.sum() / 20)
vehicles = [
    ors.optimization.Vehicle(
        id=i,
        profile='driving-car',
        start=vehicle_start,
        end=vehicle_start,
        capacity=[20]
    ) for i in range(num_vehicles)
]
# print(len(vehicles))
# Define jobs (each customer gets amount=[1])
jobs = [ors.optimization.Job(id=index, location=coord, amount=[1]) for index, coord in enumerate(coords)]

# Call ORS optimization API
optimized = client.optimization(jobs=jobs, vehicles=vehicles, geometry=True)

# Result is in 'optimized' JSON


In [None]:
line_colors = ['green', 'orange', 'blue', 'yellow']
for route in optimized['routes']:
    folium.PolyLine(locations=[list(reversed(coords)) for coords in ors.convert.decode_polyline(route['geometry'])['coordinates']], color=line_colors[route['vehicle']]).add_to(map_clusters_route)

map_clusters_route

#### Export

In [None]:
def prep_selected_trip(push_recommendation, df_high_value_cluster, 
                df_master_customer_dim,  df_stockpoint):
    
    sel_columns = ['Stock_Point_ID', 
                'StateName', # 'Region', 
                'Latitude', 'Longitude', 'LGA', 'LCDA', 'cluster', 
                'CustomerID', 'SKUID', 'ProductName', 'Output',
                'LastDeliveredDate', 'Recency', 'InventoryCheck', 'ProductTag', 'Medium',
                'EstimatedQuantity', 
                # 'CustomerSKUscoreRank'
                ]

    sel_cols_cust= ['Stock_Point_ID', 'CustomerID', 'ContactName',  'CustomerModeName',   'ContactPhone', 'FullAddress', 
                    'composite_customer_score', 'percentile_rank', 'FullAddress', 'KYC_Capture_Status']

    final_cols = ['Stock_Point_ID', 'Stock_point_Name', 'TripID', 
                'LGA_list', 'LCDA_List', 'ncustomer', 'totalQty','avg_customer_score', 'CustomerID', 'ContactName',  
                'CustomerModeName',   'ContactPhone', 'FullAddress',
                'Latitude', 'Longitude', 'LGA', 'LCDA',   
                'composite_customer_score', 'percentile_rank', 'FullAddress', 'KYC_Capture_Status', 
                'CustomerID', 'SKUID', 'ProductName', 'Output',
                'LastDeliveredDate', 'Recency', 'InventoryCheck', 'ProductTag', 'Medium',
                'EstimatedQuantity',
                ]
    
    push_recommmendation_trip = (
                                push_recommendation[push_recommendation['cluster'].isin(list(df_sel_clust.cluster.unique()))][sel_columns]
                                .merge(df_high_value_cluster, how='left', on = 'cluster' )
                                .merge(df_master_customer_dim[sel_cols_cust], how='left', on = ['Stock_Point_ID', 'CustomerID'])
                                .merge(df_stockpoint[['Stock_Point_ID', 'Stock_point_Name']], how='left', on = ['Stock_Point_ID'])
                                .rename(columns={'cluster':'TripID'})
                                [final_cols]
                                )

    return push_recommmendation_trip

In [None]:
sel_columns = ['Stock_Point_ID', 
 'StateName', # 'Region', 
 'Latitude', 'Longitude', 'LGA', 'LCDA', 'cluster', 
 'CustomerID', 'SKUID', 'ProductName', 'Output',
'LastDeliveredDate', 'Recency', 'InventoryCheck', 'ProductTag', 'Medium',
'EstimatedQuantity', 
# 'CustomerSKUscoreRank'
]

sel_cols_cust= ['Stock_Point_ID', 'CustomerID', 'ContactName',  'CustomerModeName',   'ContactPhone', 'FullAddress', 
 'composite_customer_score', 'percentile_rank', 'FullAddress', 'KYC_Capture_Status']

final_cols = ['Stock_Point_ID', 'Stock_point_Name', 'TripID', 
              'LGA_list', 'LCDA_List', 'ncustomer', 'totalQty','avg_customer_score', 'CustomerID', 'ContactName',  
              'CustomerModeName',   'ContactPhone', 'FullAddress',
              'Latitude', 'Longitude', 'LGA', 'LCDA',   
                'composite_customer_score', 'percentile_rank', 'FullAddress', 'KYC_Capture_Status', 
                'CustomerID', 'SKUID', 'ProductName', 'Output',
                'LastDeliveredDate', 'Recency', 'InventoryCheck', 'ProductTag', 'Medium',
                'EstimatedQuantity',
                ]
causeway_push_recommmendation_trip = (
                            causeway_push_recommendation[causeway_push_recommendation['cluster'].isin(list(df_sel_clust.cluster.unique()))][sel_columns]
                            .merge(df_high_value_cluster, how='left', on = 'cluster' )
                            .merge(df_master_customer_dim[sel_cols_cust], how='left', on = ['Stock_Point_ID', 'CustomerID'])
                            .merge(df_stockpoint[['Stock_Point_ID', 'Stock_point_Name']], how='left', on = ['Stock_Point_ID'])
                            .rename(columns={'cluster':'TripID'})
                            [final_cols]
                            )



filename = f'./recommendation_output/causeway_push_recommendation_{datetime.today().date()}.xlsx'
map_name_trip = f'./recommendation_output/causeway_push_recommendation_trip_{datetime.today().date()}.html'
with pd.ExcelWriter(filename) as writer:
        causeway_push_recommendation_trip.to_excel(writer, sheet_name='Selected Trip', index=False)
        causeway_push_recommendation.to_excel(writer, sheet_name='All Recommendation', index=False)
        causeway_push_recommendation_summary.to_excel(writer, sheet_name='Recommendation Summary', index=False)
# causeway_push_recommendation_trip.rename(columns={'cluster':'TripID'}, inplace=True)

In [None]:
# df_master_customer_dim.columns
causeway_push_recommendation_trip.columns



In [None]:
# df_high_value_cluster.columns

In [None]:
# df_high_value_cluster

# Main Recommendation

In [61]:
def run_push_recommendation(df_customer_sku_recommendation, 
                            df_master_customer_dim, 
                            df_stockpoint_dim, 
                            stock_point_id,
                            stock_point_name,
                            sku_recency = 7, 
                            customer_recency = 60, number_recommendation = 5, 
                            estimate_qty_scale_factor = 1, max_estimated_qty = 5,
                            max_customers_per_route=20,
                            max_volume_per_route=300,
                            max_distance_km = 40,
                            sel_trip_cluster = 5,
                            clustering_method = 'divisive'):
    """
    Main execution function demonstrating complete route optimization workflow
    """ 

    print("=" * 80)
    print("ROUTE OPTIMIZATION FOR PUSH SALES RECOMMENDATIONS")
    print(f"StockPoint: {stock_point_name}, StockPointID: {stock_point_id},")
    print("=" * 80)

    # STEP 1: Load or Generate Data
    print("\n1. Loading Data...")
    print("-" * 40)

    df_sku_rec, df_customer_dim, df_stockpoint  = data_filter(df_customer_sku_recommendation, 
                                                                df_master_customer_dim, 
                                                                df_stockpoint_dim, 
                                                                stockpoint_id = stock_point_id,  
                                                                sku_recency = sku_recency, 
                                                                customer_recency = customer_recency, 
                                                                number_recommendation = number_recommendation,
                                                                estimate_qty_scale_factor = estimate_qty_scale_factor, 
                                                                max_estimated_qty = max_estimated_qty)


    print(f"✓ Loaded {len(df_sku_rec)} SKU recommendations")
    print(f"✓ Loaded {len(df_customer_dim)} customer records")
    print(f"✓ Loaded {len(df_stockpoint)} stock points")

    push_recommendation, df_clustering, df_routes, stock_point_coords = cluster_trip_route(df_sku_rec, 
                                                                                            df_customer_dim, 
                                                                                            df_stockpoint,
                                                                                            stock_point_id,
                                                                                            max_customers_per_route, 
                                                                                            max_volume_per_route,
                                                                                            max_distance_km,
                                                                                            clustering_method)
        
    ### Cluster Evaluation
    evaluate_unsupervised_clustering(df_clustering)

    ### Trip Maps
    try:
        trip_map_path = f'./recommendation_output/trip_map/{stock_point_name}_{datetime.today().date()}.html' 
        map_clusters = vis_and_save(df_routes= (df_routes
                                            .rename(columns={'cluster':'cluster_bck'})
                                            .rename(columns={'TripNumber':'cluster'})
                                            ), 
                                    df_stockpoint=df_stockpoint, 
                                    filename=map_name_all_rec)
    except Exception as e:
        print(f'Unable to save the generated map image: {e}')

    ### Cluster Summary 
    cluster_summary, df_high_value_cluster_summary, sel_total_customer_count = cluster_summary_and_selection(
                                  push_recommendation,
                                  sel_trip_cluster,
                                  )

    ## Route Optimization
    try:
        run_route_optimizer(df_clustering, sel_cluster_tuple, df_stockpoint, 
                        stock_point_name,
                        sel_total_customer_count, 
                        capacity_size = 20)
    except Exception as e:
        print(f'Unable to generate route mapping using orc: {e}')

    ## Trip
    push_recommendation_trip = prep_selected_trip(push_recommendation, 
                                                  df_high_value_cluster_summary, 
                                                  df_master_customer_dim,  
                                                  df_stockpoint,
                                                  sel_cluster_tuple)
    
    ### Export Data
    try:
        export_data(
                selected_trip = push_recommendation_trip,
                all_push_recommendation = push_recommendation,
                cluster_summary = cluster_summary,
                stock_point_name = stock_point_name
            )
    except Exception as e:
        print(f'Unable to generate route mapping using orc: {e}')

    dict_ = {
        'stock_point_name': stock_point_name,
        'selected_trip': push_recommendation_trip,
        'all_push_recommendation': push_recommendation,
        'cluster_summary': cluster_summary
    }

    return dict_
    #push_recommendation, df_clustering, df_routes, trip_summary, stock_point_coords, df_stockpoint