# MAIN

In [None]:

%load_ext autoreload
%autoreload 2

import os
import shutil
from pathlib import Path
import pandas as pd 
import logging
from src.data.get_data import get_all_input_data #get_data_reco_custdim_spdim , get_kyc_customers, get_customer_score
from datetime import datetime, timedelta 
from src.data.preprocessing import preprocessing
from src.routing.ValhallaManager import ValhallaManager
from src.data.data_filter import data_filter 
from src.main import run_push_recommendation 

from src.data.export import export_data 
from src.data.get_connection import get_connection
from src.clustering.evaluate_cluster import evaluate_unsupervised_clustering
from src.utils_and_postprocessing.utils import cluster_summary_and_selection, postprocess_selected_trip
from src.utils_and_postprocessing.run_clustering_and_routing import create_and_plot_route, create_cluster_trip_optroute 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
%reload_ext autoreload

### Logger Set-up

In [None]:
import logging
import os  # Good practice to import os for path manipulation, especially for log files

# Logger for this notebook. logging.getLogger returns the same object on every
# call with the same name, so everything below must be safe to run repeatedly.
logger = logging.getLogger(__name__)

# Messages below INFO are ignored by this logger.
logger.setLevel(logging.INFO)

# Re-running this cell would otherwise add a second file/stream handler pair to
# the same logger object and emit every message once per pair. Drop (and close)
# whatever a previous run attached before adding fresh handlers.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# This logger has its own console handler. Without this, each record would also
# travel up to any handler on the root logger (e.g. one installed by
# logging.basicConfig) and be printed a second time.
logger.propagate = False

# --- 1. File Handler (for logging to a file) ---
log_file_path = 'test.log'  # You might want to make this dynamic later
file_handler = logging.FileHandler(log_file_path)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
)
logger.addHandler(file_handler)

# --- 2. Stream Handler (for logging to the console/notebook output) ---
stream_handler = logging.StreamHandler()  # By default, this logs to sys.stderr
stream_handler.setLevel(logging.INFO)
stream_handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))  # simpler format for console
logger.addHandler(stream_handler)


### Parameter Setup

In [5]:
# Project root and output layout.
# Path(__file__) is not usable here, so the root is taken from the current
# working directory (alternative kept for reference: Path(__file__).resolve().parent).
BASE_DIR = Path('').resolve()
RECOMMENDATION_DIR = BASE_DIR / 'recommendation_output'

# Names of the sub-folders created under RECOMMENDATION_DIR, plus the file name
# given to the copied index page in each map folder.
SELECTED_TRIP_DIR, CLUSTER_MAP_DIR, EXCEL_DOCS_DIR = (
    'selected_trip_map',
    'cluster_map',
    'excel_docs',
)
INDEX_HTML = 'index.html'

# Module-level placeholders; setup_directories() assigns the real values.
CURRENT_DATE = SELECTED_TRIP_PATH = ALL_CLUSTER_PATH = LOCAL_EXCEL_PATH = None

def _copy_index_template(source_index, target_dir):
    """Copy a default index page into ``target_dir`` under the name INDEX_HTML.

    Best-effort: a missing template is logged as a warning and a failed copy is
    logged as an error; neither case raises.

    Args:
        source_index: Path of the template file to copy.
        target_dir: Path of the directory that receives the copy.
    """
    try:
        if source_index.exists():
            shutil.copy2(source_index, target_dir / INDEX_HTML)
            logger.info(f"Copied index.html to {target_dir / INDEX_HTML}")
        else:
            logger.warning(f"Source index.html not found at {source_index}")
    except Exception as e:
        logger.error(f"Failed to copy index.html to {target_dir}: {e}")


def setup_directories():
    """Set up directory structure for recommendation output with date-based subdirectories.

    Assigns the module-level globals CURRENT_DATE, SELECTED_TRIP_PATH,
    ALL_CLUSTER_PATH and LOCAL_EXCEL_PATH, creates the three dated folders under
    RECOMMENDATION_DIR, and copies the default index pages from ``BASE_DIR/html``
    into the two map folders.

    Raises:
        Exception: any error raised while computing paths or creating the
            folders is logged and re-raised. Index-copy problems are only logged.
    """
    global CURRENT_DATE, SELECTED_TRIP_PATH, ALL_CLUSTER_PATH, LOCAL_EXCEL_PATH
    try:
        # Today's date names the per-run sub-folder in every output tree.
        CURRENT_DATE = datetime.today().date()
        date_folder = str(CURRENT_DATE)

        SELECTED_TRIP_PATH = RECOMMENDATION_DIR / SELECTED_TRIP_DIR / date_folder
        ALL_CLUSTER_PATH = RECOMMENDATION_DIR / CLUSTER_MAP_DIR / date_folder
        LOCAL_EXCEL_PATH = RECOMMENDATION_DIR / EXCEL_DOCS_DIR / date_folder

        # Create directories if they don't exist
        for directory in (SELECTED_TRIP_PATH, ALL_CLUSTER_PATH, LOCAL_EXCEL_PATH):
            directory.mkdir(parents=True, exist_ok=True)
            logger.info(f"Ensured directory exists: {directory}")

        # Each map folder gets its own default index page.
        html_dir = BASE_DIR / 'html'
        _copy_index_template(html_dir / 'default_selected_cluster_map_index.html', SELECTED_TRIP_PATH)
        _copy_index_template(html_dir / 'default_cluster_map_index.html', ALL_CLUSTER_PATH)

    except Exception as e:
        logger.error(f"Error setting up directories: {e}")
        raise

# Inside a notebook kernel __name__ is "__main__" (the captured log lines are
# tagged INFO:__main__), so this guard passes and the cell runs the set-up
# as soon as it is executed.
if __name__ == "__main__":
    # Configure the root logger at INFO level before any directory work starts.
    logging.basicConfig(level=logging.INFO)
    # Assign the date-based path globals and create the output folders.
    setup_directories()

INFO: Ensured directory exists: /home/azureuser/BT/11_Demand_Engine/Algorithm_V1/recommendation_output/selected_trip_map/2025-07-01
INFO:__main__:Ensured directory exists: /home/azureuser/BT/11_Demand_Engine/Algorithm_V1/recommendation_output/selected_trip_map/2025-07-01
INFO: Ensured directory exists: /home/azureuser/BT/11_Demand_Engine/Algorithm_V1/recommendation_output/cluster_map/2025-07-01
INFO:__main__:Ensured directory exists: /home/azureuser/BT/11_Demand_Engine/Algorithm_V1/recommendation_output/cluster_map/2025-07-01
INFO: Ensured directory exists: /home/azureuser/BT/11_Demand_Engine/Algorithm_V1/recommendation_output/excel_docs/2025-07-01
INFO:__main__:Ensured directory exists: /home/azureuser/BT/11_Demand_Engine/Algorithm_V1/recommendation_output/excel_docs/2025-07-01
INFO: Copied index.html to /home/azureuser/BT/11_Demand_Engine/Algorithm_V1/recommendation_output/selected_trip_map/2025-07-01/index.html
INFO:__main__:Copied index.html to /home/azureuser/BT/11_Demand_Engine/A

In [None]:
# list all files in the SELECTED_TRIP_PATH directory stripping the extension
def list_files_in_directory(directory):
    """Return the sorted extension-less names (stems) of the files in a directory.

    Only regular files directly inside ``directory`` are listed; sub-directories
    are skipped.

    Args:
        directory: Directory to scan, given as a ``pathlib.Path`` or a string.

    Returns:
        A sorted list of file stems. An empty list is returned (and the error
        logged) if the directory cannot be listed.
    """
    try:
        # Path() accepts both str and Path, so a plain string no longer falls
        # into the except branch. Sorting makes the result independent of the
        # order in which the filesystem happens to yield entries.
        return sorted(entry.stem for entry in Path(directory).glob('*') if entry.is_file())
    except Exception as e:
        logger.error(f"Error listing files in directory {directory}: {e}")
        return []


In [5]:
# Instantiate the manager, handing it the notebook logger.
# NOTE(review): presumably Valhalla is the routing service used by the route
# optimization later on — confirm in src.routing.ValhallaManager.
valhalla_manager = ValhallaManager(logger=logger)

# Start the server. The matching valhalla_manager.stop_valhalla() call lives in
# the clean-up cell at the end of the notebook.
valhalla_manager.start_valhalla()

# Check valhalla status. As the last expression of the cell, the value returned
# by this call is what the notebook displays as the cell output.
valhalla_manager.check_valhalla_status()

Starting Valhalla container...
 Network valhalla_nigeria_project_default  Creating
 Network valhalla_nigeria_project_default  Created
 Container valhalla_nigeria_project-valhalla-1  Creating
 Container valhalla_nigeria_project-valhalla-1  Created
 Container valhalla_nigeria_project-valhalla-1  Starting
 Container valhalla_nigeria_project-valhalla-1  Started

Waiting 10 seconds for Valhalla to initialize...
Valhalla container started.


True

## Main Function

## MAIN 1

In [18]:
# Get input data: one call returns the five raw input DataFrames, unpacked in
# the order get_all_input_data yields them.
(
    df_customer_sku_recommendation_raw,
    df_customer_dim_with_affinity_score_raw,
    df_stockpoint_dim_raw,
    df_kyc_customer,
    df_customer_score,
) = get_all_input_data(logger=logger)

INFO: Executing stored procedure(s) to fetch data...
INFO:__main__:Executing stored procedure(s) to fetch data...


INFO: Data fetch complete.
INFO:__main__:Data fetch complete.
INFO: --- Fetched DataFrames Shapes ---
INFO:__main__:--- Fetched DataFrames Shapes ---
INFO: Customer SKU Recommendation: (154592, 13)
INFO:__main__:Customer SKU Recommendation: (154592, 13)
INFO: Customer Dimension with Affinity Score: (10782, 22)
INFO:__main__:Customer Dimension with Affinity Score: (10782, 22)
INFO: Stockpoint Dimension: (72, 4)
INFO:__main__:Stockpoint Dimension: (72, 4)
INFO: ---------------------------------

INFO:__main__:---------------------------------

INFO: Connecting to database and fetching KYC customer data...
INFO:__main__:Connecting to database and fetching KYC customer data...
INFO: Successfully fetched KYC customer data. Shape: (343144, 57)
INFO:__main__:Successfully fetched KYC customer data. Shape: (343144, 57)
INFO: KYC customers DataFrame shape: (343144, 57)
INFO:__main__:KYC customers DataFrame shape: (343144, 57)
INFO: Connecting to database and fetching customer score data...
INFO:

In [None]:
# # Development: save the inputs to feather for quick access
# input_data_path = BASE_DIR / 'input'
# # df_customer_sku_recommendation_raw, df_customer_dim_with_affinity_score_raw, \
#     # df_stockpoint_dim_raw, df_kyc_customer, df_customer_score
    
# # Save the dataframes to feather files
# df_customer_sku_recommendation_raw.to_feather(input_data_path / 'df_customer_sku_recommendation_raw.feather')
# df_customer_dim_with_affinity_score_raw.to_feather(input_data_path / 'df_customer_dim_with_affinity_score_raw.feather')
# df_stockpoint_dim_raw.to_feather(input_data_path / 'df_stockpoint_dim_raw.feather')
# df_kyc_customer.to_feather(input_data_path / 'df_kyc_customer.feather')
# df_customer_score.to_feather(input_data_path / 'df_customer_score.feather')



In [21]:
# Preprocessing: turn the five raw inputs into the three working tables used by
# every later step. Note the argument order: customer score comes before KYC.
(
    df_customer_sku_recommendation,
    df_master_customer_dim,
    df_stockpoint_dim,
) = preprocessing(
    df_customer_sku_recommendation_raw,
    df_customer_dim_with_affinity_score_raw,
    df_stockpoint_dim_raw,
    df_customer_score,
    df_kyc_customer,
)

In [22]:
# Data Filter - Testing: run data_filter for a single stock point to inspect
# its effect (it prints quantity and customer totals before/after filtering).
df_sku_rec, df_customer_dim, df_stockpoint = data_filter(
    df_customer_sku_recommendation,
    df_master_customer_dim,
    df_stockpoint_dim,
    stockpoint_id=1647394,  # alternative ID tried during testing: 1647113
    sku_recency=7,
    customer_recency=60,
    number_recommendation=10,
    estimate_qty_scale_factor=1,
    max_estimated_qty=5,
    exclude_recency_customer=4,
)

Total Quantity before filter: 16,131
Total Quantity: 11,998
Total Number of Customers before filter: 476
Total Number of Customers: 353


In [1]:
# # stock_point_id = 1647394 #
# stock_point_id =  1647113
# stock_point_name = df_stockpoint_dim.query(f'Stock_Point_ID == {stock_point_id}')['Stock_point_Name'].iloc[0] 
# res_dict = run_push_recommendation(df_customer_sku_recommendation, 
#                             df_master_customer_dim, 
#                             df_stockpoint_dim, 
#                             stock_point_id,
#                             stock_point_name,
#                             sku_recency = 7, 
#                             customer_recency = 60, number_recommendation = 5, 
#                             estimate_qty_scale_factor = 1, max_estimated_qty = 5, 
#                             exclude_recency_customer = 4,
#                             max_customers_per_route=20,
#                             max_volume_per_route=300,
#                             max_distance_km = 40,
#                             sel_trip_cluster = 5,
#                             min_ncust_per_cluster = 5,
#                             clustering_method = 'divisive',
#                             skip_route_optimization = False,
#                             save_to_disk = False,
#                             logger=logger)

In [None]:
# Settings applied unchanged to every stock point in the run.
push_recommendation_params = dict(
    sku_recency=7,
    customer_recency=60,
    number_recommendation=5,
    estimate_qty_scale_factor=1,
    max_estimated_qty=5,
    exclude_recency_customer=4,
    max_customers_per_route=20,
    max_volume_per_route=300,
    max_distance_km=40,
    sel_trip_cluster=5,
    min_ncust_per_cluster=5,
    clustering_method='divisive',
    skip_route_optimization=False,
    save_to_disk=False,
    logger=logger,
)

# Results are keyed by stock point name.
# NOTE(review): two stock points sharing a name would overwrite each other here — confirm names are unique.
ALL_STOCKPOINTS_RESULT = {}
for index, row in df_stockpoint_dim.iterrows():
    stock_point_id = row['Stock_Point_ID']
    stock_point_name = row['Stock_point_Name']
    # Progress line: DataFrame index label over the total number of stock points.
    print(f'{index}/{len(df_stockpoint_dim)} \nStock Point ID: {stock_point_id} || Stock Point Name: {stock_point_name}')

    res_dict = run_push_recommendation(
        df_customer_sku_recommendation,
        df_master_customer_dim,
        df_stockpoint_dim,
        stock_point_id,
        stock_point_name,
        **push_recommendation_params,
    )

    ALL_STOCKPOINTS_RESULT[stock_point_name] = res_dict

## Fix Map

In [42]:
# Every stock point ID that should have a map for this run.
all_spid_list = df_stockpoint_dim['Stock_Point_ID'].to_list()

# IDs that already have a map file in each output folder. File stems are the
# stock point IDs; anything starting with 'index' is the folder's index page.
selected_trip_spid_list = [
    int(stem)
    for stem in list_files_in_directory(SELECTED_TRIP_PATH)
    if not stem.startswith('index')
]
all_cluster_spid_list = [
    int(stem)
    for stem in list_files_in_directory(ALL_CLUSTER_PATH)
    if not stem.startswith('index')
]

# Expected IDs with no map file in the respective folder.
unmapped_selected_trip_spid_list = list(set(all_spid_list) - set(selected_trip_spid_list))
unmapped_all_cluster_spid_list = list(set(all_spid_list) - set(all_cluster_spid_list))

In [44]:
# Map coverage summary. Each line carries its own label so the counts can be
# told apart (previously all five were printed as "All Stock Points").
print(f"All Stock Points: {len(all_spid_list)}")
print(f"Stock Points With Selected-Trip Map: {len(selected_trip_spid_list)}")
print(f"Stock Points With Cluster Map: {len(all_cluster_spid_list)}")
print(f"Stock Points Missing Selected-Trip Map: {len(unmapped_selected_trip_spid_list)}")
print(f"Stock Points Missing Cluster Map: {len(unmapped_all_cluster_spid_list)}")

All Stock Points: 72
All Stock Points: 72
All Stock Points: 51
All Stock Points: 0
All Stock Points: 21


In [33]:
# Show the dated selected-trip output folder assigned by setup_directories().
print(SELECTED_TRIP_PATH)

/home/azureuser/BT/11_Demand_Engine/Algorithm_V1/recommendation_output/selected_trip_map/2025-07-01


In [None]:
# Give every stock point that has no map file a placeholder page: the default
# index template of the matching map type, saved as <stock point id>.html.
source_cluster_map_index = BASE_DIR / 'html' / 'default_cluster_map_index.html'
source_selected_trip_index = BASE_DIR / 'html' / 'default_selected_cluster_map_index.html'

# One pass per map type: (template to copy, destination folder, IDs lacking a map).
# The cluster-map pass uses the cluster-map template; it previously copied the
# selected-trip template by mistake.
for source_index, target_dir, missing_spids in (
    (source_selected_trip_index, SELECTED_TRIP_PATH, unmapped_selected_trip_spid_list),
    (source_cluster_map_index, ALL_CLUSTER_PATH, unmapped_all_cluster_spid_list),
):
    if len(missing_spids) == 0:
        continue
    try:
        if source_index.exists():
            for spid in missing_spids:
                spid_path = target_dir / f'{spid}.html'
                shutil.copy2(source_index, spid_path)
                # Report the file that was actually written.
                logger.debug(f"Copied {source_index.name} to {spid_path}")
        else:
            logger.warning(f"Source index.html not found at {source_index}")
    except Exception as e:
        # Best-effort: a failed copy is logged and the remaining IDs of this
        # map type are skipped, as before.
        logger.error(f"Failed to copy index.html to {target_dir}: {e}")
             

In [38]:
# Display the stock point IDs that have no selected-trip map file.
unmapped_selected_trip_spid_list

[1646976,
 1647112,
 1647115,
 1646989,
 1647376,
 1647120,
 1646995,
 1647381,
 1647126,
 1647128,
 1647006,
 1647136,
 1647401,
 1647419,
 1647420,
 1647421,
 1647422,
 1647424,
 1647425,
 1647434,
 1647436,
 1647437,
 1647438,
 1646945,
 1647075,
 1647076,
 1647077]

## Export to DB

In [13]:
from src.data.export2db import RecommendationProcessor
from src.data.get_connection import get_connection

In [14]:
# Simple usage
# main(ALL_STOCKPOINTS_RESULT, CURRENT_DATE, get_connection)

# Or use the processor directly.
# get_connection is passed as a callable; it is not called here.
processor = RecommendationProcessor(get_connection)
# ALL_STOCKPOINTS_RESULT is the per-stock-point result dict built by the main
# loop; CURRENT_DATE is assigned by setup_directories(). Both must already
# exist in the kernel when this cell runs.
processor.process(ALL_STOCKPOINTS_RESULT, CURRENT_DATE)

INFO:src.data.export2db:Successfully upserted 29436 rows to dailyPredictedPull
INFO:src.data.export2db:Successfully upserted 674 rows to dailyPredictedPullClusterSummary
INFO:src.data.export2db:Recommendation processing completed successfully


# Clean Up

In [15]:
# Stop the server when done; counterpart of the valhalla_manager.start_valhalla()
# call made during set-up.
valhalla_manager.stop_valhalla()

Stopping Valhalla container...
 Container valhalla_nigeria_project-valhalla-1  Stopping
 Container valhalla_nigeria_project-valhalla-1  Stopped
 Container valhalla_nigeria_project-valhalla-1  Removing
 Container valhalla_nigeria_project-valhalla-1  Removed
 Network valhalla_nigeria_project_default  Removing
 Network valhalla_nigeria_project_default  Removed

Valhalla container stopped.
