# Swiggy Warehouse Picklist Generator

### Clean Code without Examples and Code Runs

This file contains a cleaned-up version of the picklist code, organized for readability and focused on the core logic. Intermediate examples and exploratory code runs from development have been removed; only a short usage walkthrough remains at the end.

In [None]:
import pandas as pd
# Load the raw order lines into the module-level frame used by the cells below.
df = pd.read_csv("data.csv")

In [None]:
# Drop the columns that the picklist code in this file does not reference.
# NOTE: re-running this cell raises KeyError because the columns are already gone.
df = df.drop(
    columns=[
        'dt', 'length_in_cm', 'width_in_cm',
        'height_in_cm', 'order_tag', 'location_code',
    ],
)
# The message used to claim that only 'dt' was removed.
print("Columns after removing unused columns:")
print(df.columns)

In [None]:
# Parse 'order_date' day-first; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
df['order_date'] = pd.to_datetime(
    df['order_date'],
    dayfirst=True,
    errors='coerce',
)

In [None]:
def orders_of_the_day(date):
    """Return the rows of the module-level ``df`` whose 'order_date' equals ``date``."""
    same_day = df['order_date'] == date
    return df.loc[same_day]

In [None]:
import heapq
from datetime import time
import os
import shutil

In [None]:
class PickList:
    """
    Warehouse Pick-List Management System.

    Handles order grouping, zone prioritization, bin-packing for picklists,
    and time/weight constraint enforcement.

    NOTE: the zone queue and the generated picklists are stored in
    module-level globals (``ZONE_PRIORITY_QUEUE``, ``ZONE_PICKLIST_QUEUE``,
    ``ZONE_PICKLIST_DICT``). They are therefore shared by every instance and
    overwritten by each ``build_zone_priority_queue`` / ``Generate_picklist``
    call.
    """
    POD_PRIORITIES = ['P1','P2','P3','P4','P5','P6','P9']

    # Packing limits. Weight limits are compared against summed 'weight_in_grams'.
    MAX_UNITS = 2000
    FRAGILE_ZONE = "FRAGILE_FD"
    FRAGILE_WEIGHT_LIMIT = 50000
    DEFAULT_WEIGHT_LIMIT = 200000

    # Timing model, in seconds (see calculate_picklist_time).
    START_TO_ZONE_SEC = 120
    ZONE_TO_STAGING_SEC = 120
    SEC_PER_SKU_STOP = 30
    SEC_PER_UNIT = 5
    SEC_PER_ORDER_UNLOAD = 30

    def __init__(self, dataframe, date):
        """
        Initializes the PickList object for a specific operational date.

        Args:
            dataframe: Order lines; must contain 'order_date' and 'pod_priority'.
            date: The day to process; compared with ``==`` against 'order_date'.

        The rows for ``date`` are kept in ``self.dataframe`` and split by
        priority into ``self.pod_dict`` (one entry per POD_PRIORITIES label,
        possibly empty).
        """
        self.date = date
        # Filter the frame that was actually passed in (previously the argument
        # was ignored in favour of a module-level ``df``).
        self.dataframe = dataframe[dataframe['order_date'] == date]
        self.pod_dict = {
            pod: self.dataframe[self.dataframe['pod_priority'] == pod]
            for pod in self.POD_PRIORITIES
        }

    # ------------------------------------------------------------------ helpers

    def _require_pod(self, pod_priority):
        """Raise ValueError unless ``pod_priority`` is a key of ``self.pod_dict``."""
        if pod_priority not in self.pod_dict:
            raise ValueError(f"Invalid pod priority: {pod_priority}")

    @staticmethod
    def _show(obj):
        """Render ``obj`` with the notebook's ``display`` if present, else ``print``."""
        import builtins

        renderer = globals().get('display') or getattr(builtins, 'display', print)
        renderer(obj)

    @staticmethod
    def _max_time_sec(pod_priority):
        """Return the picklist time budget (seconds) for a priority level."""
        if pod_priority in ('P1', 'P2'):
            return 140 * 60  # 140 mins for High Priority
        if pod_priority in ('P3', 'P4', 'P5', 'P6'):
            return 50 * 60   # 50 mins for Standard
        return 110 * 60      # Default 110 mins

    def _date_label(self):
        """Return ``self.date`` as a ``YYYY-MM-DD`` label for file names."""
        if isinstance(self.date, str):
            return self.date.split(' ')[0]  # Remove 00:00:00 timestamp
        try:
            # Handles Timestamp, datetime/date and numpy.datetime64 alike;
            # str(numpy.datetime64) uses a 'T' separator, so splitting on ' '
            # would leave the time part in the name.
            return pd.Timestamp(self.date).strftime('%Y-%m-%d')
        except (ValueError, TypeError):
            return str(self.date).split(' ')[0]

    def _pack_zone(self, zone_df, weight_limit, max_time_sec):
        """
        Pack one zone's SKUs into picklists, first-fit in descending unit order.

        A SKU is never split: all of its rows go to the first existing picklist
        that stays within the time, weight and unit limits, otherwise it opens
        a new picklist (even if it exceeds the limits on its own).

        Returns:
            list of DataFrames, each sorted by floor, aisle, rack.
        """
        # Aggregate SKU data for efficient bin-packing
        sku_summary = (
            zone_df.groupby('sku')
            .agg(total_units=('order_qty', 'sum'), total_weight=('weight_in_grams', 'sum'))
            .reset_index()
            .sort_values(by='total_units', ascending=False)
        )

        bins = []
        for sku_id, sku_weight in zip(sku_summary['sku'], sku_summary['total_weight']):
            sku_df = zone_df[zone_df['sku'] == sku_id]

            # Attempt to fit SKU into existing picklist bins
            for bin_data in bins:
                # Cheap weight check first; avoids building a candidate frame.
                if bin_data['weight'] + sku_weight > weight_limit:
                    continue
                candidate = pd.concat([bin_data['df'], sku_df])
                if (self.calculate_picklist_time(candidate) <= max_time_sec
                        and candidate['order_qty'].sum() <= self.MAX_UNITS):
                    bin_data['df'] = candidate
                    bin_data['weight'] += sku_weight
                    break
            else:
                # Create a new bin if the SKU doesn't fit in existing ones
                bins.append({'df': sku_df.copy(), 'weight': sku_weight})

        # Sort items inside picklist by location for Serpentine Path optimization
        return [
            bin_data['df'].sort_values(by=['floor', 'aisle', 'rack'])
            for bin_data in bins
        ]

    # --------------------------------------------------------------- public API

    def build_zone_dict(self, pod_priority):
        """
        Groups the pod-specific data into warehouse zones.

        Returns:
            dict mapping zone -> DataFrame of that zone's rows, sorted by
            'sku' and then 'order_qty', both descending. Rows without a zone
            are left out.

        Raises:
            ValueError: if ``pod_priority`` is not a known priority.
        """
        self._require_pod(pod_priority)

        pod_df = self.pod_dict[pod_priority]
        # One grouping pass; sort=False keeps zones in first-appearance order.
        return {
            zone: zone_rows.sort_values(by=['sku', 'order_qty'], ascending=False)
            for zone, zone_rows in pod_df.groupby('zone', sort=False)
        }

    def display_data(self, pod_priority, n=5):
        """
        Displays the top N rows of a specific priority level for inspection.

        Raises:
            ValueError: if ``pod_priority`` is not a known priority.
        """
        self._require_pod(pod_priority)
        print(f"DataFrame for pod priority: {pod_priority}")
        self._show(self.pod_dict[pod_priority].head(n))

    def zone_counts(self, pod_priority):
        """
        Calculates the frequency of order lines (rows) appearing in each zone.

        Returns:
            Series named 'count', indexed by zone, largest first.

        Raises:
            ValueError: if ``pod_priority`` is not a known priority.
        """
        self._require_pod(pod_priority)

        return (
            self.pod_dict[pod_priority]
            .groupby('zone')
            .size()
            .rename('count')
            .sort_values(ascending=False)
        )

    def quantity_counts(self, pod_priority):
        """
        Calculates the total 'order_qty' per zone.

        Returns:
            Series named 'total_order_qty', indexed by zone, largest first.

        Raises:
            ValueError: if ``pod_priority`` is not a known priority.
        """
        self._require_pod(pod_priority)

        return (
            self.pod_dict[pod_priority]
            .groupby('zone')['order_qty']
            .sum()
            .rename('total_order_qty')
            .sort_values(ascending=False)
        )

    def build_zone_priority_queue(self, pod_priority):
        """
        Creates a max-heap priority queue using negative quantity as the key.

        Rebuilds the global ``ZONE_PRIORITY_QUEUE`` with one
        ``(-total_qty, zone, pod_priority)`` entry per zone, so the zone with
        the largest total quantity pops first.

        Raises:
            ValueError: if ``pod_priority`` is not a known priority. The
                existing queue is left untouched in that case.
        """
        global ZONE_PRIORITY_QUEUE

        # Validate before resetting so a bad call cannot wipe a valid queue.
        qty_series = self.quantity_counts(pod_priority)

        ZONE_PRIORITY_QUEUE = []
        for zone, total_qty in qty_series.items():
            heapq.heappush(
                ZONE_PRIORITY_QUEUE,
                (-total_qty, zone, pod_priority)
            )

    def print_zone_priority_queue(self):
        """Prints the priority order of zones based on total unit volume."""
        # .get() so that printing before any queue was built reports "empty"
        # instead of raising NameError.
        zone_queue = globals().get('ZONE_PRIORITY_QUEUE')

        if not zone_queue:
            print("Zone priority queue is empty.")
            return

        print("Zone Priority Queue (Highest → Lowest total_order_qty):")
        # Sorting the entries yields the same order as popping the heap.
        for neg_qty, zone, pod in sorted(zone_queue):
            print(f"Pod: {pod}, Zone: {zone}, Total Order Qty: {-neg_qty}")

    def Generate_picklist(self, pod_priority):
        """
        Main algorithm for generating optimized picklists.

        Walks the zones of the global ``ZONE_PRIORITY_QUEUE`` (largest total
        quantity first) and packs each zone's SKUs into picklists while
        respecting Time, Unit, and Weight constraints. Results replace the
        globals ``ZONE_PICKLIST_DICT`` (zone -> list of DataFrames) and
        ``ZONE_PICKLIST_QUEUE`` (heap of ``(-picklist_count, zone)``).

        NOTE: the queue must have been built for the same ``pod_priority``;
        zones in the queue that this priority does not have are skipped.

        Returns:
            dict: ``ZONE_PICKLIST_DICT``; empty when the zone queue is empty.

        Raises:
            ValueError: if ``pod_priority`` is not a known priority (only
                checked when the zone queue is non-empty).
        """
        global ZONE_PICKLIST_QUEUE
        global ZONE_PICKLIST_DICT

        # Reset first: otherwise an early return leaves the previous run's
        # picklists in place and a following export writes them again.
        ZONE_PICKLIST_QUEUE = []
        ZONE_PICKLIST_DICT = {}

        zone_queue = globals().get('ZONE_PRIORITY_QUEUE')
        if not zone_queue:
            print("Zone priority queue is empty.")
            return {}

        zone_dict = self.build_zone_dict(pod_priority)
        # Setting time constraints based on Priority Level (POD)
        max_time_sec = self._max_time_sec(pod_priority)

        # Sorted order equals heap-pop order (largest total quantity first).
        for _neg_qty, zone, _pod in sorted(zone_queue):
            zone_df = zone_dict.get(zone)
            if zone_df is None or zone_df.empty:
                continue

            # Fragile zones have lower weight capacity
            if zone == self.FRAGILE_ZONE:
                weight_limit = self.FRAGILE_WEIGHT_LIMIT
            else:
                weight_limit = self.DEFAULT_WEIGHT_LIMIT

            zone_picklists = self._pack_zone(zone_df, weight_limit, max_time_sec)

            ZONE_PICKLIST_DICT[zone] = zone_picklists
            heapq.heappush(ZONE_PICKLIST_QUEUE, (-len(zone_picklists), zone))

        return ZONE_PICKLIST_DICT

    def calculate_picklist_time(self, picklist_df):
        """
        Mathematical model to estimate the time a picker takes to complete a list.

        Factors: fixed travel to and from the zone, one stop per unique SKU,
        handling time per unit, and unloading time per unique order.

        Returns:
            Estimated seconds; 0 for a missing or empty picklist.
        """
        if picklist_df is None or picklist_df.empty:
            return 0

        intra_zone_time = picklist_df['sku'].nunique() * self.SEC_PER_SKU_STOP
        pickup_time = picklist_df['order_qty'].sum() * self.SEC_PER_UNIT
        unloading_time = picklist_df['order_id'].nunique() * self.SEC_PER_ORDER_UNLOAD

        return (
            self.START_TO_ZONE_SEC
            + intra_zone_time
            + pickup_time
            + unloading_time
            + self.ZONE_TO_STAGING_SEC
        )

    def display_generate_picklist_output(self):
        """Prints a summary of all generated picklists for the current session."""
        picklist_queue = globals().get('ZONE_PICKLIST_QUEUE')
        picklists_by_zone = globals().get('ZONE_PICKLIST_DICT')

        if not picklist_queue or not picklists_by_zone:
            print("Picklists have not been generated yet.")
            return

        print("===== GENERATED PICKLIST OUTPUT =====\n")

        # Zones with the most picklists come first.
        for neg_batches, zone in sorted(picklist_queue):
            batch_count = -neg_batches
            print(f"Zone: {zone} | Total Picklists: {batch_count}")

            for i, batch_df in enumerate(picklists_by_zone.get(zone, []), start=1):
                total_units = batch_df['order_qty'].sum()
                total_weight = batch_df['weight_in_grams'].sum() / 1000
                time_sec = self.calculate_picklist_time(batch_df)

                print(f"Picklist {i}: {time_sec / 60:.2f} minutes")
                print(f"  └─ {total_units} units | {total_weight:.2f} kg")
                self._show(batch_df)

    def export_picklists_to_csv(self, output_dir, start_rank):
        """
        Saves picklists to CSV files using a flat folder structure.

        Every picklist in the global ``ZONE_PICKLIST_DICT`` is written to
        ``<output_dir>/<YYYY-MM-DD>_<rank>.csv``, longest estimated time first,
        with ranks counting up from ``start_rank``.

        Args:
            output_dir: Target folder; created if missing.
            start_rank: Rank number for the first file written.

        Returns:
            int: the next unused rank (``start_rank`` if nothing was written).
        """
        picklists_by_zone = globals().get('ZONE_PICKLIST_DICT')

        if not picklists_by_zone:
            return start_rank

        os.makedirs(output_dir, exist_ok=True)
        clean_date = self._date_label()

        # Gather all zone batches for unified sorting by time
        batches = [
            batch_df
            for picklists in picklists_by_zone.values()
            for batch_df in picklists
        ]
        # Sort all lists globally (longest tasks first); the sort is stable,
        # so equal-time picklists keep their zone order.
        batches.sort(key=self.calculate_picklist_time, reverse=True)

        # Select required columns for export
        export_columns = ['order_date', 'picklist_no', 'sku', 'store_id', 'bin', 'bin_rank']

        for rank, batch_df in enumerate(batches, start=start_rank):
            # reset_index returns a new frame, so the stored picklist is not modified.
            out = batch_df.reset_index(drop=True)
            out['order_date'] = clean_date
            out['picklist_no'] = f"PL_{rank:03d}"
            out['bin_rank'] = out.index + 1

            # Filename: YYYY-MM-DD_RANK.csv
            filename = f"{clean_date}_{rank:03d}.csv"
            out[export_columns].to_csv(os.path.join(output_dir, filename), index=False)

        return start_rank + len(batches)

In [None]:
# Module-level list of the pod priority labels: P1-P6 plus P9.
POD_PRIORITIES = [f"P{level}" for level in (1, 2, 3, 4, 5, 6, 9)]

# Make sure the default export root exists; a no-op when it is already there.
os.makedirs("Warehouse_Operations", exist_ok=True)

def run_consolidated_warehouse_workflow(df, base_dir="Warehouse_Operations"):
    """
    Orchestrates the end-to-end picklist generation process across multiple dates.

    This function handles:
    1. Date-wise folder creation.
    2. Chronological processing of unique dates.
    3. Maintaining a per-date file rank (001, 002...) to prevent overwriting across priorities.

    Args:
        df: Order lines with an 'order_date' column. Rows whose date is
            missing (NaN/NaT) are ignored.
        base_dir: Root folder; one ``YYYY-MM-DD`` sub-folder is created per date.

    Errors raised while processing one priority are printed and the loop
    moves on to the next priority.
    """

    # 1. Identify and sort all unique dates in the dataset for sequential processing.
    # Missing dates are dropped: they match no rows and would otherwise produce
    # a stray "NaT" folder. tolist() yields Timestamp objects for datetime
    # columns instead of raw numpy.datetime64 values.
    unique_dates = df['order_date'].dropna().drop_duplicates().sort_values().tolist()

    for current_date in unique_dates:
        # 2. Date Cleaning: build a YYYY-MM-DD folder name without the time part
        if hasattr(current_date, 'strftime'):
            clean_date_str = current_date.strftime('%Y-%m-%d')
        else:
            clean_date_str = str(current_date).split(' ')[0]
        date_folder = os.path.join(base_dir, clean_date_str)
        os.makedirs(date_folder, exist_ok=True)

        # 3. Initialize the PickList engine for the specific date
        pl_engine = PickList(df, current_date)

        # 4. Global Counter: This prevents filename collisions between P1, P2, etc.
        # Reset to 1 at the start of every new date folder.
        global_day_rank = 1

        # 5. Iterate through each Priority (POD) level defined in the class
        for priority in pl_engine.POD_PRIORITIES:
            # Skip priorities that have no orders on this specific date
            pod_df = pl_engine.pod_dict.get(priority)
            if pod_df is None or pod_df.empty:
                continue

            try:
                # 6. Rank Zones: Determine which areas have the highest volume first
                pl_engine.build_zone_priority_queue(priority)

                # 7. Pack Bins: Apply constraints (Weight/Time/Units) to group SKUs
                generated = pl_engine.Generate_picklist(priority)

                # Generate_picklist returns an empty dict when it had no zones
                # to process; exporting then could re-write picklists left over
                # from the previous priority, so skip the export.
                if generated is not None and not generated:
                    continue

                # 8. Export & Update: Save CSVs to the date folder.
                # The function returns the NEXT available rank number to maintain continuity.
                global_day_rank = pl_engine.export_picklists_to_csv(date_folder, global_day_rank)

            except Exception as e:
                # Catch-all for unexpected errors during a priority run to keep the loop moving
                print(f"Error processing {priority} on {clean_date_str}: {e}")

In [None]:
# Usage walkthrough: build a PickList for a single day (date given as a string).
pick_list_obj=PickList(df,'2025-11-08')

In [None]:
# Inspect the per-zone frames for priority P1 (echoed as the cell's result).
pick_list_obj.build_zone_dict('P1')


In [None]:
# Show the first rows (default n=5) of the P1 orders.
pick_list_obj.display_data('P1')

In [None]:
# Rank the P1 zones by total order quantity, then print that ranking.
# The queue is a module-level global, so it must be built before it is printed.
pick_list_obj.build_zone_priority_queue('P1')
pick_list_obj.print_zone_priority_queue()

In [None]:
# Pack the P1 zones into picklists; relies on the zone queue built in the previous cell.
pick_list_obj.Generate_picklist('P1')

In [None]:
# Print a per-zone summary of the picklists generated by the previous cell.
pick_list_obj.display_generate_picklist_output()

In [None]:
# Run the full export for every date in df, writing CSVs under the default base_dir.
run_consolidated_warehouse_workflow(df)