In [25]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
import random
import calendar

In [26]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
import random
import calendar

# Seed both random sources used below (the stdlib `random` module and NumPy's
# global generator) so that repeated runs draw the same values.
random.seed(42)
np.random.seed(42)

# Daily calendar: January 1 five years before the current year, through the
# last day of the current month. Both ends are derived from the wall clock,
# so the window moves with the date on which the notebook is executed.
start_date = datetime(year=datetime.now().year - 5, month=1, day=1)
end_date = (
    pd.Timestamp(datetime.now())
    .to_period('M')
    .end_time
    .date()
)
date_range = pd.date_range(start_date, end_date, freq='D')

# One row per customer:
#   (name, business type, [min, max] price per piece, per-day delivery probability)
# The three lookup dicts below are derived from this table, so they always
# share the same keys in the same order.
_CUSTOMER_PROFILES = [
    ('Amazon',      'Final Mile',  [5, 25],   0.9),
    ('Walmart',     'Middle Mile', [2.5, 30], 0.85),
    ('Target',      'Middle Mile', [2.5, 25], 0.7),
    ('Costco',      'First Mile',  [8, 40],   0.6),
    ('Home Depot',  'Final Mile',  [10, 75],  0.5),
    ('T.J. Maxx',   'Middle Mile', [3, 15],   0.4),
    ('Home Goods',  'Middle Mile', [3, 20],   0.4),
    ('Chick-Fil-A', 'Final Mile',  [7, 15],   0.3),
    ('Chipotle',    'Final Mile',  [2.5, 20], 0.3),
    ('Lowes',       'Final Mile',  [8, 60],   0.6),
    ('Shein',       'Middle Mile', [5, 25],   0.5),
    ('Starbucks',   'Middle Mile', [4, 18],   0.4),
]

# Customer -> business segment label written to the BusinessType column.
customer_business_type = {
    name: segment for name, segment, _prices, _freq in _CUSTOMER_PROFILES
}

# Customer -> [low, high] bounds from which a per-location price is drawn.
customer_price_per_piece = {
    name: prices for name, _segment, prices, _freq in _CUSTOMER_PROFILES
}

# Customer -> probability that a given customer-location delivers on a given day.
customer_delivery_frequency = {
    name: freq for name, _segment, _prices, freq in _CUSTOMER_PROFILES
}

# Customer names, in the insertion order of the business-type mapping.
customers = list(customer_business_type)

locations = [
    'Atlanta', 'Birmingham', 'New York', 'Chicago', 'Los Angeles', 'Miami',
    'Milwaukee', 'Seattle', 'Nashville', 'Minneapolis', 'Detroit',
    'San Francisco', 'Houston', 'Dallas', 'Tampa Bay', 'Sacramento', 'San Diego',
]

# 1. Assign each customer to a random subset of locations.
#    The subset size is drawn from 3 up to len(locations) inclusive, i.e. a
#    customer may be active anywhere from three locations to all of them.
customer_locations = {
    customer: random.sample(locations, random.randint(3, len(locations)))
    for customer in customers
}

# 2. Give each customer a base demand multiplier in [0.8, 2.0].
customer_base_demand = {}
for customer in customers:
    customer_base_demand[customer] = random.uniform(0.8, 2.0)

# 3. Per customer-location tweaks: a demand multiplier and a fixed price.
#    Locations are visited in the order of the master `locations` list (not
#    the sampled order), and for each active location the demand value is
#    drawn before the price.
customer_location_demand = {}
customer_location_price = {}

for customer in customers:
    demand_by_location = {}
    price_by_location = {}

    for location in locations:
        if location not in customer_locations[customer]:
            continue

        # Demand multiplier specific to this customer at this location.
        demand_by_location[location] = random.uniform(0.5, 2.0)

        # Price per piece, drawn once from the customer's [low, high] range
        # and then held fixed for this customer-location.
        low_price, high_price = customer_price_per_piece[customer]
        price_by_location[location] = random.uniform(low_price, high_price)

    customer_location_demand[customer] = demand_by_location
    customer_location_price[customer] = price_by_location

def get_holiday_dates(year):
    """Return the dates treated as holidays in ``year``.

    The list holds, in this order: January 1, July 4, December 25, and
    Thanksgiving (the fourth Thursday of November).

    Args:
        year: Calendar year as an integer.

    Returns:
        A list of four ``datetime.date`` objects.
    """
    # Days from November 1 to the first Thursday of the month (0-6);
    # the fourth Thursday falls exactly three weeks after that.
    days_to_first_thursday = (calendar.THURSDAY - date(year, 11, 1).weekday()) % 7
    thanksgiving = date(year, 11, 1 + days_to_first_thursday + 21)

    return [
        date(year, 1, 1),
        date(year, 7, 4),
        date(year, 12, 25),
        thanksgiving,
    ]

# Every holiday date for each calendar year the simulation window touches.
all_holidays = {
    holiday
    for year in range(start_date.year, end_date.year + 1)
    for holiday in get_holiday_dates(year)
}

# Growth multiplier per year: 1.0 for the first year in the window, rising
# by 0.1 for each subsequent year.
year_factors = {}
for increment, year in enumerate(date_range.year.unique()):
    year_factors[year] = 1.0 + (increment / 10)

def _weekday_factor(weekday):
    """Demand multiplier for a day of the week (0 = Monday ... 6 = Sunday)."""
    if weekday >= 5:
        return 0.4  # Weekends
    if weekday >= 2:
        return 1.0  # Midweek
    return 2.0      # Monday/Tuesday spike


def _month_factor(month):
    """Seasonal demand multiplier for a calendar month (1-12)."""
    if month in (1, 2, 10, 11, 12):
        return 1.5
    if month in (5, 6, 7):
        return 0.7
    return 1.0


data = []

for work_date in date_range:
    date_only = work_date.date()

    # No deliveries are generated on holidays.
    if date_only in all_holidays:
        continue

    # Combined calendar effect: day-of-week x seasonality x yearly growth.
    demand_factor = (
        _weekday_factor(work_date.weekday())
        * _month_factor(work_date.month)
        * year_factors[work_date.year]
    )

    for customer in customers:
        business_type = customer_business_type[customer]
        delivery_probability = customer_delivery_frequency[customer]
        base_demand = customer_base_demand[customer]

        for location in customer_locations[customer]:
            # One stdlib-random draw per customer-location-day decides
            # whether a delivery happens at all.
            if random.random() > delivery_probability:
                continue

            # Order count is deterministic once the delivery happens; the
            # product is truncated to an integer.
            order_count = int(
                10 * base_demand * customer_location_demand[customer][location] * demand_factor
            )
            if order_count < 1:
                continue

            # Pieces per order: 5 plus a standard-normal draw truncated
            # toward zero; the total is floored at one piece.
            pieces_per_order = 5 + int(np.random.normal(0, 1))
            total_pieces = max(1, order_count * pieces_per_order)

            # Revenue uses the fixed price for this customer-location.
            total_revenue = total_pieces * customer_location_price[customer][location]

            data.append({
                'WorkDate': date_only,
                'Customer': customer,
                'Location': location,
                'BusinessType': business_type,
                'OrderCount': order_count,
                'NumberOfPieces': total_pieces,
                'TotalRevenue': round(total_revenue, 2)
            })

# Build the frame once from the collected records, order by date, and
# renumber the rows.
df = (
    pd.DataFrame(data)
    .sort_values('WorkDate')
    .reset_index(drop=True)
)

df.head()


Unnamed: 0,WorkDate,Customer,Location,BusinessType,OrderCount,NumberOfPieces,TotalRevenue
0,2020-01-02,Amazon,Chicago,Final Mile,38,190,2084.09
1,2020-01-02,Home Depot,Sacramento,Final Mile,34,136,6153.01
2,2020-01-02,Home Depot,Chicago,Final Mile,43,215,15691.72
3,2020-01-02,Home Depot,Detroit,Final Mile,41,164,6490.39
4,2020-01-02,Home Depot,Atlanta,Final Mile,44,220,10069.65


In [27]:
# Sanity check: display the (rows, columns) tuple of the generated frame.
df.shape

(126255, 7)

In [28]:
# Convert WorkDate to datetime64 and make it the index.
# Guarded so the cell is safe to re-run: once WorkDate has been moved into
# the index it is no longer a column, and an unguarded second run would raise
# AttributeError on `df.WorkDate`. `assign` is used instead of attribute-style
# assignment so the column is replaced explicitly and the previous frame
# object is not mutated.
if 'WorkDate' in df.columns:
    df = (
        df
        .assign(WorkDate=pd.to_datetime(df['WorkDate']))
        .set_index('WorkDate')
    )

In [29]:
# Write the frame to a CSV file in the current working directory. No `index`
# argument is passed, so pandas' default applies and the index is written too.
df.to_csv('supply_chain_deliveries.csv')