### Fulfillment Data Generation

In the absence of data from the real project, I'm generating hypothetical order fulfillment data. Temperature and noise levels determine the probability that a defect is reported on a given order. The data is then saved as a CSV file for bulk upload to Supabase.

In [197]:
import pandas as pd
import numpy as np
import datetime
import uuid

#### Load generated environmental data and convert date field strings to datetime (numpy datetime64) objects

In [198]:
# Read the generated environmental readings and parse the text `timestamp`
# column into datetime64 values so later cells can do time arithmetic on it.
df = (
    pd.read_csv('../../data/shipdock_environmental_data_0809.csv')
    .assign(timestamp=lambda readings: pd.to_datetime(readings['timestamp']))
)
print(df.head())

            timestamp  temperature  noise_level
0 2024-08-01 08:00:00        60.98        42.31
1 2024-08-01 09:00:00        66.56        59.00
2 2024-08-01 10:00:00        71.55        42.84
3 2024-08-01 11:00:00        75.44        48.31
4 2024-08-01 12:00:00        78.97        59.00


#### Define hypothetical defect probabilities based on environmental conditions

In [199]:
# Environmental thresholds: a reading at or above one of these values is treated as "high".
# NOTE(review): units are presumably degrees Fahrenheit and decibels -- confirm against the data source.
TEMP_THRESHOLD = 80
NOISE_THRESHOLD = 60

# Probability that an order packed under a given condition has a defect reported.
BASE_DEFECT_PROBABILITY = 0.02      # neither threshold reached
HIGH_NOISE_PROB = 0.15              # only the noise threshold reached
HIGH_TEMP_PROB = 0.2                # only the temperature threshold reached
COMBINED_PROB = 0.5                 # both thresholds reached

#### Create simulated sales fulfillment data

In [200]:
# Seed a dedicated generator so that "Restart Kernel -> Run All" regenerates the
# exact same dataset (and therefore the exact same CSV) on every run.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

MEAN_ORDERS_PER_READING = 4     # Poisson mean for the number of orders packed per environmental reading

# Column order of the generated frame; also guarantees the columns exist when no orders are generated.
ORDER_COLUMNS = ['timestamp', 'order_id', 'pack_date', 'ship_date', 'ship_method',
                 'number_of_items', 'box_size', 'defect_reported']

ship_methods = ['usps', 'fedex', 'ups', 'dhl']                            # matches custom enum in Supabase
box_sizes = ['A', 'B', 'C']                                               # matches custom enum in Supabase
defect_list = [None, 'missing items', 'wrong items', 'poorly packed']     # matches custom enum in Supabase

simulated_orders = []

for reading in df.itertuples(index=False):
    timestamp = reading.timestamp
    temperature = reading.temperature
    noise_level = reading.noise_level

    # Only generate orders during operating hours (8 AM to 5 PM)
    if not (8 <= timestamp.hour < 17):
        continue

    # The defect probability depends only on the environmental reading, so it is
    # computed once per reading rather than once per order.
    high_temp = temperature >= TEMP_THRESHOLD
    high_noise = noise_level >= NOISE_THRESHOLD
    if high_temp and high_noise:
        defect_probability = COMBINED_PROB
    elif high_temp:
        defect_probability = HIGH_TEMP_PROB
    elif high_noise:
        defect_probability = HIGH_NOISE_PROB
    else:
        defect_probability = BASE_DEFECT_PROBABILITY

    # "No defect" (None) gets 1 - p; the defect probability p is split evenly
    # between the three defect types.
    defect_share = defect_probability / 3
    defect_weights = [1 - defect_probability, defect_share, defect_share, defect_share]

    for _ in range(rng.poisson(MEAN_ORDERS_PER_READING)):
        # Build the v4 UUID from the seeded generator: uuid.uuid4() draws from the
        # OS entropy pool and would make order ids differ on every run.
        order_id = uuid.UUID(bytes=rng.bytes(16), version=4)

        # Generator.integers returns numpy integers, which datetime.timedelta
        # rejects, hence the int() conversions below.
        ship_delay_hours = int(rng.integers(6, 24))      # shipped 6-23 hours after packing
        order_lead_hours = int(rng.integers(1, 24))      # ordered 1-23 hours before packing

        simulated_orders.append({
            'timestamp': timestamp - datetime.timedelta(hours=order_lead_hours),   # simulated order date
            'order_id': order_id,
            'pack_date': timestamp,     # environmental reading should coincide with packing time to reflect a related defect
            'ship_date': timestamp + datetime.timedelta(hours=ship_delay_hours),
            'ship_method': rng.choice(ship_methods),
            'number_of_items': int(rng.integers(1, 6)),
            'box_size': rng.choice(box_sizes),
            'defect_reported': rng.choice(defect_list, p=defect_weights)
        })

# Passing the column list keeps the frame well-formed (and the lookup below valid)
# even if no reading fell inside operating hours.
simulated_df = pd.DataFrame(simulated_orders, columns=ORDER_COLUMNS)

print(simulated_df['ship_method'].head(10))

0      ups
1    fedex
2    fedex
3      dhl
4     usps
5      ups
6    fedex
7      dhl
8    fedex
9      dhl
Name: ship_method, dtype: object


#### Save to CSV

In [201]:
# Write the simulated orders to a CSV in the working directory; the row index is
# left out so the file contains only the order columns.
simulated_df.to_csv(path_or_buf='order_fulfillment_data_0809.csv', index=False)