# Warehouse Data Generator

In [None]:
# This code is used to generate synthetic data for the SimioWarehouseSystem

In [None]:
# Input: an Excel file which contains the parameter settings
#  \ Parameter              \ Type                \ Comment
#  \  # of orders           \  Integer            \
#  \  # of Skus             \  Integer            \
#  \  # of Locations        \  Integer            \
#  \  # of lines per order  \  String             \ Distribution name, given together with its corresponding parameters
#  \  quantity per line     \  String             \ Distribution name, given together with its corresponding parameters
#  \  warehouse layout      \  Vector             \ Coordinate values of the four corners

In [None]:
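# Output: several csv files which contain the synthetic warehouse data
#    1. Orders.csv: OrderID, ReleaseDate, DueDate
#    2. Skus.csv: SkuID, Volumn, Weight
#    3. Order_Sku.csv: OrderID, SkuID, Quantity
#    4. Locations.csv: LocationID, Xloc, Zloc
#    5. Sku_Location.csv: SkuID, LocationID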

In [None]:
# import associated packages
import pandas as pd
import numpy as np
import random
import datetime as dt
from datetime import datetime
from sklearn.utils import shuffle

## Get parameters from the input file

<b>New Stuff Here</b>

In [None]:
# Default parameter values (used when you don't want to read the xlsx file)

# Warehouse corner coordinates stored as one flat list, going around
# clockwise: BL, UL, UR, BR.
Bounding = [
    0, 0,        # 0 - BLx; 1 - BLz
    0, 100,      # 2 - ULx; 3 - ULz
    1000, 1000,  # 4 - URx; 5 - URz
    1000, 0,     # 6 - BRx; 7 - BRz
]
NumOrders = 1000
# TODO: generate an error if the number of SKUs exceeds the number of locations
NumSKUs = 20000
# If empty, all SKUs are equally likely.  If not empty, verify that the numbers work
SKUWeights = []
NumLocations = 20000
# Distribution name followed by its parameters (presumably min and max for 'Uniform' - confirm)
LineItemDist = ['Uniform', 1, 10]
QuantityDist = ['Uniform', 1, 5]
# TODO: for the random generation, there should be a single function that generates
# Uniform variates and takes the min,max as parameters.  If the distributions change,
# a separate function can then be created based on the distribution type.

In [None]:
# Placeholder for the function that reads the parameter values from the Excel file
###################################################################################################################
# Input :        FileName: the Excel file name. Not used yet
#               SheetName: the sheet holding the parameter settings. Not used yet
#         SKUWeightsSheet: the sheet holding the SKU weights. Not used yet
#
# Output:        a tuple (Bounding, NumOrders, NumSKUs, SKUWeights, NumLocations, LineItemDist, QuantityDist).
#                Until the reading code is written, these are the module-level initial values
###################################################################################################################
def readFromExcel(FileName = 'ParameterSetting.xlsx', SheetName = 'Parameter Setting', SKUWeightsSheet = 'SKU Weights'):
    # TODO: code to read goes here
    # The original returned the misspelled name 'SKUWeigthts', which raised a NameError on every call
    return (Bounding, NumOrders, NumSKUs, SKUWeights, NumLocations, LineItemDist, QuantityDist)

In [None]:
# Placeholder for the function that reads the SKU weights from a csv file
###################################################################################################################
# Input :        FileName: the csv file name. Not used yet
#
# Output:        the module-level SKUWeights value (the reading code is not written yet)
###################################################################################################################
def readSKUWeights(FileName = 'SKUWeights.csv'):
    # TODO: code to read from csv goes here
    weights = SKUWeights
    return weights

<b>End of New Stuff</b>

In [None]:
# Read the input Excel file and get the parameter settings and the SKU weights.
# Passing a list of sheet names returns a dict of dataframes keyed by sheet name.
# NOTE(review): the file name here is spelled 'ParamterSetting.xlsx' while the default of
# readFromExcel is 'ParameterSetting.xlsx' - confirm which spelling the real file uses.
_parameter_sheets = pd.read_excel('ParamterSetting.xlsx', sheet_name=['Parameter Setting', 'SKU Weights'])
PS = _parameter_sheets['Parameter Setting']
Sku_weights = _parameter_sheets['SKU Weights']

In [None]:
# Get the main parameter settings
def _parameter(name, column='Value'):
    # Return the single cell of `column` in the PS row whose 'Name' equals `name`.
    # .item() raises a ValueError unless exactly one row matches.
    return PS.loc[PS['Name'] == name, column].item()

# Counts
Order_No = int(_parameter('OrderNo'))          # number of orders
Sku_No = int(_parameter('SkuNo'))              # number of skus
Locat_No = int(_parameter('LocationNo'))       # number of locations

# Distribution names
Line_Dist = _parameter('LineDistribution')             # distribution for the number of lines in an order
Quantity_Dist = _parameter('QuantityDistribution')     # distribution for the quantity in an order line

# Warehouse corners: 'Value' holds the x coordinate and 'Add_value1' the z coordinate
BL_corner_x = int(_parameter('BL_corner'))                  # bottom left corner x
BL_corner_z = int(_parameter('BL_corner', 'Add_value1'))    # bottom left corner z
BR_corner_x = int(_parameter('BR_corner'))                  # bottom right corner x
BR_corner_z = int(_parameter('BR_corner', 'Add_value1'))    # bottom right corner z
TL_corner_x = int(_parameter('TL_corner'))                  # top left corner x
TL_corner_z = int(_parameter('TL_corner', 'Add_value1'))    # top left corner z
TR_corner_x = int(_parameter('TR_corner'))                  # top right corner x
TR_corner_z = int(_parameter('TR_corner', 'Add_value1'))    # top right corner z

In [None]:
# Get associated additional parameters with the line distribution
if Line_Dist == 'Uniform':
    Line_min = int(PS['Add_value1'][PS['Name'] == 'LineDistribution'].item())   # The min value in uniform distribution
    Line_max = int(PS['Add_value2'][PS['Name'] == 'LineDistribution'].item())   # The max value in uniform distribution
else:
    # Line_min and Line_max are needed when the order-sku dataframe is generated, so stop
    # here with a clear message instead of failing later with a NameError
    raise ValueError('Unsupported LineDistribution: ' + str(Line_Dist))

In [None]:
# Get associated additional parameters with the quantity distribution
if Quantity_Dist == 'Uniform':
    Quantity_min = int(PS['Add_value1'][PS['Name'] == 'QuantityDistribution'].item())       # The min value in uniform distribution
    Quantity_max = int(PS['Add_value2'][PS['Name'] == 'QuantityDistribution'].item())       # The max value in uniform distribution
else:
    # Quantity_min and Quantity_max are needed when the order-sku dataframe is generated, so
    # stop here with a clear message instead of failing later with a NameError
    raise ValueError('Unsupported QuantityDistribution: ' + str(Quantity_Dist))

## Function definitions

In [None]:
# Define the function that provides uniform distribution decimal or integer value
###################################################################################################################
# Input :          v_min: the lower bound of the uniform distribution
#                  v_max: the upper bound of the uniform distribution
#                 is_int: indicate return integer value or float value. Default value is true.
#                         Integers are drawn with random.randint, so both bounds can be returned
#                  digit: the number of decimal digits a float result is rounded to. Default value is 1.
#                         Not used when is_int is true
#
# Output:          a uniform distribution value
####################################################################################################################
def Uniform(v_min, v_max,is_int=True, digit=1 ):
    if is_int:
        return random.randint(v_min, v_max)
    return round(random.uniform(v_min, v_max), digit)

In [None]:
# Define the function that generates the number of lines in an order
###################################################################################################################
# Input :      line_dist: the distribution character of the number of lines in an order. Default is 'Uniform'.
#                         'Uniform' is the only distribution currently supported
#              addional1: corresponding first parameter for the certain line_dist. If line_dist='Uniform',
#                         this is lower bound
#              addional2: corresponding second parameter for the certain line_dist. If line_dist='Uniform',
#                         this is upper bound
#              addional3: corresponding third parameter for the certain line_dist. Not used by 'Uniform'
#
# Output:     line_count: the number of lines in an order
#
# Raises:     ValueError: if line_dist is not a supported distribution
###################################################################################################################
def Lines_Per_Order(line_dist='Uniform', addional1=None, addional2= None, addional3=None):
    if line_dist == 'Uniform':
        return Uniform(addional1, addional2)
    # Previously an unsupported distribution failed with an UnboundLocalError on the return
    raise ValueError('Unsupported line distribution: ' + str(line_dist))

In [None]:
# Define the function that draws a random sample from a population
#####################################################################################################################
# Input :      population: the sample population. List (or any sequence accepted by np.random.choice)
#                 weights: the probability associated with each entry of the population. List. If weights=None,
#                          use uniform distribution to select entries
#                    size: number of samples we want to get
#                 replace: whether the sample is with or without replacement. Default is without replacement
#
# Output:            the sample stored as a np.array
#####################################################################################################################
def Sample(population, weights, size, replace=False):
    # Positional order of np.random.choice is (a, size, replace, p)
    drawn = np.random.choice(population, size, replace, weights)
    return drawn

In [None]:
# Define the function that generates the sku quantity in an order line
###################################################################################################################
# Input :      quan_dist: the distribution character of the sku quantity in an order line. Default is 'Uniform'.
#                         'Uniform' is the only distribution currently supported
#              addional1: corresponding first parameter for the certain quan_dist. If quan_dist='Uniform',
#                         this is lower bound
#              addional2: corresponding second parameter for the certain quan_dist. If quan_dist='Uniform',
#                         this is upper bound
#              addional3: corresponding third parameter for the certain quan_dist. Not used by 'Uniform'
#
# Output:       quantity: the number of sku quantity in an order line
#
# Raises:     ValueError: if quan_dist is not a supported distribution
###################################################################################################################
def Quantity_Per_Line(quan_dist='Uniform', addional1=None, addional2= None, addional3=None):
    if quan_dist == 'Uniform':
        return Uniform(addional1, addional2)
    # Previously an unsupported distribution failed with an UnboundLocalError on the return
    raise ValueError('Unsupported quantity distribution: ' + str(quan_dist))

In [None]:
# Define the function that generates the initial dataframe for Order, Sku and Location
####################################################################################################################
# Input :     rownumber: the number of rows in the generated dataframe
#               df_type: specify the dataframe is for Order or Sku or Location. Ex. df_type = 'Order'
#
# Output:            df: the generated dataframe. It has one column named df_type + 'ID' whose values are
#                        df_type followed by the row number, starting at 0. Ex. 'Order0', 'Order1', ...
####################################################################################################################
def DF_initial(rownumber, df_type):
    id_column = df_type + 'ID'
    # Start from the plain row numbers 0 .. rownumber-1
    frame = pd.DataFrame({id_column: np.arange(rownumber)})
    # Turn each row number into its label by prefixing the type name
    frame[id_column] = frame[id_column].apply(lambda number: df_type + str(number))
    return frame

In [None]:
# Define the function that generates the initial dataframe for Order_Sku
#####################################################################################################################
# Input :        order_df: the initial order dataframe. One block of rows is generated per row of order_df
#                  sku_df: the initial sku dataframe. Its 'SkuID' column is the population the skus are drawn from
#             sku_weights: the weight values for skus. Dataframe with a 'Weight' column holding one probability
#                          per sku. If sku_weights=None, all skus are equally likely
#               line_dist: the distribution character of the number of lines in an order. Default is 'Uniform'
#               line_add1: corresponding first parameter for the certain line_dist
#               line_add2: corresponding second parameter for the certain line_dist
#               line_add3: corresponding third parameter for the certain line_dist
#               quan_dist: the distribution character of the sku quantity in an order line. Default is 'Uniform'
#               quan_add1: corresponding first parameter for the certain quan_dist
#               quan_add2: corresponding second parameter for the certain quan_dist
#               quan_add3: corresponding third parameter for the certain quan_dist
#
# Output:              df: the order_sku dataframe with columns 'OrderID', 'SkuID', 'Quantity', followed by any
#                          other column order_df has. Within one order the skus are drawn without replacement,
#                          so an order never lists the same sku twice
######################################################################################################################
def DF_Order_Sku_initial(order_df, sku_df, sku_weights=None, line_dist='Uniform', line_add1=None, line_add2=None, line_add3=None, quan_dist='Uniform', quan_add1=None, quan_add2=None, quan_add3=None):
    order_no = len(order_df)
    population = sku_df['SkuID'].tolist()
    weights = None if sku_weights is None else sku_weights['Weight'].tolist()

    # Collect the per-order results in plain lists and build the dataframe once at the end.
    # DataFrame.append was removed in pandas 2.0, and appending inside the loop copied the
    # whole dataframe for every order.
    line_counts = []
    sku_ids = []
    for _ in range(order_no):
        # get the lines for this order
        line = Lines_Per_Order(line_dist, line_add1, line_add2, line_add3)
        line_counts.append(line)
        # draw the skus of this order
        sku_ids.extend(Sample(population, weights, line).tolist())

    # repeat each order row once per line of that order
    row_positions = np.repeat(np.arange(order_no), np.array(line_counts, dtype=int))
    df = order_df.iloc[row_positions].reset_index(drop=True)
    df['SkuID'] = sku_ids
    # assign Sku quantities for each row, honouring quan_dist (it used to be hard-coded to 'Uniform')
    df['Quantity'] = [Quantity_Per_Line(quan_dist, quan_add1, quan_add2, quan_add3) for _ in range(len(df))]

    # keep the documented columns first; extra order columns follow
    leading_columns = ['OrderID', 'SkuID', 'Quantity']
    extra_columns = [column for column in df.columns if column not in leading_columns]
    return df.reindex(columns=leading_columns + extra_columns)

In [None]:
# Define the function that generates the initial dataframe for Sku_Location
##########################################################################################################################
# Input :         sku_df: the initial sku dataframe
#            location_df: the initial location dataframe
#                weights: the weight value associated with the population. List. If weights=None, use uniform
#                         distribution to select entries
#                   rule: the matching rule for skus and locations. four rules can be selected:
#                            'oto': each sku can only be placed in one location and each location can only hold one sku
#                            'otm': each sku can be placed in multiple locations but each location can only hold one sku
#                            'mto': each sku can only be placed in one location but each location can hold multiple skus
#                            'mtm': each sku can be placed in multiple locations and each location can hold multiple skus
#                                   (not implemented yet)
#
# Output:             df: the initial sku-location dataframe. For 'oto' and 'mto' it is a copy of sku_df with a
#                         'LocationID' column added; for 'otm' it is a copy of location_df with a 'SkuID' column
#                         added. The input dataframes are never modified
#
# Raises:     ValueError: if rule is not recognized, or if rule='oto' and there are more skus than locations
#    NotImplementedError: if rule='mtm'
###########################################################################################################################
def DF_Sku_Location_initial(sku_df, location_df, weights=None, rule='oto'):
    if rule == 'oto':
        # each sku can only be placed in one location and each location can only hold one sku
        if len(sku_df) > len(location_df):
            # raise instead of printing and returning None, which only failed later when the csv was written
            raise ValueError('The number of Skus cannot be greater than the number of Locations in One_to_One rule ')
        base_df = sku_df
        col_name = 'LocationID'
        replace = False
        population = location_df['LocationID'].tolist()
    elif rule == 'otm':
        # each sku can be placed in multiple locations but each location can only hold one sku
        base_df = location_df
        col_name = 'SkuID'
        replace = True
        population = sku_df['SkuID'].tolist()
    elif rule == 'mto':
        # each sku can only be placed in one location but each location can hold multiple skus
        base_df = sku_df
        col_name = 'LocationID'
        replace = True
        population = location_df['LocationID'].tolist()
    elif rule == 'mtm':
        # each sku can be placed in multiple locations and each location can hold multiple skus
        # TBD - this branch used to fall through and fail with an UnboundLocalError
        raise NotImplementedError('The mtm rule is not implemented yet.')
    else:
        raise ValueError('Rule is not recognized,please check the rule parameter.')

    # always work on a copy: 'otm' and 'mto' used to add the new column to the caller's dataframe
    df = base_df.copy()
    df[col_name] = Sample(population, weights, len(df), replace)

    return df
    

In [None]:
# Define the function that generates a sorted list of datetime(YYYY-MM-DD HH:MM:SS)
##########################################################################################################################
# Input :             size: the returned list size
#                startdate: the start date. Format 'DD/MM/YYYY HH:MM:SS'
#                  enddate: the end date. Format 'DD/MM/YYYY HH:MM:SS'. Only whole days between startdate and
#                           enddate are used and the end day itself is never drawn
#                time_rule: specify the time in a date. Can be 'fixed' or 'random'.
#                              'fixed': every entry keeps the time of day given in startdate
#                             'random': every entry gets a random time of day (second resolution)
#
# Output:        date_list: the return datetime list. The days are drawn with replacement and sorted ascending
#
# Raises:       ValueError: if time_rule is not 'fixed' or 'random', or if a date does not match the format
##########################################################################################################################
def Date_Time(size, startdate='09/05/2020 00:00:00',enddate='20/05/2020 00:00:00', time_rule='fixed'):
    # Reject a bad rule up front; it used to print a message per row and return raw day numbers
    if time_rule not in ('fixed', 'random'):
        raise ValueError('Cannot recognize the time_rule parameter, please check it.')

    date_format = '%d/%m/%Y %H:%M:%S'
    start = datetime.strptime(startdate, date_format)
    end = datetime.strptime(enddate, date_format)

    # draw a day offset (0 .. days_between_dates - 1) for every entry
    days_between_dates = (end - start).days
    day_offsets = Sample(range(days_between_dates), weights=None, size=size, replace=True).tolist()
    day_offsets.sort()

    if time_rule == 'fixed':
        return [start + dt.timedelta(days=offset) for offset in day_offsets]

    # 'random': count the random seconds from midnight (00:00:00) of the start day
    midnight = datetime.combine(start.date(), dt.time())
    return [midnight + dt.timedelta(days=offset, seconds=random.randrange(86400)) for offset in day_offsets]

## Generate initial dataframes

In [None]:
# Generate the initial dataframes: one ID column each for orders, skus and locations
Order_initial = DF_initial(rownumber=Order_No, df_type='Order')
Sku_initial = DF_initial(rownumber=Sku_No, df_type='Sku')
Location_initial = DF_initial(rownumber=Locat_No, df_type='Location')
# Order_initial.head()
# Sku_initial.head()
# Location_initial.head()

In [None]:
# Generate the initial order-sku dataframe (line and quantity distributions keep their 'Uniform' defaults)
Order_Sku_initial = DF_Order_Sku_initial(
    Order_initial,
    Sku_initial,
    Sku_weights,
    line_add1=Line_min,
    line_add2=Line_max,
    quan_add1=Quantity_min,
    quan_add2=Quantity_max,
)
# Order_Sku_initial.head()

In [None]:
# Generate the initial sku-location dataframe with the one-to-one rule
Sku_Location_initial = DF_Sku_Location_initial(sku_df=Sku_initial, location_df=Location_initial, rule='oto')
# Sku_Location_initial.head()

## Complete dataframes

### Complete Order dataframe

In [None]:
# Temporary order dataframe (a copy, so the columns added to it do not also appear in Order_initial)
Order_temp = Order_initial.copy()

In [None]:
# Add release date column: one sorted release date per order
col_name = 'ReleaseDate'
release_list = Date_Time(
    size=Order_No,
    startdate='09/05/2020 00:00:00',
    enddate='20/05/2020 00:00:00',
)
Order_temp[col_name] = release_list

In [None]:
# Add due date column: one sorted due date per order
# NOTE(review): the due dates are drawn by a separate Date_Time call, independently of the
# release dates, so nothing here enforces DueDate >= ReleaseDate for a given order - confirm
col_name = 'DueDate'
due_list = Date_Time(
    size=Order_No,
    startdate='11/05/2020 23:59:59',
    enddate='22/05/2020 23:59:59',
)
Order_temp[col_name] = due_list

In [None]:
# Final order dataframe (another name for Order_temp, not a copy)
Order_df = Order_temp

### Complete Sku dataframe

In [None]:
# Temporary sku dataframe (a copy, so the columns added to it do not also appear in Sku_initial)
Sku_temp = Sku_initial.copy()

In [None]:
# Add volume column: one integer from 0 to 9 per sku, drawn with replacement
col_name = 'Volumn'
volumn_array = Sample(population=range(10), weights=None, size=Sku_No, replace=True)
Sku_temp[col_name] = volumn_array

In [None]:
# Add weight column: one float between 5 and 10 per sku, rounded to 1 decimal digit
col_name = 'Weight'
# Start from a column of zeros so that apply runs the draw once per row; the zeros themselves are discarded
Sku_temp[col_name] = pd.Series(0, index=Sku_temp.index).apply(
    lambda _unused: Uniform(5, 10, is_int=False, digit=1)
)

In [None]:
# Final sku dataframe (another name for Sku_temp, not a copy)
Sku_df = Sku_temp 

### Complete Location dataframe

In [None]:
# Temporary location dataframe (a copy, so the columns added to it do not also appear in Location_initial)
Location_temp = Location_initial.copy()

In [None]:
# Add X-coordinate value column: one float per location, rounded to 1 decimal digit.
# Only the bottom left and top right corners are used as the bounds.
col_name = 'Xloc'
# Start from a column of zeros so that apply runs the draw once per row; the zeros themselves are discarded
Location_temp[col_name] = pd.Series(0.0, index=Location_temp.index).apply(
    lambda _unused: Uniform(BL_corner_x, TR_corner_x, is_int=False, digit=1)
)

In [None]:
# Add Z-coordinate value column: one float per location, rounded to 1 decimal digit.
# Only the bottom left and top right corners are used as the bounds.
col_name = 'Zloc'
# Start from a column of zeros so that apply runs the draw once per row; the zeros themselves are discarded
Location_temp[col_name] = pd.Series(0.0, index=Location_temp.index).apply(
    lambda _unused: Uniform(BL_corner_z, TR_corner_z, is_int=False, digit=1)
)

In [None]:
# Final location dataframe (another name for Location_temp, not a copy)
Location_df = Location_temp

### Complete Order_Sku dataframe

In [None]:
# Temporary order_sku dataframe (another name for Order_Sku_initial, not a copy)
Order_Sku_temp = Order_Sku_initial

In [None]:
# Final order_sku dataframe (no columns are added after the initial generation)
Order_Sku_df = Order_Sku_temp

### Complete Sku_Location dataframe

In [None]:
# Temporary sku_location dataframe (another name for Sku_Location_initial, not a copy)
Sku_Location_temp = Sku_Location_initial

In [None]:
# Final sku_location dataframe (no columns are added after the initial generation)
Sku_Location_df = Sku_Location_temp

## Output the csv files

In [None]:
# Output dataframes to csv files, in this order and without the index column
_csv_outputs = (
    (Order_df, 'Orders.csv'),
    (Sku_df, 'Skus.csv'),
    (Location_df, 'Locations.csv'),
    (Order_Sku_df, 'Order_Sku.csv'),
    (Sku_Location_df, 'Sku_Location.csv'),
)
for _frame, _csv_name in _csv_outputs:
    _frame.to_csv(_csv_name, index=False)