# Warehouse Data Generator

In [None]:
# This code is used to generate synthetic data for the SimioWarehouseSystem

In [None]:
# Input: an Excel file that contains the parameter settings
#  \ Parameter              \ Type                \ Comment
#  \  # of orders           \  Integer            \
#  \  # of SKUs             \  Integer            \
#  \  # of locations        \  Integer            \
#  \  # of lines per order  \  String             \ Distribution name, given together with its parameters
#  \  quantity per line     \  String             \ Distribution name, given together with its parameters
#  \  warehouse layout      \  Vector             \ Coordinates of the four corners

In [None]:
# Output: several csv files which contain the synthetic warehouse data
#    1. Orders.csv:
#    2. Skus.csv:
#    3. Order_Sku.csv:
#    4. Locations.csv:
#    5. Sku_Location.csv:

In [None]:
# import associated packages
import pandas as pd
import numpy as np
import random
import datetime as dt
from datetime import datetime
from sklearn.utils import shuffle

## Function definitions

In [None]:
# Define the function to read from the file and return the parameter values
#####################################################################################################################
# Input :           FileName: The xlsx file that contains the parameter settings
#                  SheetName: The worksheet name that contains the main parameter settings. The sheet is read
#                             through its 'Name', 'Value', 'Add_value1' and 'Add_value2' columns
#            SKUWeightsSheet: The worksheet name that contains the SKU weights (read from its 'Weight' column)
#
# Output:    A tuple (Bounding, NumOrders, NumSKUs, SKUWeights, NumLocations, LineOrderDist, QuantityLineDist)
#                   Bounding: [BLx, BLz, ULx, ULz, URx, URz, BRx, BRz] corner coordinates as integers
#              LineOrderDist: [distribution name, min, max] for the number of lines in an order
#           QuantityLineDist: [distribution name, min, max] for the quantity in an order line
#
# NOTE: earlier versions ignored all three arguments and always opened 'ParamterSetting.xlsx' (misspelt).
#       If your workbook still carries that name, rename it or pass that name as FileName.
#######################################################################################################################
def readFromExcel(FileName = 'ParameterSetting.xlsx', SheetName = 'Parameter Setting', SKUWeightsSheet = 'SKU Weights'):
    # Read the workbook that the caller asked for
    PS = pd.read_excel(FileName, sheet_name = SheetName)
    SkuWeights = pd.read_excel(FileName, sheet_name = SKUWeightsSheet)

    def cell(Name, Column = 'Value'):
        # Return the single cell of Column in the row whose 'Name' equals Name
        return PS[Column][PS['Name'] == Name].item()

    NumOrders = int(cell('OrderNo'))                     # number of orders
    NumSKUs = int(cell('SkuNo'))                         # number of skus
    NumLocations = int(cell('LocationNo'))               # number of locations

    # Corners in the order bottom-left, upper-left, upper-right, bottom-right;
    # x is stored in 'Value' and z in 'Add_value1'
    Bounding = []
    for Corner in ('BL_corner', 'UL_corner', 'UR_corner', 'BR_corner'):
        Bounding.append(int(cell(Corner)))
        Bounding.append(int(cell(Corner, 'Add_value1')))

    LineDist = cell('LineDistribution')                  # The distribution for the number of lines in an order
    LineOrderDist = getLineItemDistParas(PS, LineDist, 'LineDistribution')

    QuantityDist = cell('QuantityDistribution')          # The distribution for the quantity in an order line
    # Use the quantity distribution's own name here (the line distribution name was passed by mistake before)
    QuantityLineDist = getLineItemDistParas(PS, QuantityDist, 'QuantityDistribution')

    SKUWeights = SkuWeights['Weight'].tolist()

    return(Bounding, NumOrders, NumSKUs, SKUWeights, NumLocations, LineOrderDist, QuantityLineDist)

In [None]:
# Define the function to read the SKUWeights from a csv file
#####################################################################################################################
# Input :           FileName: The csv file that contains the SKU weights
#
# Output:         SKUWeights: The weights as a list, in file order
#####################################################################################################################
def readSKUWeights(FileName = 'SKUWeights.csv'):
    # Assumes the csv has a 'Weight' column, mirroring the 'SKU Weights' worksheet
    # that readFromExcel reads -- TODO confirm against the real file layout
    SKUWeights = pd.read_csv(FileName)['Weight'].tolist()
    return(SKUWeights)

In [None]:
# Define the function that gets the parameter list corresponding to a distribution and a domain name
##################################################################################################################
# Input :      Dataframe: The dataframe that contains the distribution information. It is read through its
#                         'Name', 'Add_value1' and 'Add_value2' columns
#                   Dist: The distribution type. Default is 'Uniform' (the only type supported so far)
#             DomainName: The domain name for the distribution. Default is 'LineDistribution'
#
# Output:   DistParaList: A list that contains the distribution name and corresponding parameters,
#                         i.e. ['Uniform', Min, Max] with Min and Max converted to int
#
# Raises:     ValueError: If Dist is not a supported distribution
##################################################################################################################
def getLineItemDistParas(Dataframe, Dist = 'Uniform', DomainName = 'LineDistribution'):
    # Fail early with a clear message instead of hitting unbound Min/Max further down
    if Dist != 'Uniform':
        raise ValueError("Unsupported distribution '{}' for '{}'; only 'Uniform' is implemented.".format(Dist, DomainName))
    Row = Dataframe[Dataframe['Name'] == DomainName]
    Min = int(Row['Add_value1'].item())   # The min value in uniform distribution
    Max = int(Row['Add_value2'].item())   # The max value in uniform distribution
    DistParaList = [Dist, Min, Max]
    return DistParaList

In [None]:
# Define the function that draws one value from a uniform distribution, as an integer or a rounded float
###################################################################################################################
# Input :            Min: the lower bound of the uniform distribution
#                    Max: the upper bound of the uniform distribution
#                  IsInt: True to draw an integer from [Min, Max] (both ends included),
#                         otherwise draw a float. Default value is True
#                  Digit: number of decimal digits the float is rounded to. Default value is 1
#
# Output:          the drawn value
####################################################################################################################
def uniform(Min, Max, IsInt=True, Digit=1):
    # The explicit comparison with True is kept on purpose so that the branch taken
    # for unusual IsInt values stays exactly as before
    if not (IsInt == True):
        return round(random.uniform(Min, Max), Digit)
    return random.randint(Min, Max)

In [None]:
# Define the function that draws a random sample from a population, optionally using weights
#####################################################################################################################
# Input :      Population: the sample population. List (or any array-like accepted by np.random.choice)
#                 Weights: the probability associated with each population entry. List, numpy array or
#                          pandas Series. If Weights is None or empty, entries are selected uniformly.
#                          Non-empty weights are handed to np.random.choice as p unchanged
#                    Size: number of samples we want to get
#                 Replace: whether the sample is with or without replacement
#
# Output:            the sample stored as a np.array
#####################################################################################################################
def sample(Population, Weights, Size, Replace=False):
    # Test for "no weights" explicitly: the truth value of a numpy array or
    # pandas Series is ambiguous and would raise
    if Weights is None or len(Weights) == 0:
        return np.random.choice(a=Population, size=Size, replace=Replace)
    return np.random.choice(a=Population, size=Size, replace=Replace, p=Weights)

In [None]:
# Define the function that builds the initial one-column dataframe for Order, Sku or Location
####################################################################################################################
# Input :     Rownumber: the number of rows in the generated dataframe
#         DataframeType: the entity the dataframe is for, e.g. 'Order', 'Sku' or 'Location'. It names the
#                        column (DataframeType + 'ID') and prefixes every ID value
#
# Output:            df: the generated dataframe, with IDs DataframeType0 ... DataframeType(Rownumber-1)
####################################################################################################################
def dataframeInitial(Rownumber, DataframeType):
    id_column = DataframeType + 'ID'

    def label(position):
        # Turn a row position into its ID string, e.g. 3 -> 'Order3'
        return DataframeType + str(position)

    # Start from the plain row positions, then swap each one for its ID string
    frame = pd.DataFrame({id_column: np.arange(Rownumber)})
    frame[id_column] = frame[id_column].apply(label)
    return frame

In [None]:
# Define the function that generates the initial dataframe for Order_Sku
#####################################################################################################################
# Input :       OrderDataframe: the initial order dataframe (its 'OrderID' column is used)
#                 SKUDataframe: the initial sku dataframe (its 'SkuID' column is the sampling population)
#                   SKUWeights: the weight values for skus, passed to sample() for every order
#                LineOrderDist: the parameter list for the distribution of the number of lines in an order,
#                               e.g. ['Uniform', Min, Max]
#             QuantityLineDist: the parameter list for the distribution of the sku quantity in an order line,
#                               e.g. ['Uniform', Min, Max]
#
# Output:              df: the order_sku dataframe with columns 'OrderID', 'SkuID' and 'Quantity';
#                          one row per order line, skus drawn without replacement within an order
#
# Raises:      ValueError: if either distribution is not 'Uniform' (the only type implemented)
######################################################################################################################
def dataframeOrderSKUInitial(OrderDataframe, SKUDataframe, SKUWeights, LineOrderDist, QuantityLineDist):
    if LineOrderDist[0] != 'Uniform':
        raise ValueError("Unsupported line distribution '{}'; only 'Uniform' is implemented.".format(LineOrderDist[0]))
    if QuantityLineDist[0] != 'Uniform':
        raise ValueError("Unsupported quantity distribution '{}'; only 'Uniform' is implemented.".format(QuantityLineDist[0]))

    ColNames = ['OrderID','SkuID','Quantity']
    Population = SKUDataframe['SkuID'].tolist()

    # Collect the rows in plain lists and build the dataframe once at the end.
    # DataFrame.append no longer exists in pandas 2.x, and growing a dataframe
    # row by row copies it on every iteration.
    OrderIDs = []
    SkuIDs = []
    for OrderID in OrderDataframe['OrderID'].tolist():
        # get the number of lines for this order
        Line = uniform(LineOrderDist[1], LineOrderDist[2])
        # one row per line, each with a distinct sku
        OrderIDs.extend([OrderID] * Line)
        SkuIDs.extend(sample(Population, SKUWeights, Line).tolist())

    # assign Sku quantities for each row (drawn after all lines, as before)
    Quantities = [uniform(QuantityLineDist[1], QuantityLineDist[2]) for _ in OrderIDs]

    df = pd.DataFrame({'OrderID': OrderIDs, 'SkuID': SkuIDs, 'Quantity': Quantities}, columns = ColNames)
    return df

In [None]:
# Define the function that generates the initial dataframe for Sku_Location
##########################################################################################################################
# Input :           SKUDataframe: the initial sku dataframe (needs a 'SkuID' column)
#              LocationDataframe: the initial location dataframe (needs a 'LocationID' column)
#                        Weights: the weight value associated with the sampled population (the locations for
#                                 'oto' and 'mto', the skus for 'otm'). List. If Weights is None or empty,
#                                 use uniform distribution to select entries
#                           Rule: the matching Rule for skus and locations. four Rules can be selected:
#                                   'oto': each sku can only be placed in one location and each location can only hold one sku
#                                   'otm': each sku can be placed in multiple locations but each location can only hold one sku
#                                   'mto': each sku can only be placed in one location but each location can hold multiple skus
#                                   'mtm': each sku can be placed in multiple locations and each location can hold multiple skus
#                                          (not implemented yet)
#
# Output:                     df: the initial sku-location dataframe. For 'oto' with more skus than locations a
#                                 message is printed and None is returned
#
# Raises:    NotImplementedError: if Rule is 'mtm'
#                     ValueError: if Rule is not one of the four rules above
###########################################################################################################################
def dataframeSKULocationInitial(SKUDataframe, LocationDataframe, Weights=None, Rule='oto'):
    if Rule == 'oto':
        # each sku can only be placed in one location and each location can only hold one sku
        if len(SKUDataframe) > len(LocationDataframe):
            print('The number of Skus cannot be greater than the number of Locations in One_to_One Rule ')
            return None
        df = SKUDataframe.copy()
        ColName = 'LocationID'
        Replace = False
        Population = LocationDataframe['LocationID'].tolist()
    elif Rule == 'otm':
        # each sku can be placed in multiple locations but each location can only hold one sku
        df = LocationDataframe.copy()
        ColName = 'SkuID'
        Replace = True
        Population = SKUDataframe['SkuID'].tolist()
    elif Rule == 'mto':
        # each sku can only be placed in one location but each location can hold multiple skus
        df = SKUDataframe.copy()
        ColName = 'LocationID'
        Replace = True
        Population = LocationDataframe['LocationID'].tolist()
    elif Rule == 'mtm':
        # each sku can be placed in multiple locations and each location can hold multiple skus
        # Still to be designed; say so explicitly instead of failing on undefined names below
        raise NotImplementedError("Rule 'mtm' is not implemented yet.")
    else:
        raise ValueError('Rule is not recognized,please check the Rule parameter.')

    # Fill the matching column with one draw per row of the base dataframe
    NumRows = len(df)
    df[ColName] = sample(Population, Weights, NumRows, Replace)

    return df
    

In [None]:
# Define the function that generates a list of datetime(YYYY-MM-DD HH:MM:SS)
##########################################################################################################################
# Input :             Size: the returned list Size
#                Startdate: the start date. Format 'MM/DD/YYYY HH:MM:SS'
#                  Enddate: the end date. Format 'MM/DD/YYYY HH:MM:SS'
#                 TimeRule: specify the time in a date. Can be 'fixed' or 'random'.
#
# Output:        DateList: the return datetime list
##########################################################################################################################
def datatimeGenerator(Size, Startdate='09/05/2020 00:00:00',Enddate='20/05/2020 00:00:00', TimeRule='fixed'):
    Start = datetime.strptime( Startdate,'%d/%m/%Y %H:%M:%S')
    End = datetime.strptime( Enddate,'%d/%m/%Y %H:%M:%S')
    # get the zero time (00:00:00)
    ZeroTime = datetime(2019,8,10,0,0,0).time()
    AdjustDate = datetime.combine(Start.date(),ZeroTime)
    DaysBetweenDates = (End-Start).days
    DateList = sample(range(DaysBetweenDates),Weights=[], Size=Size, Replace=True).tolist()
    DateList.sort()
    for i in range(Size):
        if TimeRule == 'fixed':
            DateList[i] = Start + dt.timedelta(days=DateList[i])
        elif TimeRule == 'random':
            DateList[i] = AdjustDate + dt.timedelta(days=DateList[i], seconds=random.randrange(86400))
        else:
            print('Cannot recognize the TimeRule parameter, please check it.')
    return DateList

## Get the parameters

In [None]:
# Whether to read the parameters from the external xlsx file (True)
# or to use the hard-coded initial values instead (False)
External = True

In [None]:
if External:
    # Take every setting from the parameter workbook
    (Bounding, NumOrders, NumSKUs, SKUWeights,
     NumLocations, LineOrderDist, QuantityLineDist) = readFromExcel(FileName = 'ParameterSetting.xlsx', SheetName = 'Parameter Setting', SKUWeightsSheet = 'SKU Weights')
else:
    # Hard-coded initial values (for runs without the xlsx file)
    # Corner coordinates going around clockwise: 0 - BLx; 1 - BLz; 2 - ULx; 3 - ULz; then UR and BR
    Bounding         = [0, 0, 0, 1000, 1000, 1000, 1000, 0]
    NumOrders        = 1000
    NumSKUs          = 20000 # Generate an error if the number of SKUs exceeds the number of locations
    NumLocations     = 20000
    SKUWeights       = [] # if empty, all equally likely.  If not empty, verify that the numbers work
    LineOrderDist    = ['Uniform', 1, 10]
    QuantityLineDist = ['Uniform', 1,  5]
# Design note: random generation should go through a single function that produces Uniform variates
# with min,max passed as parameters.  If the distribution changes, add a separate function for the
# new type.

## Generate initial dataframes

In [None]:
# Build the three initial ID-only dataframes (orders, skus, locations) in one pass
OrderInitial, SKUInitial, LocationInitial = (
    dataframeInitial(RowCount, Prefix)
    for RowCount, Prefix in ((NumOrders, 'Order'), (NumSKUs, 'Sku'), (NumLocations, 'Location'))
)
# To preview a result, call .head() on OrderInitial, SKUInitial or LocationInitial

In [None]:
# Generate the initial order-sku dataframe: one row per order line, with the sku and quantity of that line
OrderSKUInitial = dataframeOrderSKUInitial(OrderInitial,SKUInitial,SKUWeights, LineOrderDist, QuantityLineDist)
# OrderSKUInitial.head()

In [None]:
# Generate the initial sku-location dataframe with the one-to-one rule
# (each sku in exactly one location, each location holding at most one sku)
SKULocationInitial = dataframeSKULocationInitial(SKUInitial, LocationInitial,Rule='oto')
# SKULocationInitial.head()

## Complete dataframes

### Complete Order dataframe

In [None]:
# Temporary order dataframe.
# This binds a second name to the same object as OrderInitial (no copy is made),
# so columns added through this name also appear on OrderInitial.
OrderTemperoryDataframe = OrderInitial

In [None]:
# Add the release date column: one generated date per order.
# The start and end strings are parsed by datatimeGenerator as day/month/year.
ColName = 'ReleaseDate'
ReleaseList = datatimeGenerator(NumOrders, Startdate='09/05/2020 00:00:00',Enddate='20/05/2020 00:00:00')
OrderTemperoryDataframe[ColName] = ReleaseList

In [None]:
# Add the due date column: one generated date per order.
# NOTE(review): the due dates are drawn independently of the release dates, so nothing here
# enforces DueDate >= ReleaseDate for a given order -- confirm this is acceptable.
ColName = 'DueDate'
DueList = datatimeGenerator(NumOrders,Startdate='11/05/2020 23:59:59',Enddate='22/05/2020 23:59:59')
OrderTemperoryDataframe[ColName] = DueList

In [None]:
# Final order dataframe (another name for the same object, not a copy).
# The spelling 'Finial' is kept because the csv output cell refers to this name.
OrderFinialDataframe = OrderTemperoryDataframe

### Complete Sku dataframe

In [None]:
# Temporary sku dataframe.
# Same object as SKUInitial (no copy), so columns added through this name also appear on SKUInitial.
SKUTemperoryDataframe = SKUInitial

In [None]:
# Add the volume column (the column is spelled 'Volumn' in the output):
# one integer per sku, drawn with replacement and without weights from range(10), i.e. 0 to 9.
# NOTE(review): a volume of 0 can be generated -- confirm that is intended.
ColName = 'Volumn'
VolumnArray = sample(range(10),Weights=[], Size=NumSKUs, Replace=True)
SKUTemperoryDataframe[ColName] = VolumnArray

In [None]:
# Add the weight column: one uniform draw per sku between 5 and 10, rounded to one decimal
ColName = 'Weight'
SKUTemperoryDataframe[ColName] = [
    uniform(5, 10, IsInt=False, Digit=1) for _ in range(len(SKUTemperoryDataframe))
]

In [None]:
# Final sku dataframe (another name for the same object, not a copy)
SKUFinalDataframe = SKUTemperoryDataframe 

### Complete Location dataframe

In [None]:
# Temporary location dataframe.
# Same object as LocationInitial (no copy), so columns added through this name also appear on LocationInitial.
LocationTemperoryDataframe = LocationInitial

In [None]:
# Add the X-coordinate column: one uniform draw per location between
# Bounding[0] and Bounding[6], rounded to one decimal
ColName = 'Xloc'
LocationTemperoryDataframe[ColName] = [
    uniform(Bounding[0], Bounding[6], IsInt=False, Digit=1) for _ in range(len(LocationTemperoryDataframe))
]

In [None]:
# Add the Z-coordinate column: one uniform draw per location between
# Bounding[1] and Bounding[3], rounded to one decimal
ColName = 'Zloc'
LocationTemperoryDataframe[ColName] = [
    uniform(Bounding[1], Bounding[3], IsInt=False, Digit=1) for _ in range(len(LocationTemperoryDataframe))
]

In [None]:
# Final location dataframe (another name for the same object, not a copy)
LocationFinalDataframe = LocationTemperoryDataframe

### Complete Order_Sku dataframe

In [None]:
# Temporary order_sku dataframe (same object as OrderSKUInitial, not a copy)
OrderSKUTemperoryDataframe = OrderSKUInitial

In [None]:
# Final order_sku dataframe (no columns are added between the temporary and the final name)
OrderSKUFinalDataframe = OrderSKUTemperoryDataframe

### Complete Sku_Location dataframe

In [None]:
# Temporary sku_location dataframe (same object as SKULocationInitial, not a copy)
SKULocationTemperoryDataframe = SKULocationInitial

In [None]:
# Final sku_location dataframe (no columns are added between the temporary and the final name)
SKULocationFinalDataframe = SKULocationTemperoryDataframe

## Output the csv files

In [None]:
# Write every final dataframe to its csv file, without the index column
for Frame, CsvName in (
    (OrderFinialDataframe, 'Orders.csv'),
    (SKUFinalDataframe, 'Skus.csv'),
    (LocationFinalDataframe, 'Locations.csv'),
    (OrderSKUFinalDataframe, 'Order_Sku.csv'),
    (SKULocationFinalDataframe, 'Sku_Location.csv'),
):
    Frame.to_csv(CsvName, index=False)