In [1]:
###############################################################################
# Import packages
###############################################################################
import time
import pandas as pd
import numpy as np
from scipy.stats import skew
import itertools

# Load functions
from bin_packing_functions import generate_bin_packing_instance, solve_bin_packing_with_size_and_weight, fun_all_sums
from bin_packing_functions import fun_shapley_value, fun_convert_time, fun_save_file

In [2]:
###############################################################################
# Generate random instances - settings
###############################################################################

# Instance sizes to generate (number of items per instance): 5, 6, ..., 14
num_items_list = list(range(5, 15))

# Offset that is written into the instance IDs. Several copies of this script are
# usually run in parallel, so they must not all start counting at the same value.
start_value = 1

# Number of instances that are generated for each instance size
number_of_instances_per_size = 5

# Output verbosity: `prints` shows interim results during generation,
# `print_details` additionally shows how the individual features are computed
prints, print_details = True, False

# Bin capacities shared by all generated instances
bin_weight_capacity = 10
bin_size_capacity = 10

# Containers for the results: run time per instance size and the merged instances
run_time = dict()
df_final = pd.DataFrame()

###############################################################################
# START GENERATING
###############################################################################
# For every requested instance size this script generates random bin packing
# instances, enriches each of them with item-level and instance-level features,
# solves the instance and all of its sub-instances (needed for marginal costs and
# Shapley values) and finally stores all instances in one Excel file.

# Percentiles of the weight/size distribution that are turned into features
# (alternatively: [0, 25, 50, 75, 100])
percentiles = [25, 50, 75]

# Explicit column labels for the summary statistics. The labels are NOT derived from
# `function.__name__`, because on older NumPy releases np.max/np.min are aliases whose
# __name__ is 'amax'/'amin', which would silently rename the columns.
statistic_functions = [('Sum', np.sum), ('Mean', np.mean), ('Std', np.std), ('Max', np.max), ('Min', np.min)]

# Column groups used for displaying interim results and for selecting the ratio features
instance_features = ['Instance ID', 'Number Items', 'Item ID', 'Item Weight', 'Item Size', 'Bin Weight', 'Bin Size']
quantile_features = ['Weight Quantile Values', 'Size Quantile Values']
percentile_value_features = [str(percentile) + '% Percentile ' + attribute
                             for attribute in ['Weight', 'Size'] for percentile in percentiles]
percentile_features = percentile_value_features + [attribute + ' / {}% Percentile'.format(percentile)
                                                   for attribute in ['Weight', 'Size'] for percentile in percentiles]
statistical_features = [attribute + ' ' + label for label, _ in statistic_functions for attribute in ['Weight', 'Size']]
statistical_features += ['Correlation', 'Skewness Weight', 'Skewness Size']
utilization_features = ['Final Bin Utilization Weight', 'Final Bin Utilization Size', 'Final Total Bin Utilization']
cost_features = ['Marginal Costs/Bins', 'Total Bins', 'Shapley Value']

# Interim prints are only useful for a single instance, so they are switched off for batch runs
if (len(num_items_list) > 1) or (number_of_instances_per_size > 1):
    prints, print_details = False, False

# Finished instances are collected here and concatenated once after the loop
# (cheaper than growing df_final with pd.concat in every iteration)
instance_frames = []

for num_items in num_items_list:
    # The concrete offset is arbitrary; it only has to guarantee that scripts running
    # in parallel (with different start values) never produce overlapping instance IDs
    instance_id = 100000 * num_items * start_value
    start = time.time() # Start time count

    for inumber in range(0, number_of_instances_per_size):
        ###############################################################################
        # Generate a random instance
        ###############################################################################

        instance_df = generate_bin_packing_instance(num_items, bin_weight_capacity, bin_size_capacity)

        # Add the instance id to each row as first column and update the id for the next iteration
        instance_df.insert(0, 'Instance ID', instance_id)
        instance_id += 1

        # Add the number of items as second column
        instance_df.insert(1, 'Number Items', num_items)

        if prints:
            print('############### INSTANCE ###############')
            display(instance_df)

        ###############################################################################
        # Add features single bin utilization of all items and combinability features
        ###############################################################################

        # Extract weight and size values of the items as well as the bin weight and size capacities
        # (the capacities are read positionally from the first row, independent of the index labels)
        weights = instance_df['Item Weight']
        sizes = instance_df['Item Size']
        bin_weight = instance_df['Bin Weight'].iloc[0]
        bin_size = instance_df['Bin Size'].iloc[0]

        # Compute sum, product and quotient of weight and size for all items
        instance_df['Weight Size Sum'] = weights + sizes
        instance_df['Item Volume'] = weights * sizes
        instance_df['Item Density'] = weights / sizes

        # Compute the single bin utilization for all items in terms of weight, size and total bin utilization
        instance_df['Item Bin Utilization Weight'] = weights / bin_weight
        instance_df['Item Bin Utilization Size'] = sizes / bin_size
        instance_df['Item Total Bin Utilization'] = (weights + sizes) / (bin_weight + bin_size)

        # For every item: count the combinations of the OTHER items that still fit into the bin next to
        # it ("combinations") and those that fill the bin exactly ("perfect combinations"), separately
        # for weight and size and jointly for both ("Total")
        num_combinations_list = {'Weight': [], 'Size': [], 'Total': []}
        num_perfect_combinations_list = {'Weight': [], 'Size': [], 'Total': []}

        for index, (weight, size) in enumerate(zip(weights, sizes)):
            # Weight combinability (the current item is excluded by position; comparing with `!=`
            # instead of `is not` does not depend on the identity of int objects)
            remaining_bin_weight = bin_weight - weight
            remaining_item_weights = [value for position, value in enumerate(weights) if position != index]
            all_sums_weights = fun_all_sums(remaining_item_weights)
            num_combinations_weight = np.sum([i <= remaining_bin_weight for i in all_sums_weights])
            num_perfect_combinations_weight = np.sum([i == remaining_bin_weight for i in all_sums_weights])
            num_combinations_list['Weight'].append(num_combinations_weight)
            num_perfect_combinations_list['Weight'].append(num_perfect_combinations_weight)

            # Size combinability
            remaining_bin_size = bin_size - size
            remaining_item_sizes = [value for position, value in enumerate(sizes) if position != index]
            all_sums_sizes = fun_all_sums(remaining_item_sizes)
            num_combinations_size = np.sum([i <= remaining_bin_size for i in all_sums_sizes])
            num_perfect_combinations_size = np.sum([i == remaining_bin_size for i in all_sums_sizes])
            num_combinations_list['Size'].append(num_combinations_size)
            num_perfect_combinations_list['Size'].append(num_perfect_combinations_size)

            # Total combinability
            # NOTE(review): pairing the weight and size sums by position assumes that fun_all_sums
            # enumerates the item subsets in the same order for both lists - confirm in bin_packing_functions
            num_combinations_total = np.sum([(weight_sum_i <= remaining_bin_weight) & (size_sum_i <= remaining_bin_size)
                                             for weight_sum_i, size_sum_i in zip(all_sums_weights, all_sums_sizes)])
            num_perfect_combinations_total = np.sum([(weight_sum_i == remaining_bin_weight) & (size_sum_i == remaining_bin_size)
                                                     for weight_sum_i, size_sum_i in zip(all_sums_weights, all_sums_sizes)])
            num_combinations_list['Total'].append(num_combinations_total)
            num_perfect_combinations_list['Total'].append(num_perfect_combinations_total)

            # Print detailed information about the computation of the features
            if print_details:
                if (index == 0): print('############### COMBINABILITY FEATURES ###############')
                print('\n- Item {}:\n  Remaining weight: {} - {} = {}\n  Remaining item weights: {}'.format(index+1, bin_weight, weight, remaining_bin_weight, remaining_item_weights))
                print('  Number of possible bin combinations (weight): {}'.format(num_combinations_weight))
                print('  Number of bin combinations for perfect utilization (weigth): {}'.format(num_perfect_combinations_weight))
                print('  Remaining size: {} - {} = {}\n  Remaining item sizes: {}'.format(bin_size, size, remaining_bin_size, remaining_item_sizes))
                print('  Number of possible bin combinations (size): {}'.format(num_combinations_size))
                print('  Number of bin combinations for perfect utilization (size): {}'.format(num_perfect_combinations_size))
                print('=> Number of possible total bin combinations:', num_combinations_total)
                print('=> Number of total bin combinations for perfect utilization:', num_perfect_combinations_total)

        # Add results as columns to the DataFrame (first all combinations, then the perfect ones)
        for attribute in ['Weight', 'Size', 'Total']:
            instance_df[attribute + ' Bin Combinations'] = num_combinations_list[attribute]
        for attribute in ['Weight', 'Size', 'Total']:
            instance_df['Perfect ' + attribute + ' Bin Combinations'] = num_perfect_combinations_list[attribute]

        if prints:
            display(instance_df)

        ###############################################################################
        # Add quantile and percentile features
        ###############################################################################

        # Compute quantile values of weight and size for all items (A quantile of 0.4 means that the weight is greater or equal to 40% of all weights)
        weight_quantile_values = [np.sum([i >= j for j in weights]) / num_items for i in weights] # e.g. weights=[4, 5, 3, 5, 4] => [0.6, 1.0, 0.2, 1.0, 0.6]
        size_quantile_values = [np.sum([i >= j for j in sizes]) / num_items for i in sizes] # e.g. sizes=[2, 2, 3, 5, 6] => [0.4, 0.4, 0.6, 0.8, 1.0]

        instance_df['Weight Quantile Values'] = weight_quantile_values
        instance_df['Size Quantile Values'] = size_quantile_values

        # Compute the percentiles for weight and size values and add each of them as a (constant) column
        percentile_dict = {'Weight': {}, 'Size': {}}
        for attribute, values in zip(['Weight', 'Size'], [weights, sizes]):
            for index, percentile in enumerate(percentiles):
                result = np.percentile(values, percentile)
                percentile_dict[attribute][percentile] = result

                # Add percentile as column to the DataFrame
                instance_df[str(percentile) + '% Percentile ' + attribute] = result

                # Print percentiles for weight and size values
                if print_details:
                    if (attribute == 'Weight') and (index == 0): print('\n############### PERCENTILE AND QUANTILE FEATURES ###############')
                    if (index == 0): print(attribute + ':', np.sort(values))
                    print('  {} pct. percentil: {}'.format(percentile, result))

        # Divide weight and size by the percentiles and add the values as columns
        # (kept as a second pass so that the column order of the output file does not change)
        for attribute, values in zip(['Weight', 'Size'], [weights, sizes]):
            for percentile in percentiles:
                result = [value / percentile_dict[attribute][percentile] for value in values]
                instance_df[attribute + ' / {}% Percentile'.format(percentile)] = result

        if prints:
            print('\n############### PERCENTILE AND QUANTILE FEATURES ###############')
            display(instance_df[instance_features[3:] + quantile_features + percentile_features])

        ###############################################################################
        # Add summary statistics of the item weights and sizes as features
        ###############################################################################

        # Compute the statistics for weights and sizes of the items and add each of them as a column
        for label, statistic in statistic_functions:
            instance_df['Weight ' + label] = statistic(weights)
            instance_df['Size ' + label] = statistic(sizes)

        # Calculate the correlation between weights and sizes of the items as well as the skewness for weights and sizes and add statistics to each row as a column
        instance_df['Correlation'] = np.corrcoef(weights, sizes)[0, 1] # Select first row and second column of correlation matrix
        instance_df['Skewness Weight'] = skew(weights)
        instance_df['Skewness Size'] = skew(sizes)

        if prints:
            print('\n############### STATISTICAL FEATURES ###############')
            display(instance_df[instance_features + statistical_features])

        ###############################################################################
        # Solve instance
        ###############################################################################

        total_bins = solve_bin_packing_with_size_and_weight(instance_df)
        if prints:
            print('\n############### SOLVE INSTANCE ###############')
            print('Total bins:', total_bins)

        ###############################################################################
        # Add bin utilizations of optimal solution as features
        ###############################################################################

        # Extract necessary information out of the instance DataFrame
        weight_sum = instance_df['Weight Sum'].iloc[0]
        size_sum = instance_df['Size Sum'].iloc[0]
        bins_weight_capacity = bin_weight * total_bins
        bins_size_capacity = bin_size * total_bins

        # Compute final bin utilization of optimal solution and add the three results as columns to the DataFrame
        final_bin_utilization_weight = weight_sum / bins_weight_capacity
        final_bin_utilization_size = size_sum / bins_size_capacity
        instance_df['Final Bin Utilization Weight'] = final_bin_utilization_weight
        instance_df['Final Bin Utilization Size'] = final_bin_utilization_size
        instance_df['Final Total Bin Utilization'] = (final_bin_utilization_weight + final_bin_utilization_size) / 2

        if prints:
            print('\n############### Utilization Features ###############')
            display(instance_df[instance_features + utilization_features])

        ###############################################################################
        # Calculate total costs for all subsets
        ###############################################################################

        # Determine all non-empty subsets of the items, ordered by subset size. itertools.combinations
        # yields each subset as an ascending tuple, which is the key format of the cost dictionary below.
        # NOTE(review): assumes 'Item ID' runs from 1 to num_items in row order - confirm against
        # generate_bin_packing_instance
        all_items = range(1, num_items+1)
        list_of_all_subsets = [subset for subset_size in all_items
                               for subset in itertools.combinations(all_items, subset_size)]

        # Check if the correct number of subsets was created (number of possible subsets: 2^n -1)
        if prints:
            print('Correct number of subsets:', len(list_of_all_subsets) == 2**num_items - 1)

        # Evaluate total cost / total number of bins for each subset and store in dictionary
        subset_total_costs = {}
        for subset in list_of_all_subsets:
            # Select rows of the items contained in the subset and solve the subset instance
            subset_instance = instance_df[instance_df['Item ID'].isin(subset)]
            sub_total_bins = solve_bin_packing_with_size_and_weight(subset_instance)

            # Add the total costs to the dictionary
            subset_total_costs[subset] = sub_total_bins

        ###############################################################################
        # Calculate marginal costs (solve instance without item)
        ###############################################################################

        # Initialize an empty list to store the marginal costs
        all_marginal_costs = []

        # Iterate over all items to get the costs of the subset without the item and compute the marginal costs
        for i in all_items:
            set_without_customer = set(all_items) - set([i])

            # Build the lookup key explicitly in ascending order; the iteration order of a set is an
            # implementation detail and must not be relied on to match the keys from itertools.combinations
            subset_key = tuple(sorted(set_without_customer))

            # An empty remaining set (only possible for single-item instances) needs no bin at all
            bins_without_customer = subset_total_costs[subset_key] if subset_key else 0
            marginal_costs = total_bins - bins_without_customer

            if print_details:
                if (i == 1): print('\n############### FEATURE MARGINAL COSTS ###############')
                print('  - Item {}\n      Set without item: {}\n      Costs without item: {}'.format(i, set_without_customer, bins_without_customer))
                print('      Marginal costs/bins: {} - {} = {}'.format(total_bins, bins_without_customer, marginal_costs))

            # Append the marginal cost to the list
            all_marginal_costs.append(marginal_costs)

        # Add the marginal costs to the DataFrame and also a column for total bins
        instance_df['Marginal Costs/Bins'] = all_marginal_costs
        instance_df['Total Bins'] = total_bins

        ###############################################################################
        # Calculate Shapley values
        ###############################################################################

        # Calculate Shapley value for each item and append the values as (last) column
        shapley_values = [fun_shapley_value(player_index=i, characteristic_function=subset_total_costs, prints=print_details)
                          for i in all_items]
        instance_df['Shapley Value'] = shapley_values

        if prints:
            print('\n############### FEATURE MARGINAL COSTS AND SHAPLEY VALUES ###############')
            display(instance_df[instance_features + cost_features])

        ###############################################################################
        # Add ratio features
        ###############################################################################

        # Features that are constant within an instance (or are identifiers / the target) get no ratio
        no_ratio_features = ['Instance ID', 'Number Items', 'Item ID', 'Bin Weight',
                             'Bin Size', 'Total Bins', 'Shapley Value'] + percentile_value_features + statistical_features + utilization_features
        ratio_features = [i for i in instance_df.columns if i not in no_ratio_features]

        # Divide each ratio feature by its mean over all items of the instance; the ratio column
        # is inserted before the last column, i.e. before the Shapley value
        for feature in ratio_features:
            mean = np.mean(instance_df[feature])
            if (mean != 0): ratio = instance_df[feature] / mean
            else: ratio = 0 # e.g. no perfect bin combination for any item: mean is 0 -> avoid np.nan ratios
            instance_df.insert(loc=len(instance_df.columns) - 1, column=feature + str(' Ratio'), value=ratio)

        if prints:
            print('\n############### RATIO FEATURES ###############')
            print('No ratio features:\n', no_ratio_features)
            display(instance_df[[i + str(' Ratio') for i in ratio_features]])

        # Keep the finished instance for the final merge
        instance_frames.append(instance_df)

    # Stop time count for instance size and make print to show progress during generation
    run_time[num_items] = time.time() - start
    if (num_items_list[-1] > 10): print('- Instance size {} completed! Run time: {}'.format(num_items, fun_convert_time(seconds=run_time[num_items])))

# Merge all instances in one step and show the result exactly once, after the generation has finished
if instance_frames:
    df_final = pd.concat(instance_frames, ignore_index=True)

    # View final instance when there was only one generated
    if (len(num_items_list) == 1) and (number_of_instances_per_size == 1):
        print('\n############### FINAL INSTANCE ###############')
        display(instance_df)

    # Else view total run time and final DataFrame with merged instances
    else:
        print('-> Total run time: ', fun_convert_time(seconds=sum(run_time.values())))
        display(df_final)

# Save file
fun_save_file(data=df_final, subfolder_path='..\\..\\01_data\\03_bin_packing', name='bin_packing_instances.xlsx')

Set parameter Username
Academic license - for non-commercial use only - expires 2025-04-04
- Instance size 5 completed! Run time: 0s
- Instance size 6 completed! Run time: 1s
- Instance size 7 completed! Run time: 2s
- Instance size 8 completed! Run time: 6s
- Instance size 9 completed! Run time: 12s
- Instance size 10 completed! Run time: 25s
- Instance size 11 completed! Run time: 1m, 0s
- Instance size 12 completed! Run time: 2m, 7s
- Instance size 13 completed! Run time: 4m, 16s
- Instance size 14 completed! Run time: 10m, 56s
-> Total run time:  19m, 10s


Unnamed: 0,Instance ID,Number Items,Item ID,Item Weight,Item Size,Bin Weight,Bin Size,Weight Size Sum,Item Volume,Item Density,...,Weight Quantile Values Ratio,Size Quantile Values Ratio,Weight / 25% Percentile Ratio,Weight / 50% Percentile Ratio,Weight / 75% Percentile Ratio,Size / 25% Percentile Ratio,Size / 50% Percentile Ratio,Size / 75% Percentile Ratio,Marginal Costs/Bins Ratio,Shapley Value
0,500000,5,1,3,2,10,10,5,6,1.500000,...,1.176471,0.625000,1.071429,1.071429,1.071429,0.555556,0.555556,0.555556,0.0,0.283333
1,500000,5,2,1,2,10,10,3,2,0.500000,...,0.588235,0.625000,0.357143,0.357143,0.357143,0.555556,0.555556,0.555556,0.0,0.283333
2,500000,5,3,3,6,10,10,9,18,0.500000,...,1.176471,1.562500,1.071429,1.071429,1.071429,1.666667,1.666667,1.666667,0.0,0.616667
3,500000,5,4,6,5,10,10,11,30,1.200000,...,1.470588,1.250000,2.142857,2.142857,2.142857,1.388889,1.388889,1.388889,0.0,0.450000
4,500000,5,5,1,3,10,10,4,3,0.333333,...,0.588235,0.937500,0.357143,0.357143,0.357143,0.833333,0.833333,0.833333,0.0,0.366667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
470,1400004,14,10,1,6,10,10,7,6,0.166667,...,0.355932,1.675214,0.333333,0.333333,0.333333,1.750000,1.750000,1.750000,0.0,0.529171
471,1400004,14,11,2,5,10,10,7,10,0.400000,...,0.711864,1.435897,0.666667,0.666667,0.666667,1.458333,1.458333,1.458333,0.0,0.446567
472,1400004,14,12,3,1,10,10,4,3,3.000000,...,0.949153,0.358974,1.000000,1.000000,1.000000,0.291667,0.291667,0.291667,0.0,0.185201
473,1400004,14,13,4,5,10,10,9,20,0.800000,...,1.305085,1.435897,1.333333,1.333333,1.333333,1.458333,1.458333,1.458333,0.0,0.481416


File saved successfully!
