In [166]:
import random
import numpy  as np
import pandas as pd
from scipy.spatial import distance_matrix
from ortools.graph import pywrapgraph
from sklearn.neighbors         import KNeighborsRegressor
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

### Data Processing

Parse the input data file into warehouse inventories, order demands, and the locations of warehouses and orders.

In [167]:
def list_lines(file_name):
    """Read a text file of whitespace-separated integers.

    Returns a list with one inner list of ints per line of the file; a blank
    line yields an empty inner list.
    """
    with open(file_name) as handle:
        raw_text = handle.read()

    parsed = []
    for text_line in raw_text.splitlines():
        parsed.append(list(map(int, text_line.split())))
    return parsed

def find_wh_lines(line_list):
    """Return the index of the line dividing the warehouse and order sections.

    The first value on line 3 is read as the warehouse count; the section is
    taken to be 4 header lines followed by 2 lines per warehouse.
    """
    header_lines = 4
    lines_per_warehouse = 2
    warehouse_total = line_list[3][0]
    return warehouse_total * lines_per_warehouse + header_lines

def get_inventories(line_list):
    """Build the supply matrix: products (rows) by warehouses (columns).

    Inventory rows are taken from every second line starting at index 5, up
    to and including the index returned by find_wh_lines. The result is
    returned as an int16 array.
    """
    boundary = find_wh_lines(line_list)
    inventory_rows = line_list[5:boundary + 1:2]
    # Rows are per-warehouse in the file; flip so that rows are products.
    return np.array(inventory_rows).T.astype(np.int16)

def get_orders(line_list):
    """Build the demand matrix: products (rows) by orders (columns).

    The number of rows is the first value on line 1 and the number of columns
    is the first value on the line returned by find_wh_lines. Each order's
    product list sits on every third line, starting three lines after that
    boundary; every occurrence of a product id adds one unit of demand.
    """
    boundary = find_wh_lines(line_list)
    n_products = line_list[1][0]
    n_orders = line_list[boundary][0]
    demand = np.zeros((n_products, n_orders), dtype=np.int16)

    for order_idx, product_ids in enumerate(line_list[boundary + 3::3]):
        for product_id in product_ids:
            demand[product_id, order_idx] += 1

    return demand.astype(np.int16)

def get_locs(line_list):
    """Return (warehouse locations, order locations) as int16 arrays.

    Warehouse locations are read from every second line starting at index 4,
    up to (not including) the boundary from find_wh_lines. Order locations are
    read from every third line starting one line after that boundary.
    """
    boundary = find_wh_lines(line_list)
    warehouse_rows = line_list[4:boundary:2]
    customer_rows = line_list[boundary + 1::3]
    return (
        np.array(warehouse_rows).astype(np.int16),
        np.array(customer_rows).astype(np.int16),
    )

In [168]:
# Parse the input file once and derive every model input from it.
line_list = list_lines('data.txt')
supply = get_inventories(line_list)  # Item vs Qty present in each warehouse (shape: 400x10)
demand = get_orders(line_list)       # Item vs Qty required in each order (shape: 400x1250)
warehouse_loctions, order_loctions = get_locs(line_list)  # Location of warehouses and orders
# Use the names bound on the line above: `order_locs` / `warehouse_locs` are not
# defined at notebook scope, so the previous call raised NameError on a fresh kernel.
distances_mtx = distance_matrix(order_loctions, warehouse_loctions)  # Order vs Warehouse Distance (shape: 1250x10)

In [220]:
# Rough work: sanity-check matrix multiplication on a tiny example (2x3 times 3x2 -> 2x2).
#distances = distance_matrix(order_loctions, warehouse_loctions)# Order vs Warehouse Distance (shape: 1250x10)
a = [[1, 1, 1],
     [2, 2, 2]]
b = [[3, 3],
     [4, 4],
     [5, 5]]
res = np.array(a) @ np.array(b)
print(res)

[[12 12]
 [24 24]]


In [170]:
def check_feedback_of_past_k_nearest_orders(assignments, k=5, threshold=3):
    """Decide whether the newest assignment's warehouse should be penalized.

    The last row of ``assignments`` is the assignment under test. Earlier rows
    for the same warehouse are used to fit a distance-weighted k-nearest-
    neighbour regressor on order location -> feedback; the warehouse is
    penalized when the feedback predicted at the new order's location is below
    ``threshold``.

    Args:
        assignments: sequence of rows laid out as
            [warehouse, order, order_location, product, qty, distance, feedback]
            (a list of lists, or a 2-D object array of such rows).
        k: number of neighbours used by the regressor (default 5).
        threshold: predicted feedback strictly below this value triggers a
            penalty (default 3).

    Returns:
        bool: True if the warehouse should be penalized. False when there is
        no assignment to test or fewer than ``k`` earlier assignments exist
        for that warehouse.
    """
    # Nothing to evaluate yet (the previous version raised IndexError here).
    if len(assignments) == 0:
        return False

    current = assignments[-1]
    warehouse, query_location = current[0], current[2]

    # Collect earlier observations for the same warehouse. Plain iteration
    # avoids rebuilding a DataFrame of the whole history on every call.
    locations, feedbacks = [], []
    for idx in range(len(assignments) - 1):
        row = assignments[idx]
        if row[0] == warehouse:
            locations.append(row[2])
            feedbacks.append(row[6])

    # Too few data points for a k-neighbour estimate: skip penalization.
    if len(locations) < k:
        return False

    # weights='distance' -> the closer the past order, the higher its influence.
    knn_regressor = KNeighborsRegressor(n_neighbors=k, weights='distance')
    knn_regressor.fit(locations, feedbacks)
    predicted = knn_regressor.predict([query_location])
    return bool(predicted[0] < threshold)

In [None]:
def _count_rows_per_warehouse(rows, n_warehouses):
    """Count rows per warehouse index (column 0 of each row)."""
    warehouse_ids = np.fromiter((int(row[0]) for row in rows), dtype=np.intp)
    if warehouse_ids.size and warehouse_ids.max() >= n_warehouses:
        raise ValueError("warehouse index exceeds the number of distance columns")
    return np.bincount(warehouse_ids, minlength=n_warehouses)


def get_penalized_distances(assignments, penalize_list, distances, threshold=0.4):
    """Scale each warehouse's distances by its share of penalized assignments.

    For every warehouse the ratio ``penalized rows / assigned rows`` is
    computed. When the ratio is strictly greater than ``threshold`` that
    warehouse's distance column is multiplied by ``1 + ratio``; otherwise it
    is left unchanged. This is the same as multiplying ``distances`` by a
    diagonal matrix of per-warehouse multipliers.

    Args:
        assignments: sequence of assignment rows; column 0 is the warehouse
            index.
        penalize_list: sequence of penalized assignment rows in the same
            layout. May be empty.
        distances: 2-D array-like with one column per warehouse
            (orders x warehouses).
        threshold: ratio above which a warehouse is penalized (default 0.4).

    Returns:
        numpy.ndarray of floats with the same shape as ``distances``.

    Raises:
        ValueError: if a row refers to a warehouse index that has no column
            in ``distances``.
    """
    distances = np.asarray(distances, dtype=float)
    n_warehouses = distances.shape[1]

    # Count by warehouse index rather than by groupby position, so a warehouse
    # with no (penalized) assignments still lines up with its own column.
    assigned = _count_rows_per_warehouse(assignments, n_warehouses)
    penalized = _count_rows_per_warehouse(penalize_list, n_warehouses)

    # Warehouses without any assignment get ratio 0 (no division by zero).
    ratio = np.divide(
        penalized,
        assigned,
        out=np.zeros(n_warehouses, dtype=float),
        where=assigned > 0,
    )
    multipliers = np.where(ratio > threshold, 1.0 + ratio, 1.0)

    # Broadcasting over rows scales column j by multipliers[j].
    return distances * multipliers

In [171]:
def assign_warehouses_to_orders(supply, warehouse_locs, demand, order_locs):
    """Assign warehouses to orders, one product at a time, with a min-cost flow.

    For each product a flow network is built with one arc from every
    warehouse to every order. Arc capacity is the order's demand for the
    product and arc cost is the (penalized) order-to-warehouse distance,
    truncated to an integer. Each resulting assignment receives a simulated
    feedback score from ``random.randint(1, 5)``; assignments flagged by
    ``check_feedback_of_past_k_nearest_orders`` are collected and used to
    penalize warehouse distances for the following products.

    Node numbering (N = number of orders, W = number of warehouses):
        orders     = 0 .. N-1
        warehouses = N .. N+W-1

    Args:
        supply: 2-D array, products (rows) x warehouses (columns).
        warehouse_locs: warehouse coordinates, one row per warehouse.
        demand: 2-D array, products (rows) x orders (columns).
        order_locs: order coordinates, one row per order.

    Returns:
        Tuple ``(assignments, penalize_list)`` of object arrays whose rows are
        [warehouse, order, order_location, product, qty, cost, feedback].
        ``cost`` is the arc's unit cost as seen by the solver, i.e. it
        includes any penalty in force when the product was solved.
    """
    order_locs = np.asarray(order_locs)
    n_products, n_orders = demand.shape
    n_warehouses = supply.shape[1]

    assignments = []
    # Accumulated across all products. Resetting it per product meant the
    # penalty never saw any data and only the last product's rows were returned.
    penalize_list = []
    count = 0

    # Unpenalized distances, order x warehouse. Kept intact so penalties are
    # always applied to the true distances instead of compounding.
    base_distances = distance_matrix(order_locs, warehouse_locs)

    warehouse_nodes = np.arange(n_orders, n_orders + n_warehouses)
    start_nodes = np.repeat(warehouse_nodes, n_orders).tolist()       # eg. [1, 1, 2, 2, 3, 3]
    end_nodes = np.tile(np.arange(n_orders), n_warehouses).tolist()   # eg. [1, 2, 3, 1, 2, 3]

    for i in range(n_products):  # iterate over products
        item_count = 0
        # Demand is negated because orders are sinks: quantity flows from
        # warehouses (sources) to orders. Order nodes come first, then warehouses.
        supplies = np.negative(demand[i]).tolist() + supply[i].tolist()
        capacities = np.tile(demand[i], n_warehouses).tolist()
        penalized_distances = get_penalized_distances(assignments, penalize_list, base_distances)
        # Flatten warehouse-major so costs line up with start_nodes / end_nodes.
        costs = np.transpose(penalized_distances).ravel().astype(int).tolist()

        # Build solver
        min_cost_flow = pywrapgraph.SimpleMinCostFlow()

        for tail, head, capacity, unit_cost in zip(start_nodes, end_nodes, capacities, costs):
            min_cost_flow.AddArcWithCapacityAndUnitCost(tail, head, capacity, unit_cost)

        for node, node_supply in enumerate(supplies):
            min_cost_flow.SetNodeSupply(node, node_supply)

        # Optimal warehouse for product 'i' across all orders. Products for
        # which the solver does not report OPTIMAL are skipped.
        if min_cost_flow.SolveMaxFlowWithMinCost() == min_cost_flow.OPTIMAL:
            for arc in range(min_cost_flow.NumArcs()):
                if min_cost_flow.Flow(arc) > 0:
                    warehouse = min_cost_flow.Tail(arc) - n_orders
                    order = min_cost_flow.Head(arc)
                    # Read from the parameter, not from a notebook-level global.
                    order_location = order_locs[order].tolist()
                    product = i
                    qty = min_cost_flow.Flow(arc)
                    cost = min_cost_flow.UnitCost(arc)
                    feedback = random.randint(1, 5)  # simulated customer feedback

                    assign = [warehouse, order, order_location, product, qty, cost, feedback]
                    assignments.append(assign)
                    item_count += qty

                    if check_feedback_of_past_k_nearest_orders(assignments):
                        penalize_list.append(assign)

        count += item_count

    print(supply.sum(), demand.sum(), count)
    return np.array(assignments, dtype=object), np.array(penalize_list, dtype=object)

In [172]:
%%time
assignments, penalize_list = assign_warehouses_to_orders(supply, warehouse_loctions, demand, order_loctions)
assign_df   = pd.DataFrame(assignments,   columns=['warehouse', 'order', 'order_location', 'product','qty', 'distance', 'feedback'])
penalize_df = pd.DataFrame(penalize_list, columns=['warehouse', 'order', 'order_location', 'product','qty', 'distance', 'feedback'])

14576 9368 9368
CPU times: user 2min 18s, sys: 1.63 s, total: 2min 20s
Wall time: 2min 21s


In [173]:
# Row/column counts: penalized assignments first, then all assignments.
print(penalize_df.shape, assign_df.shape, sep='\n')

(4046, 7)
(9236, 7)


In [184]:
# Preview the first five penalized assignments.
penalize_df.head(n=5)

Unnamed: 0,warehouse,order,order_location,product,qty,distance,feedback
0,5,604,"[110, 565]",1,1,378,1
1,5,694,"[141, 361]",1,1,172,1
2,5,858,"[193, 341]",1,1,148,5
3,4,1074,"[210, 345]",2,1,21,3
4,4,1077,"[220, 350]",2,1,24,2


In [201]:
# Number of penalized assignments recorded for each warehouse.
group = penalize_df.groupby(by='warehouse')
group['order'].count()

warehouse
0    506
1    210
2    302
3    602
4    426
5    471
6    465
7    370
8    234
9    460
Name: order, dtype: int64

In [211]:
# Number of assignments made from each warehouse, as a plain list.
assign_group = assign_df.groupby(by='warehouse')
assign_group['order'].count().tolist()

[1011, 531, 697, 1316, 974, 1058, 1081, 895, 627, 1046]

In [234]:
# Share of each warehouse's assignments that were penalized.
# Dividing the two Series aligns them on the warehouse index, so a warehouse
# with no penalized rows yields 0.0 instead of shifting (or breaking) a
# positional element-wise division.
ans = group['order'].count().div(assign_group['order'].count(), fill_value=0).tolist()
print(ans)

[0.5004945598417408, 0.3954802259887006, 0.4332855093256815, 0.4574468085106383, 0.43737166324435317, 0.44517958412098296, 0.4301572617946346, 0.4134078212290503, 0.37320574162679426, 0.4397705544933078]


In [247]:
# Per-warehouse distance multiplier: 1 + ratio where the penalized share
# exceeds 0.4, otherwise 1.0 (no penalty).
ans_bool = pd.Series(ans) > 0.4
ans_df = pd.DataFrame(ans, columns=['ratio'])
# Build the multipliers explicitly: this sizes the frame from `ans` instead of
# a hard-coded 10 and avoids an in-place add on a boolean-masked frame.
ans_new_df = pd.DataFrame({'ratio': np.where(ans_bool, 1.0 + ans_df['ratio'], 1.0)})
print(ans_new_df.ratio.values.tolist())

[1.5004945598417407, 1.0, 1.4332855093256816, 1.4574468085106382, 1.4373716632443532, 1.445179584120983, 1.4301572617946345, 1.4134078212290504, 1.0, 1.439770554493308]


NameError: name 'array' is not defined