# Load Required Packages

In [6]:
import pandas as pd
import numpy as np

# Load Dummy Data

In [8]:
np.random.seed(42)

# Generate vendors data
vendors = pd.DataFrame({
    'vendor_id': [f'V{i}' for i in range(1, 201)],
    'vendor_country': np.random.choice(['Country A', 'Country B', 'Country C'], size=200)
})

# Generate items data
items = pd.DataFrame({
    'item_id': range(1, 1001),
    'PP_Type': np.random.choice(['Type1', 'Type2', 'Type3', 'Type4'], size=1000)
})

# Merge items with vendors to create a merged dataframe
merged_df = items.copy()
merged_df['vendor_id'] = np.random.choice(vendors['vendor_id'], size=1000)
merged_df['vendor_country'] = merged_df['vendor_id'].map(vendors.set_index('vendor_id')['vendor_country'])
merged_df['cost_per_unit'] = np.random.uniform(10, 100, size=1000)

# Generate factory demand data
factory_demand_df = pd.DataFrame({
    'factory_id': [f'F{i}' for i in range(1, 501)],
    'factory_country': np.random.choice(['Country A', 'Country B', 'Country C'], size=500),
    'VOLUME': np.random.randint(50, 200, size=500)
})

# Generate target volume data for vendors
target_volume_df = pd.DataFrame({
    'vendor_id': [f'V{i}' for i in range(1, 201)],
    'target_allocation_pct': np.random.uniform(0.05, 0.15, size=200),
    'acceptable_range_pct': np.random.uniform(5, 10, size=200),
    'PP_Type': np.random.choice(['Type1', 'Type2', 'Type3', 'Type4'], size=200)
})

# Define constraints
constraints = {
    'unacceptable_pairs': [(np.random.randint(1, 1001), f'F{np.random.randint(1, 501)}') for _ in range(10)]
}


# Define Functions

## Pre-Processing

In [14]:
def preprocess_data(merged_df, factory_demand_df, constraints=None):
    # Example: Apply constraints based on business logic linking items to factories
    if constraints and 'unacceptable_pairs' in constraints:
        # Define logic to determine how items and factories are related
        for item_id, factory_id in constraints['unacceptable_pairs']:
            # Apply the constraint based on business logic
            pass  # Placeholder for actual logic

    return merged_df


## Winning Supplier

In [34]:
def select_winning_supplier(merged_df, factory_demand_df):
    selected_suppliers = []

    for _, factory in factory_demand_df.iterrows():
        factory_id = factory['factory_id']
        factory_country = factory['factory_country']

        # Assume merged_df has a 'PP_Type' column
        for _, item in merged_df.iterrows():
            if item['vendor_country'] == factory_country:
                selected_suppliers.append({
                    'item_id': item['item_id'],
                    'vendor_id': item['vendor_id'],
                    'factory_id': factory_id,
                    'cost_per_unit': item['cost_per_unit'],
                    'local_for_local': item['vendor_country'] == factory_country,
                    'PP_Type': item['PP_Type']  # Make sure PP_Type is included here
                })

    return pd.DataFrame(selected_suppliers)


## Winning Factory

In [46]:
def allocate_to_factories(selected_suppliers, target_volume_df, factory_demand_df):
    allocation_result = []

    for _, supplier in selected_suppliers.iterrows():
        item_id = supplier['item_id']
        vendor_id = supplier['vendor_id']
        factory_id = supplier['factory_id']
        cost_per_unit = supplier['cost_per_unit']  # Capture cost_per_unit from selected_suppliers

        target_volume_data = target_volume_df[target_volume_df['vendor_id'] == vendor_id]
        factory_data = factory_demand_df[factory_demand_df['factory_id'] == factory_id]

        if not target_volume_data.empty and not factory_data.empty:
            target_allocation = target_volume_data['target_allocation_pct'].iloc[0] * factory_data['VOLUME'].iloc[0]

            allocation_result.append({
                'item_id': item_id,
                'factory_id': factory_id,
                'vendor_id': vendor_id,
                'allocated_units': target_allocation,
                'cost_per_unit': cost_per_unit,  # Include cost_per_unit in the results
                'PP_Type': supplier['PP_Type'],  # Ensure PP_Type is also carried forward
                'local_for_local': selected_suppliers['local_for_local']
            })

    return pd.DataFrame(allocation_result)


# Execute the Workflow

In [47]:
processed_merged_df = preprocess_data(merged_df, constraints)
winning_suppliers = select_winning_supplier(processed_merged_df, factory_demand_df)
allocation_results = allocate_to_factories(winning_suppliers, target_volume_df, factory_demand_df)

# Explore Results

In [48]:
print("Basic Statistics:")
print(allocation_results.describe())

Basic Statistics:
             item_id  allocated_units  cost_per_unit
count  167414.000000    167414.000000  167414.000000
mean      500.847850        12.375987      55.389121
std       288.585445         5.697068      25.940806
min         1.000000         2.509961      10.289644
25%       251.000000         7.888285      32.228092
50%       501.000000        11.372027      56.627089
75%       751.000000        16.209237      77.326586
max      1000.000000        29.311301      99.947235


In [49]:
print("\nAllocation Distribution by Factory:")
factory_allocation = allocation_results.groupby('factory_id')['allocated_units'].sum()
print(factory_allocation)


Allocation Distribution by Factory:
factory_id
F1      3878.042293
F10     4607.551696
F100    5118.095057
F101    2474.310855
F102    1715.219975
           ...     
F95     3002.355323
F96     3430.439949
F97     4169.044318
F98     2971.080789
F99     2085.169381
Name: allocated_units, Length: 500, dtype: float64


In [50]:
print("\nAllocation Distribution by Vendor:")
vendor_allocation = allocation_results.groupby('vendor_id')['allocated_units'].sum()
print(vendor_allocation)


Allocation Distribution by Vendor:
vendor_id
V1      17127.956908
V10      9987.048819
V100    12859.364975
V101     7472.764827
V102     6221.379482
            ...     
V95      6556.996257
V96      3700.522984
V97     10513.823824
V98     21134.968879
V99      7280.523978
Name: allocated_units, Length: 200, dtype: float64


In [51]:
allocation_results['total_cost'] = allocation_results['allocated_units'] * allocation_results['cost_per_unit']
print("\nTotal Cost per Factory:")
total_cost_per_factory = allocation_results.groupby('factory_id')['total_cost'].sum()
print(total_cost_per_factory)


Total Cost per Factory:
factory_id
F1      212651.729653
F10     258162.422284
F100    280736.444740
F101    135720.267987
F102     96104.259390
            ...      
F95     164633.597151
F96     192208.518781
F97     228679.355649
F98     162918.663847
F99     116832.629063
Name: total_cost, Length: 500, dtype: float64


In [52]:
print("\nLocal-for-Local Allocations:")
local_allocations = allocation_results['local_for_local'].value_counts()
print(local_allocations)


Local-for-Local Allocations:
local_for_local
0         True
1         True
2         True
3         True
4         True
          ... 
167409    True
167410    True
167411    True
167412    True
167413    True
Name: local_for_local, Length: 167414, dtype: bool    167414
Name: count, dtype: int64


In [53]:
print("\nDetailed Allocation per Item:")
item_allocation = allocation_results.groupby(['item_id', 'factory_id', 'vendor_id']).agg({'allocated_units': 'sum', 'total_cost': 'sum'})
print(item_allocation)


Detailed Allocation per Item:
                              allocated_units   total_cost
item_id factory_id vendor_id                              
1       F1         V192             15.112371   293.840656
        F104       V192             18.159220   353.082723
        F109       V192             18.402968   357.822089
        F11        V192             12.065522   234.598588
        F113       V192              8.896799   172.986838
...                                       ...          ...
1000    F82        V179             10.301894   984.323300
        F87        V179              8.295032   792.572008
        F90        V179             22.343070  2134.831053
        F96        V179             13.646665  1303.908787
        F99        V179              8.295032   792.572008

[167414 rows x 2 columns]


In [54]:
print("\nAllocation Distribution by PP_Type:")
pp_type_allocation = allocation_results.groupby('PP_Type')['allocated_units'].sum()
print(pp_type_allocation)



Allocation Distribution by PP_Type:
PP_Type
Type1    552967.905639
Type2    516970.182691
Type3    482117.433164
Type4    519857.962821
Name: allocated_units, dtype: float64
