# Load Required Packages

In [26]:
import pandas as pd
import numpy as np

# Load Dummy Data

In [27]:
import pandas as pd
import numpy as np

np.random.seed(42)

# Generate dummy vendors data
vendors = pd.DataFrame({
    'vendor_id': ['V' + str(i) for i in range(1, 301)],
    'vendor_country': np.random.choice(['Country A', 'Country B', 'Country C'], size=300)
})

# Generate dummy factories data
factories = pd.DataFrame({
    'factory_id': ['F' + str(i) for i in range(1, 2001)],
    'factory_country': np.random.choice(['Country A', 'Country B', 'Country C'], size=2000),
    'PCX_MTL_ID': np.random.randint(1000, 9999, size=2000),
    'PP_Type': np.random.choice(['Type1', 'Type2', 'Type3'], size=2000),
    'VOLUME': np.random.randint(50, 200, size=2000)
})

# Generate dummy merged data
merged_df = pd.DataFrame({
    'item_id': np.random.randint(1000, 9999, size=5000),
    'PCX_MTL_ID': np.random.choice(factories['PCX_MTL_ID'], size=5000),
    'vendor_id': np.random.choice(vendors['vendor_id'], size=5000),
    'vendor_country': np.random.choice(vendors['vendor_country'], size=5000),
    'cost_per_unit': np.random.uniform(10, 100, size=5000),
    'PP_Type': np.random.choice(['Type1', 'Type2', 'Type3'], size=5000)
})

# Generate dummy target volume data
target_volume_df = pd.DataFrame({
    'vendor_id': ['V' + str(i) for i in range(1, 301)],
    'target_allocation_pct': np.random.uniform(0.05, 0.15, size=300),
    'acceptable_range_pct': np.random.uniform(5, 10, size=300),
    'PP_Type': np.random.choice(['Type1', 'Type2', 'Type3'], size=300)
})


In [28]:
# Ensure 'PCX_MTL_ID' and 'factory_id' are in both dataframes
merged_df = merged_df.merge(factories[['PCX_MTL_ID', 'factory_id', 'factory_country']], on='PCX_MTL_ID', how='left')

# Check if 'factory_country' needs to be updated from the 'factories' DataFrame
if 'factory_country' in merged_df.columns and 'factory_country_y' in merged_df.columns:
    # Assuming the 'factory_country' from the 'factories' DataFrame is the correct one
    merged_df['factory_country'] = merged_df['factory_country_y']
    merged_df.drop(columns=['factory_country_y', 'factory_country_x'], inplace=True, errors='ignore')


In [29]:
merged_df

Unnamed: 0,item_id,PCX_MTL_ID,vendor_id,vendor_country,cost_per_unit,PP_Type,factory_id,factory_country
0,5598,1072,V119,Country A,95.635379,Type1,F79,Country A
1,6008,9984,V139,Country B,34.452088,Type2,F169,Country A
2,7222,6808,V160,Country B,88.178985,Type2,F810,Country A
3,7222,6808,V160,Country B,88.178985,Type2,F1653,Country B
4,5009,2932,V232,Country C,45.136314,Type1,F983,Country A
...,...,...,...,...,...,...,...,...
6127,6190,8102,V181,Country C,40.969762,Type1,F18,Country B
6128,6432,3555,V232,Country B,26.941659,Type2,F603,Country B
6129,6432,3555,V232,Country B,26.941659,Type2,F1127,Country B
6130,6432,3555,V232,Country B,26.941659,Type2,F1674,Country B


In [30]:
# Assuming merged_df already has 'vendor_country' and 'factory_country' columns
# Compute 'local_for_local' based on the country comparison
merged_df['local_for_local'] = merged_df['vendor_country'] == merged_df['factory_country']


In [31]:
merged_df

Unnamed: 0,item_id,PCX_MTL_ID,vendor_id,vendor_country,cost_per_unit,PP_Type,factory_id,factory_country,local_for_local
0,5598,1072,V119,Country A,95.635379,Type1,F79,Country A,True
1,6008,9984,V139,Country B,34.452088,Type2,F169,Country A,False
2,7222,6808,V160,Country B,88.178985,Type2,F810,Country A,False
3,7222,6808,V160,Country B,88.178985,Type2,F1653,Country B,True
4,5009,2932,V232,Country C,45.136314,Type1,F983,Country A,False
...,...,...,...,...,...,...,...,...,...
6127,6190,8102,V181,Country C,40.969762,Type1,F18,Country B,False
6128,6432,3555,V232,Country B,26.941659,Type2,F603,Country B,True
6129,6432,3555,V232,Country B,26.941659,Type2,F1127,Country B,True
6130,6432,3555,V232,Country B,26.941659,Type2,F1674,Country B,True


In [32]:
constraints_df = pd.DataFrame({
    'item_id': [123, 456],  # example item IDs that are not allowed with specific factories
    'factory_id': ['F1', 'F2']  # corresponding factory IDs that these items should not be allocated to
})


# Define Functions

## Pre-Processing

In [33]:
def preprocess_data(merged_df, constraints_df):
    # Check if constraints DataFrame is provided and not empty
    if constraints_df is not None and not constraints_df.empty:
        # Apply each constraint in the DataFrame
        for _, constraint in constraints_df.iterrows():
            item_id = constraint['item_id']
            factory_id = constraint['factory_id']
            # Filter out rows in merged_df that match both the item_id and factory_id of the constraint
            merged_df = merged_df[~((merged_df['item_id'] == item_id) & (merged_df['factory_id'] == factory_id))]

    return merged_df


## Winning Supplier

In [34]:
def select_winning_supplier(merged_df, factory_demand_df):
    selected_suppliers = []

    for _, factory in factory_demand_df.iterrows():
        factory_id = factory['factory_id']
        
        # Filter merged_df for entries that match the current factory_id
        matching_entries = merged_df[merged_df['factory_id'] == factory_id]

        # Now, 'local_for_local' column should exist in matching_entries
        local_suppliers = matching_entries[matching_entries['local_for_local']]
        non_local_suppliers = matching_entries[~matching_entries['local_for_local']]

        # Prioritize local suppliers
        for suppliers in [local_suppliers, non_local_suppliers]:
            if not suppliers.empty:
                winning_supplier = suppliers.sort_values(by='cost_per_unit').iloc[0]
                selected_suppliers.append({
                    'item_id': winning_supplier['item_id'],
                    'vendor_id': winning_supplier['vendor_id'],
                    'factory_id': factory_id,
                    'cost_per_unit': winning_supplier['cost_per_unit'],
                    'local_for_local': winning_supplier['local_for_local'],
                    'PP_Type': winning_supplier.get('PP_Type', 'N/A')  # Assuming PP_Type is needed
                })
                break

    return pd.DataFrame(selected_suppliers)


## Winning Factory

In [35]:
def allocate_to_factories(selected_suppliers, target_volume_df, factory_demand_df):
    allocation_result = []

    for _, supplier in selected_suppliers.iterrows():
        item_id = supplier['item_id']
        vendor_id = supplier['vendor_id']
        factory_id = supplier['factory_id']
        cost_per_unit = supplier['cost_per_unit']  # Capture cost_per_unit from selected_suppliers

        target_volume_data = target_volume_df[target_volume_df['vendor_id'] == vendor_id]
        factory_data = factory_demand_df[factory_demand_df['factory_id'] == factory_id]

        if not target_volume_data.empty and not factory_data.empty:
            target_allocation = target_volume_data['target_allocation_pct'].iloc[0] * factory_data['VOLUME'].iloc[0]

            allocation_result.append({
                'item_id': item_id,
                'factory_id': factory_id,
                'vendor_id': vendor_id,
                'allocated_units': target_allocation,
                'cost_per_unit': cost_per_unit,  # Include cost_per_unit in the results
                'PP_Type': supplier['PP_Type'],  # Ensure PP_Type is also carried forward
                'local_for_local': selected_suppliers['local_for_local']
            })

    return pd.DataFrame(allocation_result)


# Execute the Workflow

In [36]:
processed_merged_df = preprocess_data(merged_df, constraints_df)


In [37]:
winning_suppliers = select_winning_supplier(processed_merged_df, factories)
allocation_results = allocate_to_factories(winning_suppliers, target_volume_df, factories)

# Explore Results

In [38]:
print("Basic Statistics:")
print(allocation_results.describe())

Basic Statistics:
           item_id  allocated_units  cost_per_unit
count  1871.000000      1871.000000    1871.000000
mean   5483.092998        12.661387      45.427824
std    2628.594206         5.663256      24.831446
min    1002.000000         2.752763      10.001847
25%    3132.000000         8.090614      24.023961
50%    5506.000000        11.749109      40.811635
75%    7803.000000        16.298073      63.910534
max    9998.000000        29.704324      99.979703


In [49]:
print("\nAllocation Distribution by Factory:")
factory_allocation = allocation_results.groupby('factory_id')['allocated_units'].sum()
print(factory_allocation)


Allocation Distribution by Factory:
factory_id
F1      3878.042293
F10     4607.551696
F100    5118.095057
F101    2474.310855
F102    1715.219975
           ...     
F95     3002.355323
F96     3430.439949
F97     4169.044318
F98     2971.080789
F99     2085.169381
Name: allocated_units, Length: 500, dtype: float64


In [50]:
print("\nAllocation Distribution by Vendor:")
vendor_allocation = allocation_results.groupby('vendor_id')['allocated_units'].sum()
print(vendor_allocation)


Allocation Distribution by Vendor:
vendor_id
V1      17127.956908
V10      9987.048819
V100    12859.364975
V101     7472.764827
V102     6221.379482
            ...     
V95      6556.996257
V96      3700.522984
V97     10513.823824
V98     21134.968879
V99      7280.523978
Name: allocated_units, Length: 200, dtype: float64


In [51]:
allocation_results['total_cost'] = allocation_results['allocated_units'] * allocation_results['cost_per_unit']
print("\nTotal Cost per Factory:")
total_cost_per_factory = allocation_results.groupby('factory_id')['total_cost'].sum()
print(total_cost_per_factory)


Total Cost per Factory:
factory_id
F1      212651.729653
F10     258162.422284
F100    280736.444740
F101    135720.267987
F102     96104.259390
            ...      
F95     164633.597151
F96     192208.518781
F97     228679.355649
F98     162918.663847
F99     116832.629063
Name: total_cost, Length: 500, dtype: float64


In [52]:
print("\nLocal-for-Local Allocations:")
local_allocations = allocation_results['local_for_local'].value_counts()
print(local_allocations)


Local-for-Local Allocations:
local_for_local
0         True
1         True
2         True
3         True
4         True
          ... 
167409    True
167410    True
167411    True
167412    True
167413    True
Name: local_for_local, Length: 167414, dtype: bool    167414
Name: count, dtype: int64


In [53]:
print("\nDetailed Allocation per Item:")
item_allocation = allocation_results.groupby(['item_id', 'factory_id', 'vendor_id']).agg({'allocated_units': 'sum', 'total_cost': 'sum'})
print(item_allocation)


Detailed Allocation per Item:
                              allocated_units   total_cost
item_id factory_id vendor_id                              
1       F1         V192             15.112371   293.840656
        F104       V192             18.159220   353.082723
        F109       V192             18.402968   357.822089
        F11        V192             12.065522   234.598588
        F113       V192              8.896799   172.986838
...                                       ...          ...
1000    F82        V179             10.301894   984.323300
        F87        V179              8.295032   792.572008
        F90        V179             22.343070  2134.831053
        F96        V179             13.646665  1303.908787
        F99        V179              8.295032   792.572008

[167414 rows x 2 columns]


In [54]:
print("\nAllocation Distribution by PP_Type:")
pp_type_allocation = allocation_results.groupby('PP_Type')['allocated_units'].sum()
print(pp_type_allocation)



Allocation Distribution by PP_Type:
PP_Type
Type1    552967.905639
Type2    516970.182691
Type3    482117.433164
Type4    519857.962821
Name: allocated_units, dtype: float64
