In [1]:
#import dataset
import pandas as pd
df = pd.read_excel("North Central Sales 6 Months GIT v2.xlsx", sheet_name="North Central")

#remove whitespace from column names for easy access
df.columns = [c.replace(' ', '_') for c in df.columns]
df.columns = [c.replace('$', 'usd') for c in df.columns]
print(df.columns)

Index(['legacy_system_cd', 'legacy_division_cd', 'legacy_product_cd',
       'legacy_product_desc', 'core_item_flag', 'segment', 'PROD_CAT_1_NAME',
       'PROD_CAT_2_NAME', 'PROD_CAT_3_NAME', 'PROD_CAT_4_NAME',
       'legacy_vendor_cd', 'stocking_flag', 'LEGACY_CUSTOMER_CD',
       'saalfeld_customer_flag', 'national_acct_flag', 'ship-to_zip_code',
       'sales_channel', 'qty_6mos', 'cogs_6mos', 'Sales_6_mos', 'picks_6mos',
       'Margin_%', 'net_OH', 'net_OH_usd', 'pallet_quantity', 'item_poi_days',
       'DIOH'],
      dtype='object')


In [2]:
#Goal is to find the worst performing products
import numpy as np

#get the important products
imp_customers = df[df.national_acct_flag == "Y"]
important_products = imp_customers.legacy_product_cd.unique()
print(len(important_products))

products = df.legacy_product_cd.unique()
#print(len(products))

#Create a dictionary of products to sales
prod_to_sales = {}
for p in products:
    prod_to_sales[p] = 0

    
for p,s in zip(df.legacy_product_cd.values, df.Sales_6_mos.values):
    prod_to_sales[p] += s


#get the total value of sales
total_sales = np.nansum(list(prod_to_sales.values()))

#Create a dictionary of products to cogs
prod_to_cogs = {}
for p in products:
    prod_to_cogs[p] = 0

for p, c in zip(df.legacy_product_cd.values, df.cogs_6mos.values):
    prod_to_cogs[p] += c

#create a dictionary of products to profit = sales - cogs
prod_to_profit = {}
for p in products:
    prod_to_profit[p] = 0

for p in products:
    prod_to_profit[p] += prod_to_sales[p] - prod_to_cogs[p]

#get the total profit
total_profit = np.nansum(list(prod_to_profit.values()))

6547


In [3]:
#See how many products are profitable and how many are losing money
positive = {}
for x,y in prod_to_profit.items():
    if y >= 0:
        positive[x] = y

print("Number of Profitable products: ", len(positive.values()))
print("Total number of products: ", len(prod_to_profit.values()))

Number of Profitable products:  15525
Total number of products:  16035


In [4]:
#Create a dictionary of products to total number of picks for that product
prod_to_picks = {}
for p in products:
    prod_to_picks[p] = 0

for p,pk in zip(df.legacy_product_cd.values, df.picks_6mos.values):
    prod_to_picks[p] += pk

In [5]:
#Create a dictionary that maps:
#Warehouse (Division) -> Unique products sold at division

warehouses = df.legacy_division_cd.unique()
warehouse_to_prod = {}

for w in warehouses:
    warehouse_to_prod[w] = []

for w, p in zip(df.legacy_division_cd.values, df.legacy_product_cd.values):
    for ware in warehouses:
        if w == ware and p not in warehouse_to_prod[w]:
            warehouse_to_prod[w].append(p)

#Print warehouse to number of individual different products stored            
for w in warehouses:
    print(w, len(warehouse_to_prod[w]))

19 2972
50 18
73 145
74 87
75 3543
77 1757
78 271
81 2513
82 2079
83 631
84 3596
85 1951
87 1534
89 3580
41 883


In [20]:
wp_to_margin = {}
wp_to_picks = {}
wp_to_quantity = {}
for w in warehouses:
    for p in warehouse_to_prod[w]:
        wp_to_margin[w,p] = []
        wp_to_picks[w,p] = []
        wp_to_quantity[w,p] = []

for w, p, s, c, pk, q in zip(df.legacy_division_cd.values, df.legacy_product_cd.values, df.Sales_6_mos, df.cogs_6mos, df.picks_6mos, df.qty_6mos):
    if p in warehouse_to_prod[w]:
        wp_to_margin[w,p].append(s-c)
        wp_to_picks[w,p].append(pk)
        wp_to_quantity[w,p].append(q)

for w in warehouses:
    for p in warehouse_to_prod[w]:
        wp_to_margin[w,p] = sum(wp_to_margin[w,p])
        wp_to_picks[w,p] = sum(wp_to_picks[w,p])
        wp_to_quantity[w,p] = sum(wp_to_quantity[w,p])

count = 0
for w in warehouses:
    for p in warehouse_to_prod[w]:
        if wp_to_margin[w,p] <= 0:
            count += 1

print("Number of not profitable products at the warehouse level: ", count)
count2 = 0
for p in products:
    if prod_to_profit[p] <= 0:
        count2 += 1
            
print("Number of not profitable products at the regional level: ", count2)

1054
790


In [None]:
#Create a Dictionary of a warehouse, product cd pair with its net OH in units, net OH $ as values, item poi days & DIOH
#Example:
#Warehouse 19 houses product 10012415 and it has 4 units in on hand inventory, $81.2 of on hand inventory,
#302.430555555556 of average poi in days & 38.5263664820611 of DIOH
#The dictionary will look like this: wp_to_oh[19, 10012415] = [4.0, 81.2, 302.430555555556, 38.5263664820611]

wp_to_stats = {}
for w in warehouses:
    for p in warehouse_to_prod[w]:
        wp_to_stats[w,p] = [0,0,0,0]

for w, p, oh_usd, oh, poi, DIOH in zip(df.legacy_division_cd.values, df.legacy_product_cd.values, df.net_OH_usd.values, df.net_OH.values,
                           df.item_poi_days.values, df.DIOH.values):
    if wp_to_stats[w,p] == [0,0,0,0]:
        wp_to_stats[w,p] = [oh, oh_usd, poi, DIOH]

In [None]:
#Get number of customers that buy from Packaging, From facilities and from both
customers = df.LEGACY_CUSTOMER_CD.unique()
customer_to_seg = {}
for c in customers:
    customer_to_seg[c] = []

for c, s in zip(df.LEGACY_CUSTOMER_CD.values, df.segment.values):
    if s not in customer_to_seg[c]:
        customer_to_seg[c].append(s)


pack_count = 0
facility_count = 0
both_count = 0


for c, l in customer_to_seg.items():
    if "Packaging" in l and "Facility Solutions" not in l:
        pack_count += 1
    if "Facility Solutions" in l and "Packaging" not in l:
        facility_count += 1
    if "Packaging" in l and "Facility Solutions" in l:
        both_count += 1
print("Total Number of Customers:", len(customers), "\n")
print("Only Packaging number of customers:", pack_count)
print("Only Facilities number of customers:", facility_count)
print("Number of Customers that buy from both:", both_count)