In [1]:
#Prep Data for Model

In [2]:
import pandas as pd
import numpy as np

## 1. Get Demand Data

In [3]:
revenue = pd.read_excel('Revenue.xlsx')

In [4]:
#Split Asia Pacific and Latin America into Separate Regions
apac_latam = revenue[revenue['Region'] == 'Asia Pacific & Latin America']
apac_gdp_perc = 0.733 #looked up online
latam_gdp_perc = 1 - apac_gdp_perc
apac_latam['Asia Pacific'] = apac_latam['Revenue ($M)'] * apac_gdp_perc
apac_latam['Latin America'] = apac_latam['Revenue ($M)'] * latam_gdp_perc
apac_latam = apac_latam.drop(columns = ['Region','Revenue ($M)'])
apac_latam = pd.melt(apac_latam, id_vars=['Year', 'Category'], var_name='Region', value_name='Revenue ($M)')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Asia Pacific'] = apac_latam['Revenue ($M)'] * apac_gdp_perc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Latin America'] = apac_latam['Revenue ($M)'] * latam_gdp_perc


In [5]:
#Merge Asia Pacific and Latin America back in
revenue = revenue[revenue['Region'] != 'Asia Pacific & Latin America']
revenue = pd.concat([revenue, apac_latam], ignore_index=True, axis=0)

In [6]:
rev_by_region = revenue.groupby(['Year', 'Region']).sum('Revenue ($M)')
rev_by_region = rev_by_region.reset_index()
rev_by_region = rev_by_region.rename(columns = {'Revenue ($M)': 'Yr_Region_Total'})

In [7]:
rev_by_year = revenue.groupby('Year').sum('Revenue ($M)')
rev_by_year = rev_by_year.reset_index()
rev_by_year = rev_by_year.rename(columns = {'Revenue ($M)': 'Yr_Total'})

In [8]:
revenue = revenue.merge(rev_by_region,
                        how = 'left',
                        on = ['Year', 'Region'])
revenue = revenue.merge(rev_by_year,
                        how = 'left',
                        on = 'Year')

In [9]:
revenue = revenue[revenue['Category'] == 'Footwear']
revenue['perc_rev_footware'] = revenue['Revenue ($M)']/revenue['Yr_Region_Total']
revenue['perc_rev_in_region'] = revenue['Yr_Region_Total']/revenue['Yr_Total']

In [10]:
avg_cost_shoes = 116.5 #based on nike website

In [11]:
revenue['shoes_made'] = revenue['Revenue ($M)'] * 1e6 / avg_cost_shoes
revenue['shoes_made'] = revenue['shoes_made'].round(decimals=0)

In [12]:
demand = revenue[['Year', 'Region', 'shoes_made']]

In [13]:
demand = demand.pivot(index='Region', columns='Year', values='shoes_made')

In [14]:
demand.to_csv('demand.csv')

In [15]:
#will need for workers
shoes_per_year  = demand.sum(axis = 0).to_list()
shoes_per_year.reverse()

## 2. Get Availability

### a. Read in Workers Data (2023)

In [16]:
workers = pd.read_excel('imap_export.xls', skiprows = 1)

In [17]:
workers = workers[workers['Product Type Type'] == 'Footwear']
wokers = workers[workers['Factory Type'] == 'FINISHED GOODS']


In [18]:
workers = workers[['Region', 'Total Workers']].groupby('Region').sum()

In [19]:
workers.head()

Unnamed: 0_level_0,Total Workers
Region,Unnamed: 1_level_1
AMERICAS,16925
EMEA,2514
N ASIA,99736
S ASIA,264761
SE ASIA,348364


### b. Prep Rev Data to Estimate Workers Each Year

In [20]:
rev_by_year['priorYr'] = rev_by_year['Year']-1

In [21]:
rev_by_year = rev_by_year.merge(rev_by_year,
                                how = 'left',
                                left_on = 'priorYr',
                                right_on = 'Year')

In [22]:
rev_by_year = rev_by_year.drop(columns = ['priorYr_x', 'priorYr_y', 'Year_y'])
rev_by_year = rev_by_year.drop(0)
rev_by_year = rev_by_year.rename(columns = {'Year_x' : 'Year',
                                           'Yr_Total_x': 'Yr_Total',
                                           'Yr_Total_y' : 'Prior_Yr_Total'})

In [23]:
rev_by_year['growth'] = (rev_by_year['Yr_Total']- rev_by_year['Prior_Yr_Total'])/rev_by_year['Prior_Yr_Total']

In [24]:
rev_by_year = rev_by_year.sort_values('Year', ascending = False)

### c. Get Workers Each Year

In [25]:
workers_per_year = workers

In [26]:
for i in range(0, 7):
    year = rev_by_year['Year'].iloc[i] - 1
    col_year = 'yr_' + str(year)
    last_column_name = workers_per_year.columns[-1]
    growth = rev_by_year['growth'].iloc[i]
    workers_per_year[col_year] = workers_per_year[last_column_name] * (1.0 - growth) ** 0.5

In [27]:
workers_per_year = workers_per_year.rename(columns = {'Total Workers' : 'yr_2023'})

In [28]:
seed_value = 123
np.random.seed(seed_value)

In [29]:
shoe_cols = []
for i in range(1, workers_per_year.shape[1]):
    col = workers_per_year.columns[i]
    workers_per_year[col] = workers_per_year[col].astype(int)
    total_workers = workers_per_year[col].sum()
    workers_per_year[col + '_perc'] = workers_per_year[col] / total_workers
    workers_per_year[col + '_shoes'] = workers_per_year[col + '_perc'] * shoes_per_year[i-1]
    workers_per_year[col + '_shoes'] = (workers_per_year[col + '_shoes'] * (np.random.rand()/4)).astype(int)
    shoe_cols.append(col + '_shoes')

In [30]:
availability = workers_per_year[shoe_cols]
availability.to_csv('availability.csv')

## 3. Get Cost Data

### a. Cost of Sales

In [31]:
cost_of_sales = pd.read_excel('CostOfSales.xlsx')

In [32]:
cost_of_sales = cost_of_sales[['Year', 'Shoes - Cost of Sales ($M)']]
cost_of_sales = cost_of_sales.rename(columns = {'Shoes - Cost of Sales ($M)' : 'Cost_of_Sales_shoes_m'})

### b. Inventory Costs

In [33]:
inventory_cost = pd.read_excel('inventory_cost.xlsx')

In [34]:
#Split Asia Pacific and Latin America into Separate Regions
apac_latam = inventory_cost[inventory_cost['Region'] == 'Asia Pacific & Latin America']
apac_gdp_perc = 0.733 #looked up online
latam_gdp_perc = 1 - apac_gdp_perc
apac_latam['Asia Pacific'] = apac_latam['Inventory Costs'] * apac_gdp_perc
apac_latam['Latin America'] = apac_latam['Inventory Costs'] * latam_gdp_perc
apac_latam = apac_latam.drop(columns = ['Region','Inventory Costs'])
apac_latam = pd.melt(apac_latam, id_vars='Year', var_name='Region', value_name='Inventory Costs')
#re-join it
inventory_cost = inventory_cost[inventory_cost['Region'] != 'Asia Pacific & Latin America']
inventory_cost = pd.concat([inventory_cost, apac_latam], ignore_index=True, axis=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Asia Pacific'] = apac_latam['Inventory Costs'] * apac_gdp_perc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Latin America'] = apac_latam['Inventory Costs'] * latam_gdp_perc


### c. Get Costs

In [35]:
costs = inventory_cost.merge(cost_of_sales,
                             how = 'inner',
                             on = 'Year')

In [36]:
perc = revenue[['Year', 'Region', 'perc_rev_footware', 'perc_rev_in_region']]

In [37]:
costs = costs.merge(perc,
                    how = 'inner',
                    on = ['Year', 'Region'])

In [38]:
costs['cost_of_sales_shoes_region'] = costs['Cost_of_Sales_shoes_m'] * costs['perc_rev_footware'] * costs['perc_rev_in_region']
costs['inventory_costs_shoes'] = costs['Inventory Costs'] * costs['perc_rev_footware']

In [39]:
costs = costs.drop(columns = ['Inventory Costs', 
                              'Cost_of_Sales_shoes_m', 
                              'perc_rev_footware', 
                              'perc_rev_in_region'])

In [40]:
costs = costs.rename(columns = {'inventory_costs_shoes' : 'inventory_costs',
                               'cost_of_sales_shoes_region': 'cost_of_sales'})

In [41]:
costs['inventory_costs_holding'] = costs['inventory_costs'] * 0.25
costs['inventory_costs_goods'] = costs['inventory_costs'] * 0.75

In [42]:
costs['shoes_in_inventory'] = costs['inventory_costs_goods'] * 1e6 / avg_cost_shoes

In [43]:
costs['holding_cost_per_shoe'] = costs['inventory_costs_holding'] * 1e6 / costs['shoes_in_inventory']

In [44]:
costs

Unnamed: 0,Year,Region,cost_of_sales,inventory_costs,inventory_costs_holding,inventory_costs_goods,shoes_in_inventory,holding_cost_per_shoe
0,2023,North America,6018.829833,2623.934746,655.983687,1967.95106,16892280.0,38.833333
1,2023,"Europe, Middle East, and Africa",3337.284985,1333.985691,333.496423,1000.489268,8587891.0,38.833333
2,2023,Greater China,2195.901198,729.615756,182.403939,547.211817,4697097.0,38.833333
3,2023,Asia Pacific,1345.426442,462.919761,115.72994,347.189821,2980170.0,38.833333
4,2023,Latin America,490.0803,168.621523,42.155381,126.466142,1085546.0,38.833333
5,2022,North America,4574.570406,2730.362557,682.590639,2047.771917,17577440.0,38.833333
6,2022,"Europe, Middle East, and Africa",2763.89648,1117.169324,279.292331,837.876993,7192077.0,38.833333
7,2022,Greater China,2026.159087,749.212137,187.303034,561.909103,4823254.0,38.833333
8,2022,Asia Pacific,1127.31773,347.131321,86.78283,260.348491,2234751.0,38.833333
9,2022,Latin America,410.632788,126.444833,31.611208,94.833625,814022.5,38.833333
