In [1]:
#Prep Data for Model

In [2]:
import pandas as pd
import numpy as np

## 1. Get Demand Data

In [3]:
revenue = pd.read_excel('Revenue.xlsx')

In [4]:
#Split Asia Pacific and Latin America into Separate Regions
apac_latam = revenue[revenue['Region'] == 'Asia Pacific & Latin America']
apac_gdp_perc = 0.733 #looked up online
latam_gdp_perc = 1 - apac_gdp_perc
apac_latam['Asia Pacific'] = apac_latam['Revenue ($M)'] * apac_gdp_perc
apac_latam['Latin America'] = apac_latam['Revenue ($M)'] * latam_gdp_perc
apac_latam = apac_latam.drop(columns = ['Region','Revenue ($M)'])
apac_latam = pd.melt(apac_latam, id_vars=['Year', 'Category'], var_name='Region', value_name='Revenue ($M)')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Asia Pacific'] = apac_latam['Revenue ($M)'] * apac_gdp_perc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Latin America'] = apac_latam['Revenue ($M)'] * latam_gdp_perc


In [5]:
#Merge Asia Pacific and Latin America back in
revenue = revenue[revenue['Region'] != 'Asia Pacific & Latin America']
revenue = pd.concat([revenue, apac_latam], ignore_index=True, axis=0)

In [6]:
rev_by_region = revenue.groupby(['Year', 'Region']).sum('Revenue ($M)')
rev_by_region = rev_by_region.reset_index()
rev_by_region = rev_by_region.rename(columns = {'Revenue ($M)': 'Yr_Region_Total'})

In [7]:
rev_by_year = revenue.groupby('Year').sum('Revenue ($M)')
rev_by_year = rev_by_year.reset_index()
rev_by_year = rev_by_year.rename(columns = {'Revenue ($M)': 'Yr_Total'})

In [8]:
revenue = revenue.merge(rev_by_region,
                        how = 'left',
                        on = ['Year', 'Region'])
revenue = revenue.merge(rev_by_year,
                        how = 'left',
                        on = 'Year')

In [9]:
revenue = revenue[revenue['Category'] == 'Footwear']
revenue['perc_rev_footware'] = revenue['Revenue ($M)']/revenue['Yr_Region_Total']
revenue['perc_rev_in_region'] = revenue['Yr_Region_Total']/revenue['Yr_Total']

In [10]:
avg_cost_shoes = 116.5 #based on nike website

In [11]:
revenue['shoes_made'] = revenue['Revenue ($M)'] * 1e6 / avg_cost_shoes
revenue['shoes_made'] = revenue['shoes_made'].round(decimals=0)

In [12]:
demand = revenue[['Year', 'Region', 'shoes_made']]

In [13]:
demand = demand.pivot(index='Region', columns='Year', values='shoes_made')

In [14]:
demand.to_csv('demand.csv')

In [15]:
#will need for workers
shoes_per_year  = demand.sum(axis = 0).to_list()
shoes_per_year.reverse()

In [16]:
shoes_per_year

[284420602.0,
 250154506.0,
 240523606.0,
 200042918.0,
 207914162.0,
 191141632.0,
 180952790.0,
 170566523.0]

In [17]:
rev = revenue[['Year', 'Region', 'Revenue ($M)']]
rev = rev.pivot(index='Region', columns='Year', values='Revenue ($M)')
rev.to_csv('rev.csv')

## 2. Get Availability

### a. Read in Workers Data (2023)

In [18]:
workers = pd.read_excel('imap_export.xls', skiprows = 1)

In [19]:
workers = workers[workers['Product Type Type'] == 'Footwear']
wokers = workers[workers['Factory Type'] == 'FINISHED GOODS']


In [20]:
workers = workers[['Region', 'Total Workers']].groupby('Region').sum()

In [21]:
workers.head()

Unnamed: 0_level_0,Total Workers
Region,Unnamed: 1_level_1
AMERICAS,16925
EMEA,2514
N ASIA,99736
S ASIA,264761
SE ASIA,348364


### b. Prep Rev Data to Estimate Workers Each Year

In [22]:
rev_by_year['priorYr'] = rev_by_year['Year']-1

In [23]:
rev_by_year = rev_by_year.merge(rev_by_year,
                                how = 'left',
                                left_on = 'priorYr',
                                right_on = 'Year')

In [24]:
rev_by_year = rev_by_year.drop(columns = ['priorYr_x', 'priorYr_y', 'Year_y'])
rev_by_year = rev_by_year.drop(0)
rev_by_year = rev_by_year.rename(columns = {'Year_x' : 'Year',
                                           'Yr_Total_x': 'Yr_Total',
                                           'Yr_Total_y' : 'Prior_Yr_Total'})

In [25]:
rev_by_year['growth'] = (rev_by_year['Yr_Total']- rev_by_year['Prior_Yr_Total'])/rev_by_year['Prior_Yr_Total']

In [26]:
rev_by_year = rev_by_year.sort_values('Year', ascending = False)

### c. Get Workers Each Year

In [27]:
workers_per_year = workers

In [28]:
for i in range(0, 7):
    year = rev_by_year['Year'].iloc[i] - 1
    col_year = 'yr_' + str(year)
    last_column_name = workers_per_year.columns[-1]
    growth = rev_by_year['growth'].iloc[i]
    workers_per_year[col_year] = workers_per_year[last_column_name] * (1.0 - growth) ** 0.5

In [29]:
workers_per_year = workers_per_year.rename(columns = {'Total Workers' : 'yr_2023'})

In [30]:
workers_per_year

Unnamed: 0_level_0,yr_2023,yr_2022,yr_2021,yr_2020,yr_2019,yr_2018,yr_2017,yr_2016
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMERICAS,16925,16069.015157,15671.379968,14109.684516,14417.175141,13822.515598,13333.115306,12949.517462
EMEA,2514,2386.854009,2327.790206,2095.819608,2141.493548,2053.164208,1980.46983,1923.491102
N ASIA,99736,94691.83431,92348.641211,83145.849035,84957.836329,81453.613926,78569.665475,76309.191938
S ASIA,264761,251370.665995,245150.382969,220720.483439,225530.617875,216228.245334,208572.463311,202571.769138
SE ASIA,348364,330745.429609,322560.981461,290416.906164,296745.926195,284506.16389,274432.932377,266537.412171


In [31]:
seed_value = 123
np.random.seed(seed_value)

In [32]:
shoe_cols = []
for i in range(0, workers_per_year.shape[1]):
    col = workers_per_year.columns[i]
    workers_per_year[col] = workers_per_year[col].astype(int)
    total_workers = workers_per_year[col].sum()
    workers_per_year[col + '_perc'] = workers_per_year[col] / total_workers
    workers_per_year[col + '_shoes'] = workers_per_year[col + '_perc'] * shoes_per_year[i-1]
    workers_per_year[col + '_shoes'] = (workers_per_year[col + '_shoes'] * (1+ np.random.rand()/4)).astype(int)
    shoe_cols.append(col + '_shoes')

In [33]:
perc_shoes_produced = workers_per_year[shoe_cols]/workers_per_year[shoe_cols].sum(axis =0)

In [34]:
availability = workers_per_year[shoe_cols]
availability.to_csv('availability.csv')

In [35]:
availability

Unnamed: 0_level_0,yr_2023_shoes,yr_2022_shoes,yr_2021_shoes,yr_2020_shoes,yr_2019_shoes,yr_2018_shoes,yr_2017_shoes,yr_2016_shoes
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMERICAS,4628549,7043821,6109370,6324931,5454975,5313444,5500841,4898050
EMEA,687513,1045899,907185,939168,810092,789212,816895,727388
N ASIA,27275215,41507653,36002052,37273118,32145270,31312111,32415483,28864415
S ASIA,72405283,110187650,95572216,98946691,85334026,83122231,86051270,76623904
SE ASIA,95268541,144981558,125750659,130190750,112279721,109369617,113223358,100819493


## 3. Get Cost Data

### a. Cost of Sales

In [36]:
cost_of_sales = pd.read_excel('CostOfSales.xlsx')

In [37]:
cost_of_sales = cost_of_sales[['Year', 'Shoes - Cost of Sales ($M)']]
cost_of_sales = cost_of_sales.rename(columns = {'Shoes - Cost of Sales ($M)' : 'Cost_of_Sales_shoes_m'})

### b. Inventory Costs

In [38]:
inventory_cost = pd.read_excel('inventory_cost.xlsx')

In [39]:
#Split Asia Pacific and Latin America into Separate Regions
apac_latam = inventory_cost[inventory_cost['Region'] == 'Asia Pacific & Latin America']
apac_gdp_perc = 0.733 #looked up online
latam_gdp_perc = 1 - apac_gdp_perc
apac_latam['Asia Pacific'] = apac_latam['Inventory Costs'] * apac_gdp_perc
apac_latam['Latin America'] = apac_latam['Inventory Costs'] * latam_gdp_perc
apac_latam = apac_latam.drop(columns = ['Region','Inventory Costs'])
apac_latam = pd.melt(apac_latam, id_vars='Year', var_name='Region', value_name='Inventory Costs')
#re-join it
inventory_cost = inventory_cost[inventory_cost['Region'] != 'Asia Pacific & Latin America']
inventory_cost = pd.concat([inventory_cost, apac_latam], ignore_index=True, axis=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Asia Pacific'] = apac_latam['Inventory Costs'] * apac_gdp_perc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Latin America'] = apac_latam['Inventory Costs'] * latam_gdp_perc


### c. Get Costs

In [40]:
costs = inventory_cost.merge(cost_of_sales,
                             how = 'inner',
                             on = 'Year')

In [41]:
perc = revenue[['Year', 'Region', 'perc_rev_footware', 'perc_rev_in_region']]

In [42]:
costs = costs.merge(perc,
                    how = 'inner',
                    on = ['Year', 'Region'])

In [43]:
costs['cost_of_sales_shoes_region'] = costs['Cost_of_Sales_shoes_m'] * costs['perc_rev_footware'] * costs['perc_rev_in_region']
costs['inventory_costs_shoes'] = costs['Inventory Costs'] * costs['perc_rev_footware']

In [44]:
costs = costs.drop(columns = ['Inventory Costs', 
                              'Cost_of_Sales_shoes_m', 
                              'perc_rev_footware', 
                              'perc_rev_in_region'])

In [45]:
costs = costs.rename(columns = {'inventory_costs_shoes' : 'inventory_costs',
                               'cost_of_sales_shoes_region': 'cost_of_sales'})

In [46]:
time = list(costs['Year'].unique())
print(time)

[2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016]


In [47]:
time_holding_perc = []
for i in range(0, len(time)):
    time_holding_perc.append(np.random.rand()/10 + 0.2)

In [48]:
holding_cost_perc = pd.DataFrame({'Year': time, 'Hold_Cost_perc': time_holding_perc})

In [49]:
costs = costs.merge(holding_cost_perc,
                    how = 'inner',
                    on = 'Year')

In [50]:
costs['inventory_costs_holding'] = costs['inventory_costs'] * costs['Hold_Cost_perc']
costs['inventory_costs_goods'] = costs['inventory_costs'] * (1 - costs['Hold_Cost_perc'])

In [51]:
costs['shoes_in_inventory'] = costs['inventory_costs_goods'] * 1e6 / avg_cost_shoes

In [52]:
costs['holding_cost_per_shoe'] = costs['inventory_costs_holding'] * 1e6 / costs['shoes_in_inventory']
costs['holding_cost_per_shoe'] = costs['holding_cost_per_shoe'].round(decimals = 2)

In [54]:
inventory_holding_cost = costs[['Year', 'holding_cost_per_shoe']].groupby('Year').mean().reset_index()

In [55]:
inventory_holding_cost.to_csv('inventory_holding_cost.csv')

In [56]:
cost_output = costs[['Year', 'Region', 'cost_of_sales']]
cost_output = cost_output.pivot(index='Region', columns='Year', values='cost_of_sales')
cost_output.to_csv('cost.csv')

In [57]:
costs_year = costs[['Year', 'cost_of_sales']].groupby('Year').sum().reset_index()
costs_year = costs_year.sort_values('Year', ascending = False)
costs_year.head()
sales = np.array(costs_year['cost_of_sales'])
sales

array([13387.52275797, 10902.57649105, 11240.26333679,  9100.57762775,
        9187.81284545,  8566.89473732,  8180.34572968,  7419.85471734])

In [58]:
final_df = perc_shoes_produced.copy()
for i in range(0,perc_shoes_produced.shape[1]):
    col = final_df.columns[i]
    final_df[col] = final_df[col] * sales[i] * 1e6
final_df = final_df / availability             

In [59]:
final_df.head()

Unnamed: 0_level_0,yr_2023_shoes,yr_2022_shoes,yr_2021_shoes,yr_2020_shoes,yr_2019_shoes,yr_2018_shoes,yr_2017_shoes,yr_2016_shoes
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMERICAS,66.849005,35.77353,42.521753,33.253271,38.927438,37.262498,34.370067,35.010338
EMEA,66.849005,35.77353,42.521753,33.253271,38.927438,37.262498,34.370067,35.010338
N ASIA,66.849005,35.77353,42.521753,33.253271,38.927438,37.262498,34.370067,35.010338
S ASIA,66.849005,35.77353,42.521753,33.253271,38.927438,37.262498,34.370067,35.010338
SE ASIA,66.849005,35.77353,42.521753,33.253271,38.927438,37.262498,34.370067,35.010338


In [62]:
np.random.seed(123)
for i in range(0, final_df.shape[0]):
    for j in range(0, final_df.shape[1]):
        #np.random.seed(i+j)
        final_df.iloc[i,j] = final_df.iloc[i,j] * (1+ np.random.rand()/5)


In [63]:
final_df

Unnamed: 0_level_0,yr_2023_shoes,yr_2022_shoes,yr_2021_shoes,yr_2020_shoes,yr_2019_shoes,yr_2018_shoes,yr_2017_shoes,yr_2016_shoes
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMERICAS,81.465015,38.902974,45.459355,38.955322,47.732567,42.125713,45.143957,42.531565
EMEA,76.803188,40.091767,46.99967,40.879749,44.198936,37.932276,38.583221,43.142937
N ASIA,70.553348,37.678513,49.54279,38.746881,46.649457,47.295806,42.200713,41.689402
S ASIA,82.035194,39.31417,47.248234,35.565074,42.424662,44.612729,35.325595,39.697252
SE ASIA,75.73803,41.246166,48.108079,36.433232,44.047988,47.844288,44.718096,40.45752


In [64]:
final_df.to_csv('cost.csv')

In [65]:
(1+ np.random.rand()/4)

1.1559882379480277