In [1]:
#Prep Data for Model

In [2]:
import pandas as pd
import numpy as np

## 1. Get Demand Data

In [3]:
revenue = pd.read_excel('../../data/Revenue.xlsx')

In [4]:
#Split Asia Pacific and Latin America into Separate Regions
apac_latam = revenue[revenue['Region'] == 'Asia Pacific & Latin America']
apac_gdp_perc = 0.733 #looked up online
latam_gdp_perc = 1 - apac_gdp_perc
apac_latam['Asia Pacific'] = apac_latam['Revenue ($M)'] * apac_gdp_perc
apac_latam['Latin America'] = apac_latam['Revenue ($M)'] * latam_gdp_perc
apac_latam = apac_latam.drop(columns = ['Region','Revenue ($M)'])
apac_latam = pd.melt(apac_latam, id_vars=['Year', 'Category'], var_name='Region', value_name='Revenue ($M)')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Asia Pacific'] = apac_latam['Revenue ($M)'] * apac_gdp_perc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Latin America'] = apac_latam['Revenue ($M)'] * latam_gdp_perc


In [5]:
#Merge Asia Pacific and Latin America back in
revenue = revenue[revenue['Region'] != 'Asia Pacific & Latin America']
revenue = pd.concat([revenue, apac_latam], ignore_index=True, axis=0)

In [6]:
rev_by_region = revenue.groupby(['Year', 'Region']).sum('Revenue ($M)')
rev_by_region = rev_by_region.reset_index()
rev_by_region = rev_by_region.rename(columns = {'Revenue ($M)': 'Yr_Region_Total'})

In [7]:
rev_by_year = revenue.groupby('Year').sum('Revenue ($M)')
rev_by_year = rev_by_year.reset_index()
rev_by_year = rev_by_year.rename(columns = {'Revenue ($M)': 'Yr_Total'})

In [8]:
revenue = revenue.merge(rev_by_region,
                        how = 'left',
                        on = ['Year', 'Region'])
revenue = revenue.merge(rev_by_year,
                        how = 'left',
                        on = 'Year')

In [9]:
revenue = revenue[revenue['Category'] == 'Footwear']
revenue['perc_rev_footware'] = revenue['Revenue ($M)']/revenue['Yr_Region_Total']
revenue['perc_rev_in_region'] = revenue['Yr_Region_Total']/revenue['Yr_Total']

In [10]:
avg_cost_shoes = 116.5 #based on nike website

In [11]:
revenue['shoes_made'] = revenue['Revenue ($M)'] * 1e6 / avg_cost_shoes
revenue['shoes_made'] = revenue['shoes_made'].round(decimals=0)

In [12]:
demand = revenue[['Year', 'Region', 'shoes_made']]

In [13]:
demand = demand.pivot(index='Region', columns='Year', values='shoes_made')

In [14]:
demand.to_csv('../../data/demand.csv')

In [15]:
#will need for workers
shoes_per_year  = demand.sum(axis = 0).to_list()
shoes_per_year.reverse()

In [16]:
shoes_per_year

[284420602.0,
 250154506.0,
 240523606.0,
 200042918.0,
 207914162.0,
 191141632.0,
 180952790.0,
 170566523.0]

In [17]:
rev = revenue[['Year', 'Region', 'Revenue ($M)']]
rev = rev.pivot(index='Region', columns='Year', values='Revenue ($M)')
rev.to_csv('../../data/rev.csv')

In [18]:
demand

Year,2016,2017,2018,2019,2020,2021,2022,2023
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Asia Pacific,18435107.0,20668712.0,22493348.0,22789064.0,21700575.0,23021863.0,25865777.0,28583854.0
"Europe, Middle East, and Africa",43287554.0,44566524.0,50429185.0,54017167.0,50575107.0,59828326.0,63416309.0,70901288.0
Greater China,22309013.0,25064378.0,30008584.0,36583691.0,39785408.0,49339056.0,46489270.0,46652361.0
Latin America,6715107.0,7528712.0,8193348.0,8301064.0,7904575.0,8385863.0,9421777.0,10411854.0
North America,79819742.0,83124464.0,80017167.0,86223176.0,80077253.0,99948498.0,104961373.0,127871245.0


## 2. Get Availability

### a. Read in Workers Data (2023)

In [19]:
workers = pd.read_excel('../../data/imap_export.xls', skiprows = 1)

In [20]:
workers = workers[workers['Product Type Type'] == 'Footwear']
workers = workers[workers['Factory Type'] == 'FINISHED GOODS']


In [21]:
workers = workers[['Region', 'Total Workers']].groupby('Region').sum()

In [22]:
output_per_worker = shoes_per_year[0] / workers['Total Workers'].sum()
print(output_per_worker)

428.4158978383435


### b. Prep Rev Data to Estimate Workers Each Year

In [23]:
rev_by_year['priorYr'] = rev_by_year['Year']-1

In [24]:
rev_by_year = rev_by_year.merge(rev_by_year,
                                how = 'left',
                                left_on = 'priorYr',
                                right_on = 'Year')

In [25]:
rev_by_year = rev_by_year.drop(columns = ['priorYr_x', 'priorYr_y', 'Year_y'])
rev_by_year = rev_by_year.drop(0)
rev_by_year = rev_by_year.rename(columns = {'Year_x' : 'Year',
                                           'Yr_Total_x': 'Yr_Total',
                                           'Yr_Total_y' : 'Prior_Yr_Total'})

In [26]:
rev_by_year['growth'] = (rev_by_year['Yr_Total']- rev_by_year['Prior_Yr_Total'])/rev_by_year['Prior_Yr_Total']

In [27]:
rev_by_year = rev_by_year.sort_values('Year', ascending = False)

### c. Get Workers Each Year

In [28]:
workers_per_year = workers

In [29]:
for i in range(0, 7):
    year = rev_by_year['Year'].iloc[i] - 1
    col_year = 'yr_' + str(year)
    last_column_name = workers_per_year.columns[-1]
    workers_per_year[col_year] = workers_per_year[last_column_name] * 0.95
    #growth = rev_by_year['growth'].iloc[i]
    #workers_per_year[col_year] = workers_per_year[last_column_name] * (1.0 - growth) ** 0.5

In [30]:
workers_per_year = workers_per_year.rename(columns = {'Total Workers' : 'yr_2023'})

In [31]:
workers_per_year

Unnamed: 0_level_0,yr_2023,yr_2022,yr_2021,yr_2020,yr_2019,yr_2018,yr_2017,yr_2016
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMERICAS,16750,15912.5,15116.875,14361.03125,13642.979687,12960.830703,12312.789168,11697.14971
EMEA,2514,2388.3,2268.885,2155.44075,2047.668712,1945.285277,1848.021013,1755.619962
N ASIA,70500,66975.0,63626.25,60444.9375,57422.690625,54551.556094,51823.978289,49232.779375
S ASIA,256245,243432.75,231261.1125,219698.056875,208713.154031,198277.49633,188363.621513,178945.440438
SE ASIA,317880,301986.0,286886.7,272542.365,258915.24675,245969.484412,233671.010192,221987.459682


In [32]:
seed_value = 123
np.random.seed(seed_value)

In [33]:
shoe_cols = []
for i in range(0, workers_per_year.shape[1]):
    col = workers_per_year.columns[i]
    workers_per_year[col] = workers_per_year[col].astype(int)
    workers_per_year[col + '_shoes'] = workers_per_year[col] * output_per_worker
    total_workers = workers_per_year[col].sum()
    #workers_per_year[col + '_perc'] = workers_per_year[col] / total_workers
    #workers_per_year[col + '_shoes'] = workers_per_year[col + '_perc'] * shoes_per_year[i-1]
    #workers_per_year[col + '_shoes'] = (workers_per_year[col + '_shoes'] * (1 + np.random.rand()/2)).astype(int)
    shoe_cols.append(col + '_shoes')

In [34]:
perc_shoes_produced = workers_per_year[shoe_cols]/workers_per_year[shoe_cols].sum(axis =0)

In [35]:
availability = workers_per_year[shoe_cols]
# Model decreasing availability
availability['yr_2020_shoes'] = availability['yr_2020_shoes'] * 0.9
availability['yr_2019_shoes'] = availability['yr_2019_shoes'] * 0.95
availability['yr_2018_shoes'] = availability['yr_2018_shoes'] * 0.97
availability.to_csv('../../data/availability.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  availability['yr_2020_shoes'] = availability['yr_2020_shoes'] * 0.9
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  availability['yr_2019_shoes'] = availability['yr_2019_shoes'] * 0.95
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  availability['yr_2018_shoes'] = availability['yr_2018_shoes'] * 0.97

In [36]:
availability

Unnamed: 0_level_0,yr_2023_shoes,yr_2022_shoes,yr_2021_shoes,yr_2020_shoes,yr_2019_shoes,yr_2018_shoes,yr_2017_shoes,yr_2016_shoes
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMERICAS,7175966.0,6816954.0,6475935.0,5537233.0,5552227.0,5385702.0,5274657.0,5011181.0
EMEA,1077038.0,1023057.0,971647.3,830912.6,833119.0,808270.9,791712.6,751869.9
N ASIA,30203320.0,28693150.0,27258390.0,23305650.0,23370470.0,22669400.0,22201800.0,21091770.0
S ASIA,109779400.0,104290100.0,99075890.0,84709900.0,84945170.0,82396670.0,80697700.0,76662880.0
SE ASIA,136184800.0,129375600.0,122906500.0,105085200.0,105377100.0,102215700.0,100108400.0,95102760.0


## 3. Get Cost Data

### a. Cost of Sales

In [37]:
cost_of_sales = pd.read_excel('../../data/CostOfSales.xlsx')

In [38]:
cost_of_sales = cost_of_sales[['Year', 'Shoes - Cost of Sales ($M)']]
cost_of_sales = cost_of_sales.rename(columns = {'Shoes - Cost of Sales ($M)' : 'Cost_of_Sales_shoes_m'})

### b. Inventory Costs

In [39]:
inventory_cost = pd.read_excel('../../data/inventory_cost.xlsx')

In [40]:
#Split Asia Pacific and Latin America into Separate Regions
apac_latam = inventory_cost[inventory_cost['Region'] == 'Asia Pacific & Latin America']
apac_gdp_perc = 0.733 #looked up online
latam_gdp_perc = 1 - apac_gdp_perc
apac_latam['Asia Pacific'] = apac_latam['Inventory Costs'] * apac_gdp_perc
apac_latam['Latin America'] = apac_latam['Inventory Costs'] * latam_gdp_perc
apac_latam = apac_latam.drop(columns = ['Region','Inventory Costs'])
apac_latam = pd.melt(apac_latam, id_vars='Year', var_name='Region', value_name='Inventory Costs')
#re-join it
inventory_cost = inventory_cost[inventory_cost['Region'] != 'Asia Pacific & Latin America']
inventory_cost = pd.concat([inventory_cost, apac_latam], ignore_index=True, axis=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Asia Pacific'] = apac_latam['Inventory Costs'] * apac_gdp_perc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apac_latam['Latin America'] = apac_latam['Inventory Costs'] * latam_gdp_perc


### c. Get Costs

In [41]:
costs = inventory_cost.merge(cost_of_sales,
                             how = 'inner',
                             on = 'Year')

In [42]:
perc = revenue[['Year', 'Region', 'perc_rev_footware', 'perc_rev_in_region']]

In [43]:
costs = costs.merge(perc,
                    how = 'inner',
                    on = ['Year', 'Region'])

In [44]:
costs['cost_of_sales_shoes_region'] = costs['Cost_of_Sales_shoes_m'] * costs['perc_rev_footware'] * costs['perc_rev_in_region']
costs['inventory_costs_shoes'] = costs['Inventory Costs'] * costs['perc_rev_footware']

In [45]:
costs = costs.drop(columns = ['Inventory Costs', 
                              'Cost_of_Sales_shoes_m', 
                              'perc_rev_footware', 
                              'perc_rev_in_region'])

In [46]:
costs = costs.rename(columns = {'inventory_costs_shoes' : 'inventory_costs',
                               'cost_of_sales_shoes_region': 'cost_of_sales'})

In [47]:
time = list(costs['Year'].unique())
print(time)

[2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016]


In [48]:
time_holding_perc = []
for i in range(0, len(time)):
    time_holding_perc.append(np.random.rand()/10 + 0.2)

In [49]:
holding_cost_perc = pd.DataFrame({'Year': time, 'Hold_Cost_perc': time_holding_perc})

In [50]:
costs = costs.merge(holding_cost_perc,
                    how = 'inner',
                    on = 'Year')

In [51]:
costs['inventory_costs_holding'] = costs['inventory_costs'] * costs['Hold_Cost_perc']
costs['inventory_costs_goods'] = costs['inventory_costs'] * (1 - costs['Hold_Cost_perc'])

In [52]:
costs['shoes_in_inventory'] = costs['inventory_costs_goods'] * 1e6 / avg_cost_shoes

In [53]:
costs['holding_cost_per_shoe'] = costs['inventory_costs_holding'] * 1e6 / costs['shoes_in_inventory']
costs['holding_cost_per_shoe'] = costs['holding_cost_per_shoe'].round(decimals = 2)

In [54]:
inventory_holding_cost = costs[['Year', 'holding_cost_per_shoe']].groupby('Year').mean().reset_index()

In [55]:
inventory_holding_cost.to_csv('../../data/inventory_holding_cost.csv')

In [56]:
cost_output = costs[['Year', 'Region', 'cost_of_sales']]
cost_output = cost_output.pivot(index='Region', columns='Year', values='cost_of_sales')
cost_output.to_csv('../../data/cost.csv')

In [57]:
costs_year = costs[['Year', 'cost_of_sales']].groupby('Year').sum().reset_index()
costs_year = costs_year.sort_values('Year', ascending = False)
costs_year.head()
sales = np.array(costs_year['cost_of_sales'])
sales

array([13387.52275797, 10902.57649105, 11240.26333679,  9100.57762775,
        9187.81284545,  8566.89473732,  8180.34572968,  7419.85471734])

In [58]:
final_df = perc_shoes_produced.copy()
for i in range(0,perc_shoes_produced.shape[1]):
    col = final_df.columns[i]
    final_df[col] = final_df[col] * sales[i] * 1e6
final_df = final_df / availability             

In [59]:
final_df.head()

Unnamed: 0_level_0,yr_2023_shoes,yr_2022_shoes,yr_2021_shoes,yr_2020_shoes,yr_2019_shoes,yr_2018_shoes,yr_2017_shoes,yr_2016_shoes
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMERICAS,47.069455,40.350187,43.789529,41.466366,41.74796,40.130527,39.126512,37.35695
EMEA,47.069455,40.350187,43.789529,41.466366,41.74796,40.130527,39.126512,37.35695
N ASIA,47.069455,40.350187,43.789529,41.466366,41.74796,40.130527,39.126512,37.35695
S ASIA,47.069455,40.350187,43.789529,41.466366,41.74796,40.130527,39.126512,37.35695
SE ASIA,47.069455,40.350187,43.789529,41.466366,41.74796,40.130527,39.126512,37.35695


In [60]:
np.random.seed(123)
for i in range(0, final_df.shape[0]):
    for j in range(0, final_df.shape[1]):
        #np.random.seed(i+j)
        final_df.iloc[i,j] = final_df.iloc[i,j] * (1+ np.random.rand()/5)


In [61]:
final_df

Unnamed: 0_level_0,yr_2023_shoes,yr_2022_shoes,yr_2021_shoes,yr_2020_shoes,yr_2019_shoes,yr_2018_shoes,yr_2017_shoes,yr_2016_shoes
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMERICAS,53.62594,42.659342,45.776272,46.03857,47.755233,43.526424,46.801288,42.47358
EMEA,51.596896,43.51459,46.795049,47.512575,45.40986,40.609508,42.241328,42.870801
N ASIA,48.787412,41.766089,48.444805,45.876958,47.04495,46.948156,44.795594,41.922145
S ASIA,53.870458,42.956478,46.95804,43.359416,44.200353,45.194808,39.847261,40.5973
SE ASIA,51.12555,44.334244,47.51891,44.056034,45.30782,47.300962,46.514849,41.106367


In [62]:
final_df.to_csv('../../data/cost.csv')

In [63]:
(1+ np.random.rand()/4)

1.1559882379480277