In [1]:
import pandas as pd
import numpy as np

# Load the two order workbooks, Data1.xlsx and Data2.xlsx, into one dataframe each
orders_data_fy17_fy21, orders_data_fy21_fy22 = map(
    pd.read_excel,
    ('data_files/Data1.xlsx', 'data_files/Data2.xlsx'),
)

In [2]:
# Keep desired columns. The explicit .copy() makes temp_df an independent frame,
# so the column assignments below do not trigger SettingWithCopyWarning and
# can never end up writing into a temporary slice of the raw data.
temp_df = orders_data_fy17_fy21[['year_month', 'company_code_n', 'sold_to_customer_n_latest', 'bu', 'bu_n', 'material', 'ms_code', 'order_intake_quantity', 'order intake EUR']].copy()

# Parse 'year_month' (format YYYYMM) and create the 'FY' column.
# The fiscal year starts in April: months 4-12 belong to the FY named after the
# current calendar year, months 1-3 to the FY named after the previous one.
temp_df['year_month_date'] = pd.to_datetime(temp_df['year_month'], format='%Y%m')
temp_df['FY'] = np.where(temp_df['year_month_date'].dt.month >= 4, temp_df['year_month_date'].dt.year, temp_df['year_month_date'].dt.year - 1)

# Create fiscal 'quarter' column (calendar month -> fiscal quarter, Q1 = April-June)
quarter_dict = {1: 'Q4', 2: 'Q4', 3: 'Q4', 4: 'Q1', 5: 'Q1', 6: 'Q1', 7: 'Q2', 8: 'Q2', 9: 'Q2', 10: 'Q3', 11: 'Q3', 12: 'Q3'}
temp_df['quarter'] = temp_df['year_month_date'].dt.month.map(quarter_dict)

# Create 'half_year' column: April-September is 'HY1', every other month is 'HY2'
temp_df['half_year'] = np.where(temp_df['year_month_date'].dt.month.between(4, 9), 'HY1', 'HY2')

# Rename columns so that both source files end up with the same column names
temp_df = temp_df.rename(columns={'sold_to_customer_n_latest': 'customer', 'order intake EUR': 'order_intake_EUR'})

In [256]:
# Check temp_df: export the frame prepared from Data1.xlsx for manual inspection
temp_df.to_excel('data_files/new_file.xlsx', index=False)

In [None]:
# Keep desired columns. The explicit .copy() makes temp_df2 an independent frame,
# so the column assignments below do not trigger SettingWithCopyWarning and
# can never end up writing into a temporary slice of the raw data.
temp_df2 = orders_data_fy21_fy22[['year_month', 'company_code_n', 'sold_to_customer_n_latest', 'bu', 'bu_n', 'material', 'ms_code', 'order_intake_quantity', 'Order Intake Euro']].copy()

# Parse 'year_month' (format YYYYMM) and create the 'FY' column.
# The fiscal year starts in April: months 4-12 belong to the FY named after the
# current calendar year, months 1-3 to the FY named after the previous one.
temp_df2['year_month_date'] = pd.to_datetime(temp_df2['year_month'], format='%Y%m')
temp_df2['FY'] = np.where(temp_df2['year_month_date'].dt.month >= 4, temp_df2['year_month_date'].dt.year, temp_df2['year_month_date'].dt.year - 1)

# Create fiscal 'quarter' column (calendar month -> fiscal quarter, Q1 = April-June)
quarter_dict = {1: 'Q4', 2: 'Q4', 3: 'Q4', 4: 'Q1', 5: 'Q1', 6: 'Q1', 7: 'Q2', 8: 'Q2', 9: 'Q2', 10: 'Q3', 11: 'Q3', 12: 'Q3'}
temp_df2['quarter'] = temp_df2['year_month_date'].dt.month.map(quarter_dict)

# Create 'half_year' column: April-September is 'HY1', every other month is 'HY2'
temp_df2['half_year'] = np.where(temp_df2['year_month_date'].dt.month.between(4, 9), 'HY1', 'HY2')

# Rename columns so that both source files end up with the same column names
temp_df2 = temp_df2.rename(columns={'sold_to_customer_n_latest': 'customer', 'Order Intake Euro': 'order_intake_EUR'})

In [258]:
# Check temp_df2: write it to Excel (without the row index) for manual inspection
temp_df2.to_excel(excel_writer='data_files/new_file2.xlsx', index=False)

In [4]:
# Stack the two prepared dataframes on top of each other and renumber the rows from 0
orders_data = pd.concat(objs=[temp_df, temp_df2], ignore_index=True)
# Check orders_data: write it to Excel (without the row index) for manual inspection
orders_data.to_excel(excel_writer='data_files/orders_data.xlsx', index=False)

In [5]:
# Work on a deep copy so that the in-place dtype conversion applied below leaves orders_data untouched
orders_data_opt = orders_data.copy(deep=True)
# Reduce the memory usage of the dataframe and improve performance
def check_unique_values(df, max_unique=50):
    """Convert low-cardinality columns of ``df`` to the ``category`` dtype, in place.

    A column is converted when its number of distinct values (``nunique()``,
    which does not count missing values) is strictly below ``max_unique``.
    The names of the converted columns are printed, or a message saying that
    nothing was changed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to optimise. It is modified in place.
    max_unique : int, default 50
        Exclusive upper bound on the number of distinct values a column may
        have to be converted.

    Returns
    -------
    list
        Names of the columns that were converted, in column order.
    """
    changed_columns = []
    for col in df.columns:
        if df[col].nunique() < max_unique:
            df[col] = df[col].astype('category')
            changed_columns.append(col)

    if changed_columns:
        print("The following columns were changed to categorical data type: ")
        for col in changed_columns:
            print(col)
    else:
        print("No columns were changed to categorical data type.")

    return changed_columns
        
# Convert the low-cardinality columns of the working copy to 'category' (modifies it in place)
check_unique_values(df=orders_data_opt)

The following columns were changed to categorical data type: 
company_code_n
bu
bu_n
FY
quarter
half_year


In [6]:
# Drop every row whose 'ms_code' contains the text 'BOP'.
# 'ms_code' is first cast to str so that the .str accessor works on every row.
ms_code_text = orders_data_opt['ms_code'].astype(str)
orders_data_opt['ms_code'] = ms_code_text
contains_bop = orders_data_opt['ms_code'].str.contains('BOP')
orders_data_opt = orders_data_opt[~contains_bop]

In [262]:
# Total amount of orders per ms_code in eur

# Write one sheet per 'bu' value. The context manager saves and closes the
# workbook when the block ends, also when an error occurs inside the loop
# (ExcelWriter.save() is no longer available from pandas 2.0 on).
with pd.ExcelWriter('data_files/sales_data_bu_sheets_total_amount.xlsx', engine='xlsxwriter') as writer:

    # Loop over unique values in 'bu' column
    for bu in orders_data_opt['bu'].unique():
        # Create new dataframe for current 'bu' value
        bu_df = orders_data_opt[orders_data_opt['bu'] == bu][['ms_code', 'order_intake_EUR']]

        # Group by ms_code and sum order_intake_EUR, largest total first
        bu_df = bu_df.groupby(['ms_code']).sum().reset_index()
        bu_df = bu_df.sort_values('order_intake_EUR', ascending=False)

        # Write dataframe to a new sheet in the Excel file
        bu_df.to_excel(writer, sheet_name=f'{bu}', index=False)

In [263]:
# Total quantity of ms_codes per month

# Write one sheet per 'bu' value. The context manager saves and closes the
# workbook when the block ends, also when an error occurs inside the loop
# (ExcelWriter.save() is no longer available from pandas 2.0 on).
with pd.ExcelWriter('data_files/sales_data_bu_sheets_total_quantity_per_month.xlsx', engine='xlsxwriter') as writer:

    # Loop over unique values in 'bu' column
    for bu in orders_data_opt['bu'].unique():
        # Create new dataframe for current 'bu' value
        bu_df = orders_data_opt[orders_data_opt['bu'] == bu][['year_month_date', 'ms_code', 'order_intake_quantity']]

        # Group by month and ms_code and sum order_intake_quantity, largest quantity first
        bu_df = bu_df.groupby(['year_month_date', 'ms_code']).sum().reset_index()
        bu_df = bu_df.sort_values('order_intake_quantity', ascending=False)

        # Write dataframe to a new sheet in the Excel file
        bu_df.to_excel(writer, sheet_name=f'{bu}', index=False)

In [7]:
# XYZ analysis

def mean_std_from_first_nonzero(row):
    """Mean and standard deviation of a row, counted from its first non-zero value.

    The first element of ``row`` is skipped when searching for the first
    non-zero value (the callers pass rows whose first column is 'ms_code').

    Returns a Series with the entries 'Mean' and 'Std'. Both are 0 when every
    value after the first element is zero.
    """
    # Positions of the non-zero values, relative to the second element of the row
    nonzero_positions = np.nonzero(row.values[1:])[0]

    # Nothing but zeros: there is no starting point to measure from
    if len(nonzero_positions) == 0:
        return pd.Series({'Mean': 0, 'Std': 0})

    # Shift by one to get back to a position within the full row
    start = nonzero_positions[0] + 1
    values_from_start = row.iloc[start:]
    return pd.Series({'Mean': values_from_start.mean(), 'Std': values_from_start.std()})

In [8]:
# Pivot the ordered quantity per ms_code at three granularities: month,
# fiscal half-year and fiscal quarter. Combinations without orders are filled with 0.
# reset_index() turns 'ms_code' into the first column of each pivot.
pivot_table_month = pd.pivot_table(orders_data_opt, values='order_intake_quantity', index='ms_code', columns='year_month_date', aggfunc='sum', fill_value=0).reset_index()

pivot_table_half_year = pd.pivot_table(orders_data_opt, values='order_intake_quantity', index='ms_code', columns=['FY', 'half_year'], aggfunc='sum', fill_value=0).reset_index()

pivot_table_half_quarter = pd.pivot_table(orders_data_opt, values='order_intake_quantity', index='ms_code', columns=['FY', 'quarter'], aggfunc='sum', fill_value=0).reset_index()


# Sum of the most recent year in each pivot: the last 12 month columns,
# the last 2 half-year columns and the last 4 quarter columns
last_12_sum = pivot_table_month.iloc[:, -12:].sum(axis=1)
last_2_sum = pivot_table_half_year.iloc[:, -2:].sum(axis=1)
last_4_sum = pivot_table_half_quarter.iloc[:, -4:].sum(axis=1)


# Keep only the rows where that sum is greater than 0.
# The non-inplace reset_index(drop=True) returns a new, renumbered frame instead of
# a filtered slice, so adding columns to it later does not raise SettingWithCopyWarning.
pivot_table_month = pivot_table_month[last_12_sum > 0].reset_index(drop=True)

pivot_table_half_year = pivot_table_half_year[last_2_sum > 0].reset_index(drop=True)

pivot_table_half_quarter = pivot_table_half_quarter[last_4_sum > 0].reset_index(drop=True)


# Mean and std deviation for each product over all periods.
# These are computed BEFORE the *_from_first columns are added: .iloc[:, 1:] takes
# every column after 'ms_code', so at this point it only sees period columns.
# Computing them afterwards would mix the two derived columns into the statistics.
df_mean = pivot_table_month.iloc[:, 1:].mean(axis=1)
df_std = pivot_table_month.iloc[:, 1:].std(axis=1)

df_mean_h = pivot_table_half_year.iloc[:, 1:].mean(axis=1)
df_std_h = pivot_table_half_year.iloc[:, 1:].std(axis=1)

df_mean_q = pivot_table_half_quarter.iloc[:, 1:].mean(axis=1)
df_std_q = pivot_table_half_quarter.iloc[:, 1:].std(axis=1)

# Mean and std deviation for each product, counted from the first period with a
# non-zero quantity. The function is applied to each row and the two results
# are stored in new columns.
pivot_table_month[['Mean_from_first', 'Std_from_first']] = pivot_table_month.apply(mean_std_from_first_nonzero, axis=1)
pivot_table_half_year[['Mean_from_first_hy', 'Std_from_first_hy']] = pivot_table_half_year.apply(mean_std_from_first_nonzero, axis=1)
pivot_table_half_quarter[['Mean_from_first_q', 'Std_from_first_q']] = pivot_table_half_quarter.apply(mean_std_from_first_nonzero, axis=1)

# Categorize products by comparing their std deviation with the average of the
# *_from_first means over all products: up to half of it, up to it, above it.
# NOTE(review): the lowest-std bin is labelled 'Z' and the highest 'X'. In the usual
# XYZ convention 'X' is the most stable class - confirm that this order is intended.
mean_level = pivot_table_month['Mean_from_first'].mean()
df_category = pd.cut(df_std, bins=[-float('inf'), 0.5*mean_level, mean_level, float('inf')], labels=['Z', 'Y', 'X'])
mean_level_h = pivot_table_half_year['Mean_from_first_hy'].mean()
df_category_h = pd.cut(df_std_h, bins=[-float('inf'), 0.5*mean_level_h, mean_level_h, float('inf')], labels=['ZH', 'YH', 'XH'])
mean_level_q = pivot_table_half_quarter['Mean_from_first_q'].mean()
df_category_q = pd.cut(df_std_q, bins=[-float('inf'), 0.5*mean_level_q, mean_level_q, float('inf')], labels=['ZQ', 'YQ', 'XQ'])

# Add the statistics as new columns and keep only the summary columns
pivot_table_month = pivot_table_month.assign(Mean=df_mean, Std=df_std, Category=df_category)
pivot_table_month = pivot_table_month.loc[:,['ms_code','Mean_from_first', 'Std_from_first', 'Mean', 'Std', 'Category']]

# The half-year and quarter pivots have two-level column labels (FY, period);
# after the selection only the first level is kept so the labels are plain strings.
pivot_table_half_year = pivot_table_half_year.assign(Mean_hy=df_mean_h, Std_hy=df_std_h, Category_hy=df_category_h)
pivot_table_half_year = pivot_table_half_year.loc[:,['ms_code','Mean_from_first_hy', 'Std_from_first_hy', 'Mean_hy', 'Std_hy', 'Category_hy']]
pivot_table_half_year = pivot_table_half_year.reset_index(level=0, drop=True)
pivot_table_half_year.columns = pivot_table_half_year.columns.get_level_values(0)

pivot_table_half_quarter = pivot_table_half_quarter.assign(Mean_q=df_mean_q, Std_q=df_std_q, Category_q=df_category_q)
pivot_table_half_quarter = pivot_table_half_quarter.loc[:,['ms_code','Mean_from_first_q', 'Std_from_first_q', 'Mean_q', 'Std_q', 'Category_q']]
pivot_table_half_quarter = pivot_table_half_quarter.reset_index(level=0, drop=True)
pivot_table_half_quarter.columns = pivot_table_half_quarter.columns.get_level_values(0)

# Export the three XYZ summaries, one sheet each. The context manager saves and
# closes the workbook when the block ends, also when a write fails
# (ExcelWriter.save() is no longer available from pandas 2.0 on).
with pd.ExcelWriter('data_files/order_data_XYZ.xlsx', engine='xlsxwriter') as writer:
    pivot_table_month.to_excel(writer, sheet_name='month')
    pivot_table_half_year.to_excel(writer, sheet_name='half year')
    pivot_table_half_quarter.to_excel(writer, sheet_name='quarter')

In [9]:
# Pricing information: read the priced positions and keep only the 'Model', 'COGS' and 'Chk' columns
priced = pd.read_excel('data_files/priced_positions.xlsx')[['Model', 'COGS', 'Chk']]

In [14]:
# Attach the pricing columns to every order row by matching 'ms_code' against 'Model'.
# A left join keeps all order rows; rows without a matching model get missing pricing values.
# NOTE(review): if 'Model' is not unique in `priced`, matching order rows are duplicated - confirm uniqueness.
orders_data_opt_with_price = orders_data_opt.merge(priced, how='left', left_on='ms_code', right_on='Model')

In [16]:
# Export the merged orders for inspection. The context manager saves and closes
# the workbook when the block ends, also when the write fails
# (ExcelWriter.save() is no longer available from pandas 2.0 on).
with pd.ExcelWriter('data_files/test.xlsx', engine='xlsxwriter') as writer:
    orders_data_opt_with_price.to_excel(writer, sheet_name='month')

In [17]:
# From here on orders_data_opt is an independent copy of the merged frame, including the pricing columns
orders_data_opt = orders_data_opt_with_price.copy(deep=True)

In [52]:
# ABC + XYZ analysis per bu
#
# For every 'bu' value one sheet is written with, per ms_code: the total order
# intake in EUR, its ABC class (by cumulative share of EUR), the XYZ categories
# computed earlier for months / quarters / half-years, the quantities and COGS.

# Upper bounds (cumulative % of EUR) of the A and B classes; everything above is C.
# The outer edges are open so that float rounding in the cumulative sum
# (e.g. 100.00000000000001) or negative totals cannot fall outside every bin
# and end up without a class.
abc_bins = [-float('inf'), 70, 90, float('inf')]

# Share of the average unit revenue that is used as COGS for rows with Status == 1
estimated_cogs_ratio = 0.7

# Get a sorted list of the unique 'bu' values. Missing values are dropped:
# they could never match a row in the filter below and may break the sorting.
bu_values = sorted(orders_data_opt['bu'].dropna().unique())

# The context manager saves and closes the workbook when the block ends, also when
# an error occurs inside the loop (ExcelWriter.save() is no longer available from pandas 2.0 on).
with pd.ExcelWriter('data_files/order_data_ABC_XYZ_per_bu.xlsx', engine='xlsxwriter') as writer:

    # Loop over each 'bu' value
    for bu in bu_values:

        # Filter the data for the current 'bu' value
        bu_df = orders_data_opt[orders_data_opt['bu'] == bu]

        # Price ('COGS') and status ('Chk') per ms_code. Going through a dict
        # keeps one value per ms_code (the last row wins when a code repeats).
        indexed_df = bu_df.set_index('ms_code')
        ms_code_priced = pd.Series(indexed_df['COGS'].to_dict())
        ms_code_statused = pd.Series(indexed_df['Chk'].to_dict())

        # Group by 'ms_code' and calculate the total 'order_intake_EUR' and quantity
        total_eur = bu_df.groupby('ms_code')['order_intake_EUR'].sum()
        total_quant = bu_df.groupby('ms_code')['order_intake_quantity'].sum()

        # Calculate the share of 'order_intake_EUR' for each 'ms_code'
        eur_share = total_eur / total_eur.sum()

        # Sort the 'ms_code' by descending 'order_intake_EUR'
        sorted_ms = total_eur.sort_values(ascending=False)

        # Calculate the cumulative sum of the sorted 'order_intake_EUR'
        cumulative_sum = sorted_ms.cumsum()

        # Calculate the percentage of the cumulative sum
        cumulative_percent = cumulative_sum / total_eur.sum() * 100

        # Categorize the 'ms_code' into ABC groups based on the cumulative percentage:
        # up to 70 % -> 'A', up to 90 % -> 'B', the rest -> 'C'
        abc_group = pd.cut(cumulative_percent, bins=abc_bins, labels=['A', 'B', 'C'])

        # Create a new dataframe with the results (the Series are aligned on ms_code)
        results_df = pd.DataFrame({
            'Total_EUR': total_eur,
            'Share': eur_share,
            'Cumulative_Sum_EUR': cumulative_sum,
            'Cumulative_Percent': cumulative_percent,
            'ABC_Group': abc_group,
            'Quantity': total_quant,
            'COGS': ms_code_priced,
            'Status': ms_code_statused
        })

        # Turn the ms_code index into a column and attach the XYZ statistics
        results_df.reset_index(inplace=True)
        results_df = results_df.rename(columns={'index': 'ms_code'})
        results_df = results_df.merge(pivot_table_month[['ms_code', 'Mean_from_first', 'Std_from_first', 'Category']],
                           on='ms_code', how='left')
        results_df = results_df.merge(pivot_table_half_quarter[['ms_code', 'Mean_from_first_q', 'Std_from_first_q', 'Category_q']],
                           on='ms_code', how='left')
        results_df = results_df.merge(pivot_table_half_year[['ms_code', 'Mean_from_first_hy', 'Std_from_first_hy', 'Category_hy']],
                           on='ms_code', how='left')

        # Select and rename the columns that go into the report
        abc_xyz = results_df.loc[:,['ms_code', 'Total_EUR', 'ABC_Group',  'Category', 'Quantity', 'Mean_from_first',
           'Category_q', 'Mean_from_first_q', 'Category_hy', 'Mean_from_first_hy', 'COGS', 'Status']]

        abc_xyz = abc_xyz.rename(columns={
            'ms_code': 'Model',
            'Mean_from_first': 'Mean',
            'Category': 'Category_XYZ',
            'Mean_from_first_q': 'Mean_Q',
            'Category_q': 'Category_XYZ_Q',
            'Mean_from_first_hy': 'Mean_HY',
            'Category_hy': 'Category_XYZ_HY'})

        # Quantities and means are reported as whole numbers
        abc_xyz['Quantity'] = abc_xyz['Quantity'].round(0)
        abc_xyz['Mean'] = abc_xyz['Mean'].round(0)
        abc_xyz['Mean_Q'] = abc_xyz['Mean_Q'].round(0)
        abc_xyz['Mean_HY'] = abc_xyz['Mean_HY'].round(0)

        abc_xyz = abc_xyz.sort_values(by=['ABC_Group', 'Category_XYZ', 'Total_EUR', 'Quantity'], ascending=[True, False, False, False])

        # Initialize 'sum' as float: it receives COGS * Mean products below
        abc_xyz['sum'] = 0.0
        # Rows whose 'sum' is filled in: ABC group 'A' combined with XYZ category 'X'
        mask = (abc_xyz['ABC_Group'] == 'A') & (abc_xyz['Category_XYZ'] == 'X')
        # Rows with Status == 1 get their COGS replaced by a share of the average
        # unit revenue. Rows with a zero quantity are left unchanged, because the
        # division would otherwise produce an infinite COGS.
        mask2 = (abc_xyz['Status'] == 1) & (abc_xyz['Quantity'] != 0)
        abc_xyz.loc[mask2, 'COGS'] = abc_xyz.loc[mask2, 'Total_EUR'] * estimated_cogs_ratio / abc_xyz.loc[mask2, 'Quantity']

        # Multiply the selected rows and assign to the 'sum' column
        abc_xyz.loc[mask, 'sum'] = abc_xyz.loc[mask, 'COGS'] * abc_xyz.loc[mask, 'Mean']

        # Sheet names must be text; str() matches the f'{bu}' used by the other exports
        abc_xyz.to_excel(writer, sheet_name=str(bu))