In [34]:
import pandas as pd
import numpy as np

# Load the first order workbook (Data1.xlsx) into a dataframe
orders_data_fy17_fy21 = pd.read_excel(io='data_files/Data1.xlsx')

# Load the second order workbook (Data2.xlsx) into a dataframe
orders_data_fy21_fy22 = pd.read_excel(io='data_files/Data2.xlsx')

In [35]:
# Keep only the columns needed downstream.
# .copy() makes temp_df an independent frame, so the column assignments below
# do not target a slice of the raw data (avoids pandas' SettingWithCopyWarning).
temp_df = orders_data_fy17_fy21[['year_month', 'company_code_n', 'sold_to_customer_n_latest', 'bu', 'bu_n', 'material', 'ms_code', 'order_intake_quantity', 'order intake EUR']].copy()

# Parse 'year_month' (YYYYMM) into a datetime and derive the fiscal year 'FY'.
# The fiscal year starts in April: months 4-12 take the calendar year,
# months 1-3 are assigned to the previous calendar year.
temp_df['year_month_date'] = pd.to_datetime(temp_df['year_month'], format='%Y%m')
temp_df['FY'] = np.where(temp_df['year_month_date'].dt.month >= 4, temp_df['year_month_date'].dt.year, temp_df['year_month_date'].dt.year - 1)

# Create fiscal 'quarter' column: calendar month -> fiscal quarter (Q1 = Apr-Jun, ..., Q4 = Jan-Mar)
quarter_dict = {1: 'Q4', 2: 'Q4', 3: 'Q4', 4: 'Q1', 5: 'Q1', 6: 'Q1', 7: 'Q2', 8: 'Q2', 9: 'Q2', 10: 'Q3', 11: 'Q3', 12: 'Q3'}
temp_df['quarter'] = temp_df['year_month_date'].dt.month.map(quarter_dict)

# Create 'half_year' column: Apr-Sep (inclusive) is 'HY1', every other month is 'HY2'
temp_df['half_year'] = np.where(temp_df['year_month_date'].dt.month.between(4, 9), 'HY1', 'HY2')

# Rename columns so both source files end up with the same column names
temp_df = temp_df.rename(columns={'sold_to_customer_n_latest': 'customer', 'order intake EUR': 'order_intake_EUR'})


In [25]:
# Check temp_df: export the prepared first frame to Excel for manual inspection.
# (The previous name `new_df` is not defined anywhere in this notebook, so the
# cell failed on a fresh kernel.)
temp_df.to_excel('data_files/new_file.xlsx', index=False)

In [None]:
# Keep only the columns needed downstream from the second workbook.
# .copy() makes temp_df2 an independent frame, so the column assignments below
# do not target a slice of the raw data (avoids pandas' SettingWithCopyWarning).
temp_df2 = orders_data_fy21_fy22[['year_month', 'company_code_n', 'sold_to_customer_n_latest', 'bu', 'bu_n', 'material', 'ms_code', 'order_intake_quantity', 'Order Intake Euro']].copy()

# Parse 'year_month' (YYYYMM) into a datetime and derive the fiscal year 'FY'.
# The fiscal year starts in April: months 4-12 take the calendar year,
# months 1-3 are assigned to the previous calendar year.
temp_df2['year_month_date'] = pd.to_datetime(temp_df2['year_month'], format='%Y%m')
temp_df2['FY'] = np.where(temp_df2['year_month_date'].dt.month >= 4, temp_df2['year_month_date'].dt.year, temp_df2['year_month_date'].dt.year - 1)

# Create fiscal 'quarter' column: calendar month -> fiscal quarter (Q1 = Apr-Jun, ..., Q4 = Jan-Mar)
quarter_dict = {1: 'Q4', 2: 'Q4', 3: 'Q4', 4: 'Q1', 5: 'Q1', 6: 'Q1', 7: 'Q2', 8: 'Q2', 9: 'Q2', 10: 'Q3', 11: 'Q3', 12: 'Q3'}
temp_df2['quarter'] = temp_df2['year_month_date'].dt.month.map(quarter_dict)

# Create 'half_year' column: Apr-Sep (inclusive) is 'HY1', every other month is 'HY2'
temp_df2['half_year'] = np.where(temp_df2['year_month_date'].dt.month.between(4, 9), 'HY1', 'HY2')

# Rename columns so both source files end up with the same column names
temp_df2 = temp_df2.rename(columns={'sold_to_customer_n_latest': 'customer', 'Order Intake Euro': 'order_intake_EUR'})

In [32]:
# Check temp_df2: export the prepared second frame to Excel (without the index) for manual inspection
temp_df2.to_excel('data_files/new_file2.xlsx', index=False)

In [37]:
# Stack the two prepared frames row-wise into one table with a fresh 0..n-1 index.
# NOTE(review): the input variable names suggest both files may contain FY21 -
# confirm the periods do not overlap, otherwise those rows are counted twice.
orders_data = pd.concat(objs=[temp_df, temp_df2], axis=0, ignore_index=True)
# Check orders_data: export the combined frame to Excel for manual inspection
orders_data.to_excel(excel_writer='data_files/orders_data.xlsx', index=False)

In [50]:
# Work on a copy so the dtype changes and row filters below leave orders_data untouched
orders_data_opt = orders_data.copy()
# Reduce the memory usage of the dataframe and improve performance
def check_unique_values(df, max_unique=50):
    """Convert low-cardinality columns of ``df`` to the 'category' dtype, in place.

    A column is converted when its number of distinct non-null values
    (``Series.nunique()``) is strictly below ``max_unique``. The check does not
    look at the column's dtype, so numeric columns with few distinct values
    are converted as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to optimise. It is modified in place.
    max_unique : int, default 50
        Exclusive upper bound on the number of distinct values a column may
        have to be converted.

    Returns
    -------
    None
        The names of the converted columns are printed, one per line; a
        single message is printed when nothing was converted.
    """
    # Decide first, convert afterwards: converting one column never changes
    # the cardinality of another, so the result is the same as a single pass.
    changed_columns = [col for col in df.columns if df[col].nunique() < max_unique]
    for col in changed_columns:
        df[col] = df[col].astype('category')

    if changed_columns:
        print("The following columns were changed to categorical data type: ")
        for col in changed_columns:
            print(col)
    else:
        print("No columns were changed to categorical data type.")
        
# Convert the low-cardinality columns of the working copy in place; the converted column names are printed
check_unique_values(orders_data_opt)

The following columns were changed to categorical data type: 
company_code_n
bu
bu_n
FY
quarter
half_year


In [69]:
# Drop every row whose 'ms_code' contains the text 'BOP'.
# The column is cast to str first so that .str.contains yields a plain boolean
# mask for every row (missing codes turn into text such as 'nan' and are kept).
orders_data_opt['ms_code'] = orders_data_opt['ms_code'].astype(str)
has_bop = orders_data_opt['ms_code'].str.contains('BOP')
orders_data_opt = orders_data_opt.loc[~has_bop]

In [72]:
# Export one sheet per 'bu' with the total order_intake_EUR per ms_code.
# The workbook is opened with a context manager: it is saved and closed when the
# block exits, even if writing a sheet fails. (ExcelWriter.save() was deprecated
# in pandas 1.5 and removed in pandas 2.0.)
with pd.ExcelWriter('data_files/sales_data_bu_sheets_total_amount.xlsx', engine='xlsxwriter') as writer:

    # Loop over unique values in 'bu' column
    for bu in orders_data_opt['bu'].unique():
        # Rows of the current 'bu', reduced to the two columns that are exported
        bu_df = orders_data_opt[orders_data_opt['bu'] == bu][['ms_code', 'order_intake_EUR']]

        # Group by ms_code, sum order_intake_EUR and list the largest totals first
        bu_df = bu_df.groupby(['ms_code']).sum().reset_index()
        bu_df = bu_df.sort_values('order_intake_EUR', ascending=False)

        # Write dataframe to a new sheet named after the 'bu' value
        bu_df.to_excel(writer, sheet_name=f'{bu}', index=False)

In [73]:
# Export one sheet per 'bu' with the order_intake_quantity per month and ms_code.
# The workbook is opened with a context manager: it is saved and closed when the
# block exits, even if writing a sheet fails. (ExcelWriter.save() was deprecated
# in pandas 1.5 and removed in pandas 2.0.)
with pd.ExcelWriter('data_files/sales_data_bu_sheets_total_quantity_per_month.xlsx', engine='xlsxwriter') as writer:

    # Loop over unique values in 'bu' column
    for bu in orders_data_opt['bu'].unique():
        # Rows of the current 'bu', reduced to the columns that are exported
        bu_df = orders_data_opt[orders_data_opt['bu'] == bu][['year_month_date', 'ms_code', 'order_intake_quantity']]

        # Group by month and ms_code, sum order_intake_quantity
        # (only the quantity is selected above, so no EUR amounts are summed here)
        # and list the largest quantities first
        bu_df = bu_df.groupby(['year_month_date', 'ms_code']).sum().reset_index()
        bu_df = bu_df.sort_values('order_intake_quantity', ascending=False)

        # Write dataframe to a new sheet named after the 'bu' value
        bu_df.to_excel(writer, sheet_name=f'{bu}', index=False)

In [130]:
# ABC analysis per bu
# For every 'bu' the ms_codes are ranked by total order_intake_EUR and classified
# by cumulative share: up to 70 % -> 'A', up to 90 % -> 'B', the rest -> 'C'.
# One sheet per 'bu' is written; the context manager saves and closes the workbook
# on exit (ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0).

# Get a list of the unique 'bu' values
bu_values = orders_data_opt['bu'].unique()

with pd.ExcelWriter('data_files/order_data_ABC_per_bu.xlsx', engine='xlsxwriter') as writer:

    # Loop over each 'bu' value
    for bu in bu_values:

        # Filter the data for the current 'bu' value
        bu_df = orders_data_opt[orders_data_opt['bu'] == bu]

        # Group by 'ms_code' and calculate the total 'order_intake_EUR'
        total_eur = bu_df.groupby('ms_code')['order_intake_EUR'].sum()
        grand_total = total_eur.sum()

        # Calculate the share of 'order_intake_EUR' for each 'ms_code'
        eur_share = total_eur / grand_total

        # Sort the 'ms_code' by descending 'order_intake_EUR'
        sorted_ms = total_eur.sort_values(ascending=False)

        # Cumulative sum over the sorted totals and its percentage of the grand total
        cumulative_sum = sorted_ms.cumsum()
        cumulative_percent = cumulative_sum / grand_total * 100

        # Categorize the 'ms_code' into ABC groups based on the cumulative percentage.
        # Open-ended outer bins keep values of exactly 0 and values that land a hair
        # above 100 through float rounding inside a class instead of turning into NaN.
        abc_group = pd.cut(cumulative_percent, bins=[-np.inf, 70, 90, np.inf], labels=['A', 'B', 'C'])

        # Create a new dataframe with the results. The constructor aligns the
        # Series on 'ms_code' and may reorder the rows, so the descending-EUR
        # order is restored explicitly to keep the cumulative columns readable.
        results_df = pd.DataFrame({
            'Total_EUR': total_eur,
            'Share': eur_share,
            'Cumulative_Sum_EUR': cumulative_sum,
            'Cumulative_Percent': cumulative_percent,
            'ABC_Group': abc_group
        }).reindex(sorted_ms.index)

        # Write the results to a new sheet; str() matches the f'{bu}' sheet names
        # used by the other exports and also covers non-string 'bu' values
        results_df.to_excel(writer, sheet_name=str(bu))

In [145]:
# Per-'bu' order statistics for an XYZ analysis: for every ms_code the total
# order_intake_quantity plus the mean and standard deviation of its monthly totals.
# One sheet per 'bu' is written to stock_XYZ.xlsx.

# Extract year and month from 'year_month' column (parsed as YYYYMM)
df = orders_data_opt.copy()
df['year_month'] = pd.to_datetime(df['year_month'], format='%Y%m')
df['year'] = df['year_month'].dt.year
df['month'] = df['year_month'].dt.month

# Get a list of the unique 'bu' values
bu_values = df['bu'].unique()

# The context manager saves and closes the workbook on exit, even if a sheet
# fails to write (ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0).
with pd.ExcelWriter('data_files/stock_XYZ.xlsx', engine='xlsxwriter') as writer:

    # Loop over each 'bu' value
    for bu in bu_values:

        # Filter the data for the current 'bu' value
        bu_df = df[df['bu'] == bu]

        # Total ordered quantity per ms_code
        orders = bu_df.groupby('ms_code')['order_intake_quantity'].sum()
        orders = orders.reset_index()

        # Monthly totals per ms_code ('M' = month-end bins; pandas >= 2.2 prefers the alias 'ME')
        df_monthly_orders = bu_df.groupby(['ms_code', pd.Grouper(key='year_month', freq='M')])['order_intake_quantity'].sum()

        # Mean / std of the monthly totals; std is NaN for a single month and is reported as 0.
        # The reset_index() result is now kept so 'ms_code' is a regular column for the merge
        # (previously the call discarded its result).
        df_mean_std = df_monthly_orders.groupby('ms_code').agg(['mean', 'std']).fillna(0)
        df_mean_std = df_mean_std.reset_index()

        # Attach the monthly statistics to the totals
        orders = orders.merge(df_mean_std, on='ms_code')

        # str() matches the f'{bu}' sheet names of the other exports and covers non-string 'bu' values
        orders.to_excel(writer, sheet_name=str(bu))

# TODO: derive the X/Y/Z classes from the 'mean' / 'std' columns and combine them
# with the ABC groups. An earlier draft of that step was stored in this cell as a
# never-executed string literal and has been removed.

In [148]:
# Inspect the monthly totals; the variable is assigned inside the per-'bu' loop above,
# so this shows only the values of the last 'bu' that was processed
df_monthly_orders

ms_code           year_month
COMRDS/Z          2021-09-30    16.0
                  2021-11-30     0.0
                  2021-12-31     0.0
                  2022-01-31     0.0
                  2022-02-28     0.0
                                ... 
XS770A-A2K2-A1CA  2022-11-30    16.0
                  2022-12-31    80.0
                  2023-01-31     2.0
                  2023-02-28     2.0
                  2023-03-31     0.0
Name: order_intake_quantity, Length: 486, dtype: float64