In [1]:
import pandas as pd
import numpy as np
import importlib

# Company information (e.g. the affiliate list) is kept in a separate module, data_file
import data_file

In [None]:
# Re-execute the data_file module so that changes made to it are picked up
# without restarting the kernel.
importlib.reload(data_file)

In [2]:
# Load the order workbook into a DataFrame; the path is relative to the
# notebook's working directory.
data_set = pd.read_excel('data_files/full_order_data_set.xlsx')

In [4]:
# Affiliates to analyse, as defined in the data_file module; used below to
# filter rows on 'company_code_n'.
cee_affiliates = data_file.cee_affiliates # list of target affiliates

In [None]:
# Keep only the rows that belong to the target affiliates. The non-inplace
# reset_index() returns a new frame, so the column assignments below act on an
# independent object instead of mutating a slice of data_set in place.
target_df = data_set.loc[data_set['company_code_n'].isin(cee_affiliates)].reset_index(drop=True)

# Where 'tier' is missing, fall back to the row's 'indirect_direct' value.
target_df['tier'] = target_df['tier'].fillna(target_df['indirect_direct'])

# Convert 'FY' to datetime so the cells below can use .dt.year.
# NOTE(review): assumes 'FY' holds date-like values; bare integer years would be
# read as nanosecond offsets from 1970 by pd.to_datetime - TODO confirm.
target_df['FY'] = pd.to_datetime(target_df['FY'])

In [None]:
# Per affiliate and 'FY' year: number of distinct sales persons and distinct
# customers. Named aggregation gives the output columns their final names
# directly, so no separate rename step is needed.
result = (
    target_df
    .groupby(['company_code_n', target_df['FY'].dt.year])
    .agg(
        unique_sales_person_count=('sales_person_n', 'nunique'),
        unique_customer_count=('customer_name', 'nunique'),
    )
    .reset_index()
)

In [6]:
# Distinct customer count per affiliate and 'FY' year, with one column per
# 'indirect_direct' value. Missing combinations stay NaN here (no fill applied).
direct_indirect = pd.pivot_table(
    target_df,
    values='customer_name',
    index=['company_code_n', target_df['FY'].dt.year],
    columns='indirect_direct',
    aggfunc=pd.Series.nunique,
).reset_index()

In [7]:
# Distinct customer count per affiliate and 'FY' year, with one column per
# 'tier' value; combinations without any customer are reported as 0.
tiers = (
    pd.pivot_table(
        target_df,
        values='customer_name',
        index=['company_code_n', target_df['FY'].dt.year],
        columns='tier',
        aggfunc=pd.Series.nunique,
    )
    .reset_index()
    .fillna(0)
)

In [8]:
# Summed 'order_intake_amount_eur' per affiliate and 'FY' year, with one column
# per 'tier' value; combinations without any rows are reported as 0.
tiers_order_intake = (
    pd.pivot_table(
        target_df,
        values='order_intake_amount_eur',
        index=['company_code_n', target_df['FY'].dt.year],
        columns='tier',
        aggfunc='sum',
    )
    .reset_index()
    .fillna(0)
)

In [9]:
# Order intake per (affiliate, customer, tier) with one column per 'FY' year;
# combinations without any rows are filled with 0.
pivot_table = target_df.pivot_table(
    index=['company_code_n', 'customer_name', 'tier'],
    columns=target_df['FY'].dt.year,
    values='order_intake_amount_eur',
    aggfunc='sum',
    fill_value=0,
)

# Grand total across all year columns (computed before the total column
# itself exists, so only year columns are summed).
pivot_table['Grand Total'] = pivot_table.sum(axis=1)

# Largest customers first; the grouping keys become ordinary columns again.
pivot_table_sorted = pivot_table.sort_values('Grand Total', ascending=False).reset_index()

# Final layout: grouping keys, year columns in ascending order, grand total.
# The year columns are taken from the pivot itself rather than from
# target_df['FY'], so a missing 'FY' value or a year whose rows were all
# dropped by the pivot cannot produce a label that does not exist in the frame.
columns_order = (
    ['company_code_n', 'customer_name', 'tier']
    + sorted(column for column in pivot_table.columns if column != 'Grand Total')
    + ['Grand Total']
)
pivot_table_sorted = pivot_table_sorted[columns_order]

In [36]:
# Average sales-order size, step 1: one row per individual sales order.
# Order intake is summed per (affiliate, customer, tier, sales order, FY) and
# the grouping keys are turned back into ordinary columns.
pivot_table_with_sales_order = target_df.pivot_table(
    values='order_intake_amount_eur',
    index=['company_code_n', 'customer_name', 'tier', 'sales_order_so', 'FY'],
    aggfunc='sum',
    fill_value=0,
).reset_index()

# Only the calendar year of 'FY' is needed from here on.
pivot_table_with_sales_order['FY'] = pivot_table_with_sales_order['FY'].dt.year

# Drop rows whose sales order number is 0 and keep just the columns needed for
# averaging (the sales order number itself is no longer required).
pivot_table_with_sales_order = pivot_table_with_sales_order.loc[
    pivot_table_with_sales_order['sales_order_so'] != 0,
    ['company_code_n', 'customer_name', 'tier', 'FY', 'order_intake_amount_eur'],
]

In [None]:
# Work on a copy so the per-order table itself is left untouched.
df = pivot_table_with_sales_order.copy()

# Mean order intake per (affiliate, customer, tier, year); each input row is
# one sales order, so this is the average order size for that combination.
average_sales_order_size = (
    df
    .groupby(by=['company_code_n', 'customer_name', 'tier', 'FY'])['order_intake_amount_eur']
    .agg('mean')
)

# Show the resulting Series
print(average_sales_order_size)

In [43]:
# Write every summary table to its own sheet of one workbook. The context
# manager saves and closes the file on exit, and also releases the file handle
# if one of the to_excel calls raises.
with pd.ExcelWriter('outcome/all_sheets.xlsx') as writer:
    pivot_table_sorted.to_excel(writer, sheet_name='order_intake_amount_eur', index=False)
    tiers_order_intake.to_excel(writer, sheet_name='tiers_order_intake', index=False)
    tiers.to_excel(writer, sheet_name='tiers', index=False)
    direct_indirect.to_excel(writer, sheet_name='direct_indirect', index=False)
    # This one is a Series indexed by the grouping keys, so the index is kept.
    average_sales_order_size.to_excel(writer, sheet_name='average_sales_order_size')

In [5]:
def group_identifier(dataframe, key, parameter, threshold=0.8, recent_periods=3):
    """Pivot order intake per ``key``/tier and label each ``key`` 'A' or 'B'.

    The input is copied, missing 'tier' values are replaced by 'Direct', and
    'order_intake_amount_eur' is summed per (``key``, 'tier', ``parameter``).
    The result is pivoted so that every distinct ``parameter`` value becomes a
    column, a row-wise 'Total' is added, and rows are sorted by 'Total' in
    descending order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``key``, ``parameter``, 'tier' and
        'order_intake_amount_eur'. It is not modified.
    key : str
        Column identifying the entity to classify (e.g. 'sales_person_n').
    parameter : str
        Column whose values become the pivot columns. When it is 'FY' the
        column is converted with ``pd.to_datetime`` and reduced to the year.
    threshold : float, default 0.8
        Cumulative share of total order intake from which a ``key`` is
        labelled 'B' instead of 'A'.
    recent_periods : int, default 3
        Number of most recent year columns inspected for 'check_status'
        (only used when ``parameter == 'FY'``; expected to be positive).

    Returns
    -------
    pandas.DataFrame
        Columns: 'identifier', ``key``, 'tier', one column per ``parameter``
        value, 'Total', and - for ``parameter == 'FY'`` only - 'check_status',
        which is 'x' when the inspected year columns are all missing or sum to
        zero and '' otherwise.
    """
    df = dataframe.copy()
    df['tier'] = df['tier'].fillna('Direct')

    if parameter == 'FY':
        df['FY'] = pd.to_datetime(df['FY']).dt.year

    # Order intake per (key, tier, parameter value).
    grouped_df = (
        df.groupby([key, 'tier', parameter])['order_intake_amount_eur']
        .sum()
        .reset_index()
    )

    # Total order intake per key, largest first, with its cumulative share.
    sorted_df = (
        grouped_df.groupby(key)['order_intake_amount_eur']
        .sum()
        .reset_index()
        .sort_values('order_intake_amount_eur', ascending=False)
    )
    sorted_df['cumulative_sum'] = sorted_df['order_intake_amount_eur'].cumsum()
    sorted_df['percentage'] = sorted_df['cumulative_sum'] / sorted_df['order_intake_amount_eur'].sum()

    # NOTE(review): the key whose own contribution crosses the threshold is
    # already labelled 'B' (a single key holding more than the threshold share
    # gets 'B' with no 'A' at all) - confirm this is the intended cut-off.
    sorted_df['identifier'] = 'A'
    sorted_df.loc[sorted_df['percentage'] >= threshold, 'identifier'] = 'B'

    # One row per (key, tier), one column per parameter value.
    pivot_df = grouped_df.pivot(index=[key, 'tier'], columns=parameter, values='order_intake_amount_eur')

    # Remember which columns hold amounts *before* the index levels become
    # columns: summing the whole frame afterwards would either fail on the
    # text columns or silently add a numeric key into the total.
    value_columns = list(pivot_df.columns)
    pivot_df = pivot_df.reset_index()
    pivot_df.columns.name = None

    pivot_df['Total'] = pivot_df[value_columns].sum(axis=1)
    pivot_df = pivot_df.sort_values('Total', ascending=False).reset_index(drop=True)

    columns = list(pivot_df.columns)

    # Attach the identifier by lookup so the Total-descending row order is
    # kept exactly as sorted above.
    identifier_by_key = dict(zip(sorted_df[key], sorted_df['identifier']))
    pivot_df['identifier'] = pivot_df[key].map(identifier_by_key)
    pivot_df = pivot_df[['identifier'] + columns].copy()

    if parameter == 'FY':
        # Flag rows with no order intake in the most recent years. Only real
        # year columns are inspected, so having fewer than `recent_periods`
        # years can no longer pull 'tier' or the key into the check.
        recent_values = pivot_df[value_columns[-recent_periods:]]
        mask = recent_values.isna().all(axis=1) | (recent_values.sum(axis=1) == 0)
        pivot_df['check_status'] = ''
        pivot_df.loc[mask, 'check_status'] = 'x'

    return pivot_df

In [6]:
def market_analysis_data_prep(data_df, target_affiliate):
    """Build the four order-intake breakdowns for one affiliate.

    Rows of ``data_df`` whose 'company_code_n' equals ``target_affiliate`` are
    passed to ``group_identifier`` for every combination of key
    ('sales_person_n', 'sold_to_customer_n') and pivot parameter
    ('for_bu', 'FY').

    Returns
    -------
    tuple of pandas.DataFrame
        In this order: sales person by BU, sales person by FY,
        sold-to customer by BU, sold-to customer by FY.
    """
    affiliate_rows = data_df[data_df['company_code_n'] == target_affiliate]

    # The nested iteration yields (key, parameter) pairs in exactly the order
    # documented above: both parameters for the first key, then the second key.
    return tuple(
        group_identifier(affiliate_rows, group_key, pivot_parameter)
        for group_key in ('sales_person_n', 'sold_to_customer_n')
        for pivot_parameter in ('for_bu', 'FY')
    )

In [None]:
# Create Excel writer object
writer = pd.ExcelWriter('outcome/sales_summary.xlsx')

for affiliate in cee_affiliates:
    sales_person_bu_orders, sales_person_fy_orders, customer_name_bu_orders, customer_name_fy_orders = market_analysis_data_prep(data_set, affiliate)
    
    # Write dataframe to a new sheet in the Excel file
    sales_person_bu_orders.to_excel(writer, sheet_name=f'bu_{affiliate}', index=False)
    sales_person_fy_orders.to_excel(writer, sheet_name=f'fy_{affiliate}', index=False)
    customer_name_bu_orders.to_excel(writer, sheet_name=f'cust_bu_{affiliate}', index=False)
    customer_name_fy_orders.to_excel(writer, sheet_name=f'cust_fy_{affiliate}', index=False)


# Save the Excel file
writer.close()

In [6]:
# Export the filtered order rows. The context manager saves and closes the
# workbook on exit, and also releases the file handle if to_excel raises.
with pd.ExcelWriter('outcome/order_intake.xlsx') as writer:
    target_df.to_excel(writer, sheet_name='target_df', index=False)