In [85]:
import pandas as pd
import numpy as np
import importlib

import data_file

In [None]:
# Re-execute the already-imported data_file module so that edits made to it
# on disk are picked up without restarting the kernel.
importlib.reload(data_file)

In [87]:
# Values of 'bu2' to keep when building the BU-restricted views below
# (used with Series.isin, so presumably a list-like of labels; defined in data_file).
target_bu = data_file.target_bu

In [157]:
# Load the raw order-intake export; every later cell derives from this frame.
df = pd.read_excel(io='data_files/order_intake.xlsx')

In [158]:
# Build a bu2 -> for_bu lookup from the rows where BOTH columns are populated.
# Only these two columns are checked for nulls: a bare dropna() would also
# discard every row that is missing any unrelated column (for example
# 'sales_order_so', whose gaps are filled in a later cell) and thereby silently
# shrink the lookup.
# If a bu2 value occurs with several for_bu values, the last row wins, because
# later keys overwrite earlier ones when the dict is built.
mapping_dict = (
    df.dropna(subset=['bu2', 'for_bu'])
    .set_index('bu2')['for_bu']
    .to_dict()
)

In [159]:
# Where 'for_bu' is missing, derive it from the row's 'bu2' through mapping_dict.
# Rows whose 'bu2' has no entry in the mapping remain NaN.
for_bu_from_bu2 = df['bu2'].map(mapping_dict)
df['for_bu'] = df['for_bu'].fillna(for_bu_from_bu2)

In [160]:
# Reduce 'FY' to a plain calendar year.
# The conversion is skipped when the column is already numeric, which keeps the
# cell safe to re-run: pd.to_datetime() interprets bare numbers as nanoseconds
# since the Unix epoch, so passing year numbers such as 2023 through it again
# would turn every value into 1970.
if not pd.api.types.is_numeric_dtype(df['FY']):
    df['FY'] = pd.to_datetime(df['FY']).dt.year

In [79]:
# Total order intake per customer (company, name, tier, type) and fiscal year.
# 'FY' is given as a column label like the other keys instead of passing the
# Series df['FY'] inside the label list: the grouping is the same, but it no
# longer depends on an external object being aligned to the frame's index.
customer_tiers_order_intake = (
    df.pivot_table(
        index=['company_code_n', 'customer_name', 'tier', 'type', 'FY'],
        values='order_intake_amount_eur',
        aggfunc='sum',
    )
    .reset_index()  # turn the grouping keys back into ordinary columns
    .fillna(0)
)
# Constant 1 on every row; presumably a counter for summing rows in the
# exported workbook -- confirm with the report's consumer.
customer_tiers_order_intake['qty'] = 1

In [80]:
# Total order intake per customer (company, name, tier, type) across all
# fiscal years. Keys are flattened back into columns, remaining NaN become 0,
# and a constant qty = 1 column is appended.
customer_tiers_order_intake_no_fy = (
    df.pivot_table(
        index=['company_code_n', 'customer_name', 'tier', 'type'],
        values='order_intake_amount_eur',
        aggfunc='sum',
    )
    .reset_index()
    .fillna(0)
    .assign(qty=1)
)

In [81]:
# Export both customer summaries into one workbook: sheet 'overall' holds the
# all-years totals, sheet 'fy' the per-fiscal-year totals.
# The context manager saves and closes the file on exit, including when one of
# the to_excel calls raises, so no open handle is left behind.
with pd.ExcelWriter('outcome/test.xlsx') as writer:
    customer_tiers_order_intake_no_fy.to_excel(writer, sheet_name='overall', index=False)
    customer_tiers_order_intake.to_excel(writer, sheet_name='fy', index=False)

In [161]:
# Give rows without a sales-order number an explicit placeholder so they are
# not treated as missing by later non-null counts.
df['sales_order_so'] = df['sales_order_so'].fillna('not_known')

# The same reports are also needed for a subset of business units only:
# keep the rows whose 'bu2' is listed in target_bu.
in_target_bu = df['bu2'].isin(target_bu)
df_target_bu = df.loc[in_target_bu]

In [93]:
def _sum_order_intake(frame, keys):
    """Sum ``order_intake_amount_eur`` per combination of ``keys``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Order rows containing the key columns and ``order_intake_amount_eur``.
    keys : list of str
        Column labels to group by.

    Returns
    -------
    pandas.DataFrame
        One row per key combination, keys as ordinary columns, remaining NaN
        replaced by 0, plus a constant ``qty`` column equal to 1.
    """
    summary = (
        frame.pivot_table(index=keys, values='order_intake_amount_eur', aggfunc='sum')
        .reset_index()
        .fillna(0)
    )
    summary['qty'] = 1
    return summary


_customer_keys = ['company_code_n', 'customer_name', 'tier', 'type']

# Per-fiscal-year totals for the target business units.
# 'FY' is referenced by label so the key is taken from df_target_bu itself;
# passing df['FY'] from the unfiltered frame only worked because pandas
# re-aligned that longer Series to the filtered frame's index.
customer_tiers_order_intake_bu = _sum_order_intake(df_target_bu, _customer_keys + ['FY'])

# All-years totals for the target business units.
customer_tiers_order_intake_no_fy_bu = _sum_order_intake(df_target_bu, _customer_keys)

# The context manager saves and closes the workbook even if a write fails.
with pd.ExcelWriter('outcome/test_2.xlsx') as writer:
    customer_tiers_order_intake_no_fy_bu.to_excel(writer, sheet_name='overall', index=False)
    customer_tiers_order_intake_bu.to_excel(writer, sheet_name='fy', index=False)

In [65]:
# NOTE(review): unique_by_year and unique_overall are not defined in any cell
# visible in this notebook, so this cell fails on a fresh kernel unless they
# are created elsewhere. It also targets 'outcome/test.xlsx', the same path an
# earlier export cell writes to, and will replace that file -- confirm whether
# this cell is still needed or should use its own output path.
#
# The frames are looked up BEFORE the writer is created, so a missing name
# raises without the output file having been opened for writing.
unique_sheets = {'test1': unique_by_year, 'test2': unique_overall}

# The context manager saves and closes the workbook even if a write fails.
with pd.ExcelWriter('outcome/test.xlsx') as writer:
    for sheet_name, frame in unique_sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

In [110]:
def _deal_stats(frame):
    """Mean order intake and number of rows per customer and fiscal year.

    Groups ``frame`` by company, customer, tier, type and FY; returns the mean
    of ``order_intake_amount_eur`` and the count of non-null ``sales_order_so``
    values (renamed to ``number_of_deals``) with the keys as ordinary columns.
    """
    per_customer_year = frame.groupby(
        ['company_code_n', 'customer_name', 'tier', 'type', 'FY']
    )
    return (
        per_customer_year
        .agg({'order_intake_amount_eur': 'mean', 'sales_order_so': 'count'})
        .reset_index()
        .rename(columns={'sales_order_so': 'number_of_deals'})
    )


# Same statistics for all business units and for the target business units.
average_deal_size = _deal_stats(df)
average_deal_size_bu = _deal_stats(df_target_bu)

In [170]:
# ABC classification of customers inside each company, based on each
# customer's share of the company's cumulative order intake.

# Total order intake per company (not referenced again in this cell).
company_sum = df.groupby('company_code_n')['order_intake_amount_eur'].sum()

# One row per (company, customer) with the customer's total, ordered so that
# within each company the largest customers come first.
df_sorted = (
    df.groupby(['company_code_n', 'customer_name'])['order_intake_amount_eur']
    .sum()
    .reset_index()
    .sort_values(by=['company_code_n', 'order_intake_amount_eur'], ascending=[True, False])
)

# Running total within each company, in the sorted order above.
df_sorted['cumulative_sum'] = df_sorted.groupby('company_code_n')['order_intake_amount_eur'].cumsum()

# Running total as a percentage of the company's largest running total. That
# maximum equals the company total as long as no customer total is negative.
df_sorted['cumulative_percentage'] = df_sorted.groupby('company_code_n')['cumulative_sum'].transform(lambda x: x / x.max() * 100)

# Half-open bins: A = [0, 70), B = [70, 95), C = [95, 100).
# A value of exactly 100 (the row where the running total peaks) and any value
# outside 0-100 fall outside every bin and come out as NaN here; they are
# assigned 'C' further down.
df_sorted['category'] = pd.cut(df_sorted['cumulative_percentage'], bins=[0, 70, 95, 100], labels=['A', 'B', 'C'], right=False)

# Attach the category to every order row. Left join: rows whose company or
# customer is missing were dropped by the groupby above and get NaN here.
merged_df = pd.merge(df, df_sorted[['company_code_n', 'customer_name', 'category']], on=['company_code_n', 'customer_name'], how='left')

# One row per (company, customer, category), with uncategorised rows set to 'C'.
# assign() builds a new frame, so the fill does not write into a column
# selection of merged_df (which triggered SettingWithCopyWarning before).
result = (
    merged_df[['company_code_n', 'customer_name', 'category']]
    .assign(category=lambda frame: frame['category'].fillna('C'))
    .drop_duplicates()
)

In [176]:
# Add each customer's ABC category to the per-year deal statistics.
# Inner join on (customer, company): statistics rows without a matching
# category row are dropped.
average_deal_size_x = pd.merge(
    average_deal_size,
    result,
    how='inner',
    on=['customer_name', 'company_code_n'],
)

In [177]:
# Export the categorised deal statistics to a single-sheet workbook.
# The context manager saves and closes the file on exit, including when
# to_excel raises, so no open handle is left behind.
with pd.ExcelWriter('outcome/deals.xlsx') as writer:
    average_deal_size_x.to_excel(writer, sheet_name='test1', index=False)