In [1]:
import pandas as pd
import numpy as np
import importlib

import data_file

In [None]:
# Re-execute the data_file module so edits made to it since the first import
# are picked up without restarting the kernel.
importlib.reload(data_file)

In [23]:
# Collection of business units read from data_file; later cells test 'bu2'
# values for membership in it.
target_bu = data_file.target_bu

In [24]:
# Load the order-intake workbook; the path is relative to the working directory.
df = pd.read_excel('data_files/order_intake.xlsx')

In [25]:
# Build a bu2 -> for_bu lookup from rows where BOTH of those columns are present.
# dropna() without `subset` would also discard rows that merely have a NaN in
# some unrelated column, silently shrinking the mapping.
# If a bu2 value appears with several for_bu values, the last occurrence wins.
mapping_dict = (
    df.dropna(subset=['bu2', 'for_bu'])
    .set_index('bu2')['for_bu']
    .to_dict()
)

In [26]:
# Where 'for_bu' is missing, fall back to the value looked up from the row's 'bu2'
for_bu_from_bu2 = df['bu2'].map(mapping_dict)
df['for_bu'] = df['for_bu'].fillna(for_bu_from_bu2)

In [27]:
# Reduce 'FY' to a calendar-year number.
# The conversion is skipped when the column is already numeric: pd.to_datetime
# interprets plain numbers as nanosecond offsets from 1970-01-01, so running it
# on year numbers (e.g. when this cell is executed a second time) would turn
# every value into 1970.
if not pd.api.types.is_numeric_dtype(df['FY']):
    df['FY'] = pd.to_datetime(df['FY']).dt.year

In [28]:
# Label used to separate the target business units from everything else
def check_value(value):
    """Return 'product bu' if `value` is contained in `target_bu`, else 'other'."""
    return 'product bu' if value in target_bu else 'other'

# Derive the 'department' label of every row from its 'bu2' value
df['department'] = df['bu2'].apply(check_value)

In [39]:
# Sum of order intake per customer and fiscal year, exported to outcome/test.xlsx.

# Variant WITH the department split ('product bu' vs 'other')
customer_tiers_order_intake = df.pivot_table(
    index=['company_code_n', 'customer_name', 'tier', 'type', 'department', 'FY'],
    values='order_intake_amount_eur',
    aggfunc='sum',
).reset_index()
customer_tiers_order_intake = customer_tiers_order_intake.fillna(0)
customer_tiers_order_intake['qty'] = 1  # constant 1 on every aggregated row

# Variant WITHOUT the department split
customer_tiers_order_intake_no_d = df.pivot_table(
    index=['company_code_n', 'customer_name', 'tier', 'type', 'FY'],
    values='order_intake_amount_eur',
    aggfunc='sum',
).reset_index()
customer_tiers_order_intake_no_d = customer_tiers_order_intake_no_d.fillna(0)
customer_tiers_order_intake_no_d['qty'] = 1  # constant 1 on every aggregated row

# The context manager saves and closes the workbook even if a to_excel call raises.
with pd.ExcelWriter('outcome/test.xlsx') as writer:
    customer_tiers_order_intake_no_d.to_excel(writer, sheet_name='overall', index=False)
    customer_tiers_order_intake.to_excel(writer, sheet_name='fy', index=False)

In [20]:
# Mean order intake and number of rows per customer and fiscal year,
# for all data and for the target business units only.

def _deal_summary(frame):
    """Aggregate `frame` per company/customer/tier/type/FY.

    Returns a DataFrame with the group keys, the mean of
    'order_intake_amount_eur' and 'number_of_deals' (count of non-null
    'sales_order_so' values in the group).
    """
    # NOTE(review): the count is per row, not per distinct sales order; if one
    # order spans several rows it is counted more than once - confirm intent.
    return (
        frame.groupby(['company_code_n', 'customer_name', 'tier', 'type', 'FY'])
        .agg(
            order_intake_amount_eur=('order_intake_amount_eur', 'mean'),
            number_of_deals=('sales_order_so', 'count'),
        )
        .reset_index()
    )


average_deal_size = _deal_summary(df)

# Same summary restricted to rows whose 'bu2' is one of the target business units
df_target_bu = df[df['bu2'].isin(target_bu)]
average_deal_size_bu = _deal_summary(df_target_bu)

# The context manager saves and closes the workbook even if a to_excel call raises.
with pd.ExcelWriter('outcome/average_deals.xlsx') as writer:
    average_deal_size.to_excel(writer, sheet_name='as', index=False)
    average_deal_size_bu.to_excel(writer, sheet_name='asbu', index=False)

In [None]:
# A/B/C classification of customers inside each company, based on the
# cumulative share of order intake (largest customers first).

# Total order intake per company (not referenced again within this cell)
company_sum = df.groupby('company_code_n')['order_intake_amount_eur'].sum()

# Total per (company, customer), ordered by company and then by descending total
df_sorted = (
    df.groupby(['company_code_n', 'customer_name'])['order_intake_amount_eur']
    .sum()
    .reset_index()
    .sort_values(by=['company_code_n', 'order_intake_amount_eur'], ascending=[True, False])
)

# Running total within each company
df_sorted['cumulative_sum'] = df_sorted.groupby('company_code_n')['order_intake_amount_eur'].cumsum()

# Running total as a percentage of the largest running total of the company
df_sorted['cumulative_percentage'] = (
    df_sorted.groupby('company_code_n')['cumulative_sum'].transform(lambda x: x / x.max() * 100)
)

# Bins are left-closed: [0, 70) -> A, [70, 95) -> B, [95, 100) -> C.
# A value of exactly 100 (or anything outside 0-100) falls outside every bin and
# becomes NaN here; those rows are assigned 'C' further down.
df_sorted['category'] = pd.cut(
    df_sorted['cumulative_percentage'],
    bins=[0, 70, 95, 100],
    labels=['A', 'B', 'C'],
    right=False,
)

# Attach the category to every original row
merged_df = pd.merge(
    df,
    df_sorted[['company_code_n', 'customer_name', 'category']],
    on=['company_code_n', 'customer_name'],
    how='left',
)

# One row per (company, customer, category). assign() works on a new frame, so
# nothing is written into a slice of merged_df (avoids SettingWithCopyWarning).
result = (
    merged_df[['company_code_n', 'customer_name', 'category']]
    .assign(category=lambda frame: frame['category'].fillna('C'))
    .drop_duplicates()
)

In [30]:
# Add the A/B/C category to the per-customer deal summary (inner join on customer and company)
average_deal_size_x = pd.merge(
    average_deal_size,
    result,
    on=['customer_name', 'company_code_n'],
)

In [31]:
# Export the categorised deal summary. The context manager saves and closes the
# workbook even if to_excel raises.
with pd.ExcelWriter('outcome/deals.xlsx') as writer:
    average_deal_size_x.to_excel(writer, sheet_name='test1', index=False)

In [38]:
# Number of distinct customers per company, fiscal year, salesperson and tier
result = (
    df.groupby(['company_code_n', 'FY', 'sales_person_n', 'tier'])['customer_name']
    .nunique()
)

# Turn the grouped Series into a flat table (group keys become columns)
result_df = result.reset_index()

# Export to Excel without the row index
result_df.to_excel('result.xlsx', index=False)

In [39]:
# Per company and fiscal year: distinct salespeople, distinct customers,
# distinct sales orders and total order intake.
result_sales_person = df.groupby(['company_code_n', 'FY'])['sales_person_n'].nunique()
result_customer = df.groupby(['company_code_n', 'FY'])['customer_name'].nunique()
result_sales_order = df.groupby(['company_code_n', 'FY'])['sales_order_so'].nunique()
result_order_intake = df.groupby(['company_code_n', 'FY'])['order_intake_amount_eur'].sum()

# Combine the four Series computed above into one table. Building the frame
# from `result` would re-export the previous cell's data and ignore these
# aggregates entirely.
result_df = pd.concat(
    [result_sales_person, result_customer, result_sales_order, result_order_intake],
    axis=1,
).reset_index()

# Writing the DataFrame to an Excel file
result_df.to_excel('result_2.xlsx', index=False)

In [41]:
# Per company and fiscal year: one small table per metric, then joined together.

# Distinct salespeople
result_sales_person = (
    df.groupby(['company_code_n', 'FY'])['sales_person_n']
    .nunique()
    .reset_index()
    .rename(columns={'sales_person_n': 'sales_person_count'})
)

# Distinct customers
result_customer = (
    df.groupby(['company_code_n', 'FY'])['customer_name']
    .nunique()
    .reset_index()
    .rename(columns={'customer_name': 'customer_count'})
)

# Distinct sales orders
result_sales_order = (
    df.groupby(['company_code_n', 'FY'])['sales_order_so']
    .nunique()
    .reset_index()
    .rename(columns={'sales_order_so': 'sales_order_count'})
)

# Total order intake
result_order_intake = (
    df.groupby(['company_code_n', 'FY'])['order_intake_amount_eur']
    .sum()
    .reset_index()
    .rename(columns={'order_intake_amount_eur': 'order_intake_sum'})
)

# Join the four tables on the shared keys (inner joins, in the order above)
result_df = (
    result_sales_person
    .merge(result_customer, on=['company_code_n', 'FY'])
    .merge(result_sales_order, on=['company_code_n', 'FY'])
    .merge(result_order_intake, on=['company_code_n', 'FY'])
)

# Export to Excel without the row index
result_df.to_excel('result_3.xlsx', index=False)


In [65]:
# Descriptive statistics of order totals per company and tier.
# The per-order totals are computed here from `df` so that this cell does not
# depend on `df_mod`, which is only created in cells further down the notebook
# (a top-to-bottom run would otherwise fail with a NameError).
order_totals = (
    df.groupby(['company_code_n', 'tier', 'sales_order_so'])['order_intake_amount_eur']
    .sum()
    .reset_index()
)
# Orders whose total is exactly zero are excluded from the statistics
order_totals = order_totals[order_totals['order_intake_amount_eur'] != 0]

statistics = order_totals.groupby(['company_code_n', 'tier'])['order_intake_amount_eur'].describe()

# Saving the statistics for each group to an Excel file
statistics.to_excel('statistics_by_company.xlsx')

In [60]:
# Keep only the columns needed for the per-order totals
df_mod = df.loc[:, ['company_code_n', 'tier', 'sales_order_so', 'order_intake_amount_eur']]

In [61]:
# Collapse the rows to one total per company, tier and sales order
df_mod = (
    df_mod
    .groupby(['company_code_n', 'tier', 'sales_order_so'])['order_intake_amount_eur']
    .sum()
    .reset_index()
)

In [62]:
# Discard orders whose total is exactly zero
df_mod = df_mod[df_mod['order_intake_amount_eur'] != 0]