In [114]:
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
import importlib
import sqlite3
import numpy as np

# keeping company information in additional file
import data_file

In [2]:
# Load the complete `orders` table from the SQLite order database.
# try/finally guarantees the connection is released even when the query
# raises (missing table, unreadable file, ...).
conn = sqlite3.connect('data_files/order_data.db')
try:
    query = "SELECT * FROM orders"
    df_orders = pd.read_sql_query(query, conn)
finally:
    conn.close()

In [3]:
# Keep only the order columns used by the analysis below.
df_orders = df_orders.loc[:, [
    'company_code_n',
    'FY',
    'bu2',
    'sales_order_so',
    'sold_to_customer',
    'eu_industry_n',
    'order_intake_amount_eur',
    'eu_industry_segment_n_latest',
]]

In [4]:
# Load the complete `customers` table from the SQLite customer database.
# try/finally guarantees the connection is released even when the query raises.
conn2 = sqlite3.connect('data_files/customer_data.db')
try:
    query = "SELECT * FROM customers"
    df_customers = pd.read_sql_query(query, conn2)
finally:
    conn2.close()

In [5]:
# Keep the customer attributes needed downstream and rename two of them:
# tier_new -> 'business type', countries -> 'Country'.
df_customers = (
    df_customers[['sold_to_customer', 'customer_name', 'type', 'tier_new', 'countries']]
    .rename(columns={'tier_new': 'business type', 'countries': 'Country'})
)

In [6]:
# Industry lookup workbook; it is joined onto the orders by sales_order_so below.
so_industries_init = pd.read_excel(io='data_files/industries.xlsx')

In [None]:
# Enrich the orders with industry data (from the workbook) and customer
# attributes (from the customer table), then collapse the industry labels
# to one combined string per customer.

# Ensure the data type of 'sales_order_so' column is the same in both DataFrames
# NOTE(review): astype(str) may turn missing keys into the literal string 'nan',
# which would then match across frames — confirm the key columns have no nulls.
df_orders['sales_order_so'] = df_orders['sales_order_so'].astype(str)
so_industries_init['sales_order_so'] = so_industries_init['sales_order_so'].astype(str)

# Merge the DataFrames
# Both sides carry 'eu_industry_segment_n_latest', so the merge produces the
# suffixed columns *_x (orders) and *_y (workbook) that are combined below.
# NOTE(review): a left join duplicates order rows if the workbook has several
# rows per sales_order_so — presumably it is unique per order; verify.
merged_df = df_orders.merge(so_industries_init[['sales_order_so', 'ec_eu_industry_n', 'eu_industry_segment_n_latest']], 
                     on='sales_order_so', 
                     how='left')

# Row count right after the industry join
print(len(merged_df))

# Fill in the missing values
# The order's own value wins; the workbook value is only used where it is missing.
merged_df['eu_industry_n'] = merged_df['eu_industry_n'].fillna(merged_df['ec_eu_industry_n'])
merged_df['eu_industry_segment_n_latest'] = merged_df['eu_industry_segment_n_latest_x'].fillna(merged_df['eu_industry_segment_n_latest_y'])

# Drop unnecessary columns
merged_df = merged_df.drop(columns=['ec_eu_industry_n', 'eu_industry_segment_n_latest_x', 'eu_industry_segment_n_latest_y'])

# Row count after the helper columns were dropped (dropping columns cannot change it)
print(len(merged_df))

# Same dtype alignment for the customer join key, then attach customer attributes
df_customers['sold_to_customer'] = df_customers['sold_to_customer'].astype(str)
merged_df['sold_to_customer'] = merged_df['sold_to_customer'].astype(str)
merged_with_types = merged_df.merge(df_customers, on='sold_to_customer', how='left')

# Row count after the customer join
print(len(merged_with_types))

# Defaults for rows without a customer match / without industry information
merged_with_types['business type'] = merged_with_types['business type'].fillna('Direct')
merged_with_types['eu_industry_n'] = merged_with_types['eu_industry_n'].fillna('Others')
merged_with_types['eu_industry_segment_n_latest'] = merged_with_types['eu_industry_segment_n_latest'].fillna('Others')

# Combine unique values from 'eu_industry_n' for each company
# Every row of a customer receives the same '; '-joined, sorted list of labels.
# NOTE(review): rows whose customer_name is NaN (no customer match) are outside
# every group under groupby's default NaN handling, so transform presumably
# returns NaN for them and overrides the 'Others' default — verify that is acceptable.
merged_with_types['eu_industry_n'] = merged_with_types.groupby('customer_name')['eu_industry_n'].transform(lambda x: '; '.join(sorted(set(x))))
merged_with_types['eu_industry_segment_n_latest'] = merged_with_types.groupby('customer_name')['eu_industry_segment_n_latest'].transform(lambda x: '; '.join(sorted(set(x))))

In [8]:
# Collection of business-unit codes (matched against the 'bu2' column with
# isin / `in` inside anupamtenyears); maintained in the separate data_file module.
target_bu = data_file.target_bu

In [131]:
def soyears(temp, q=''):
    """Summarise sales-order frequency and size per customer.

    Parameters
    ----------
    temp : pandas.DataFrame
        Order lines with the columns 'customer_name', 'FY' (numeric year),
        'sales_order_so' and 'order_intake_amount_eur'. The frame is not
        modified.
    q : str, optional
        Pass ``'bu'`` to append the suffix ``'_bu'`` to the two metric
        column names; any other value leaves them unsuffixed.

    Returns
    -------
    pandas.DataFrame
        One row per customer with the columns 'customer_name',
        'so_number_per_FY{suffix}' (mean number of distinct sales orders per
        fiscal year) and 'so_average_in_eur{suffix}' (mean order value, where
        an order's value is the sum of its lines).
    """
    suf = '_bu' if q == 'bu' else ''

    # FY 23 is not full yet, so it is excluded from both averages.
    # Both aggregations below must read this filtered frame; reading `temp`
    # directly would silently bring the partial year back in.
    df = temp[temp['FY'] < 2023]

    # 1. Calculate average number of unique 'sales_order_so' per year ('FY') for each 'customer_name'
    grouped1 = (df.groupby(['customer_name', 'FY'])
            .agg(unique_so_per_FY=pd.NamedAgg(column='sales_order_so', aggfunc='nunique'))
            .reset_index()
            .groupby('customer_name')
            .agg(so_number_per_FY=pd.NamedAgg(column='unique_so_per_FY', aggfunc='mean'))
            .reset_index())

    # 2. Sum the lines of each sales order, then average those order totals per customer
    grouped2 = (df.groupby(['customer_name', 'sales_order_so'])
            .agg(order_total=pd.NamedAgg(column='order_intake_amount_eur', aggfunc='sum'))
            .reset_index()
            .groupby('customer_name')
            .agg(so_average_in_eur=pd.NamedAgg(column='order_total', aggfunc='mean'))
            .reset_index())

    # 3. Merge the results
    result_df = pd.merge(grouped1, grouped2, on='customer_name', how='inner')
    result_df.columns = ['customer_name', f'so_number_per_FY{suf}', f'so_average_in_eur{suf}']

    return result_df


def anupamtenyears(opco, df_init):
    """Build per-customer order-intake tables by fiscal year and by business unit.

    Parameters
    ----------
    opco : str
        Value matched against 'company_code_n'. Note that the special value
        ``'all'`` does NOT select every company: it selects the rows whose
        'Country' equals ``'Norway'``.
    df_init : pandas.DataFrame
        Enriched order lines. Columns read here: 'company_code_n', 'Country',
        'FY', 'bu2', 'customer_name', 'type', 'business type',
        'eu_industry_n', 'eu_industry_segment_n_latest', 'sales_order_so',
        'order_intake_amount_eur'. The frame is not modified.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``result``: one row per customer with the target-BU order intake for
        2017-2023, 'Total' (target BUs), 'Total_Total' (all BUs),
        'Yoko Product Share' (= Total / Total_Total) and the sales-order
        metrics from ``soyears`` for all BUs and for target BUs ('_bu').
        ``pivot_df_bu``: order intake per customer and fiscal year, one column
        per 'bu2' value (target BUs prefixed with 'PCI_') plus 'Total'.
    """
    if opco != 'all':
        df = df_init[df_init['company_code_n'] == opco].copy()
    else:
        df = df_init[df_init['Country'] == 'Norway'].copy()

    # NOTE(review): assumes 'FY' is stored in a form pd.to_datetime parses to
    # the intended year (e.g. '2022' or a date string) — confirm against the DB.
    df['FY'] = pd.to_datetime(df['FY']).dt.year

    df['bu2'] = df['bu2'].astype(str)
    # Taken before the 'PCI_' prefix is applied, so it still holds the raw BU codes
    df_target_bu = df[df['bu2'].isin(target_bu)]

    so_df = soyears(df)
    so_df_bu = soyears(df_target_bu, q='bu')

    # Update the values in 'bu2' column if they are in the list
    df['bu2'] = df['bu2'].apply(lambda x: 'PCI_' + x if x in target_bu else x)

    # Control totals: all BUs, then target BUs only
    print(df.order_intake_amount_eur.sum())
    print(df_target_bu.order_intake_amount_eur.sum())

    # Create the pivot tables
    customer_index = ['customer_name', 'Country', 'type', 'business type',
                      'eu_industry_n', 'eu_industry_segment_n_latest']
    pivot_df = df.pivot_table(index=customer_index, columns='FY', values='order_intake_amount_eur', aggfunc='sum', fill_value=0)
    pivot_df_target = df_target_bu.pivot_table(index=customer_index, columns='FY', values='order_intake_amount_eur', aggfunc='sum', fill_value=0)
    pivot_df_bu = df.pivot_table(index=customer_index + ['FY'], columns='bu2', values='order_intake_amount_eur', aggfunc='sum', fill_value=0)

    # Add the 'Total' column
    pivot_df['Total'] = pivot_df.sum(axis=1)
    pivot_df_target['Total'] = pivot_df_target.sum(axis=1)

    # From the all-BU pivot only the grand total per customer is needed
    pivot_df = pivot_df[['Total']]
    pivot_df.columns = ['Total_Total']

    result = pivot_df.merge(pivot_df_target, left_index=True, right_index=True, how='left')
    # Assign instead of a chained inplace fillna, which does not write back
    # to `result` under pandas Copy-on-Write.
    result['Yoko Product Share'] = (result['Total'] / result['Total_Total']).fillna(0)

    # reindex instead of plain selection: a year without any target-BU order
    # is absent from the pivot and would otherwise raise KeyError; it is
    # reported as 0 like every other empty cell.
    year_columns = [2017, 2018, 2019, 2020, 2021, 2022, 2023]
    result = result.reindex(
        columns=year_columns + ['Total', 'Total_Total', 'Yoko Product Share'],
        fill_value=0,
    )
    result = result.sort_values(by=['Total_Total', 'Yoko Product Share'], ascending=False)
    # Customers without target-BU orders got NaN from the left merge
    result = result.fillna(0)
    result = result.reset_index()

    result = result.merge(so_df, how='left', on='customer_name')
    result = result.merge(so_df_bu, how='left', on='customer_name')

    pivot_df_bu['Total'] = pivot_df_bu.sum(axis=1)

    return result, pivot_df_bu

In [None]:
# all opcos preparation
opco_list = ['all']

for opco in opco_list:

    # Run the aggregation once and unpack both tables (it was previously
    # executed twice per opco just to index [0] and [1]).
    df_for_excel, df_target_for_excel = anupamtenyears(opco, merged_with_types)

    # The context manager closes the workbook even if writing a sheet raises.
    with pd.ExcelWriter(f'data_files/outcome/{opco}_.xlsx') as writer:
        df_for_excel.to_excel(writer, sheet_name='years', index=True)
        df_target_for_excel.to_excel(writer, sheet_name='bu_years', index=True)

In [65]:
# Discount analysis: load the discount lines and turn the approval number
# into a 0/1 flag (1 = an approval number is present, 0 = it is missing).
discounts_df = pd.read_excel('data_files/discounts.xlsx').assign(
    sstp_approval_no=lambda frame: frame['sstp_approval_no'].notna().astype(int)
)

In [None]:
# Bring the join keys to str on both sides before attaching customer attributes
df_customers['sold_to_customer'] = df_customers['sold_to_customer'].astype(str)
discounts_df['sold_to_customer'] = discounts_df['sold_to_customer'].astype(str)
discounts_df['sales_order_so'] = discounts_df['sales_order_so'].astype(str)
discounts_with_customer_name = pd.merge(
    discounts_df, df_customers, how='left', on='sold_to_customer'
)

# Row count after the customer join
print(len(discounts_with_customer_name))

# Defaults for missing business type / industry segment
discounts_with_customer_name['business type'] = (
    discounts_with_customer_name['business type'].fillna('Direct')
)
discounts_with_customer_name['eu_industry_segment_n_latest'] = (
    discounts_with_customer_name['eu_industry_segment_n_latest'].fillna('Others')
)

# Keep only the lines that have a customer 'type'
discounts_with_customer_name = discounts_with_customer_name[
    discounts_with_customer_name['type'].notna()
]

# Working frame for the discount analysis: selected columns plus an 'FY'
# column mirroring 'fiscal_year'
wdf = discounts_with_customer_name[[
    'customer_name', 'type', 'business type', 'Country', 'fiscal_year', 'year_month', 'sales_order_so',
    'sstp_approval_no', 'eu_industry_segment_n_latest', 'bu', 'material', 'ms_code',
    'sales_quantity', 'sales_rsp_eur', 'sales_eur', 'cogs(ms)_eur',
    'gp_eur',
]].copy()

wdf['FY'] = wdf['fiscal_year']

In [112]:
def _sum_by_fy(frame, index, values):
    """Pivot `values` summed per `index` row and 'FY' column; empty cells become 0."""
    return frame.pivot_table(index=index, columns='FY', values=values, aggfunc='sum', fill_value=0)


# Per customer
_customer_index = ['customer_name', 'Country', 'type', 'business type']
pivot_wdf_qty = _sum_by_fy(wdf, _customer_index, 'sales_quantity')
pivot_wdf_sales_rsp = _sum_by_fy(wdf, _customer_index, 'sales_rsp_eur')
pivot_wdf_sales = _sum_by_fy(wdf, _customer_index, 'sales_eur')
pivot_wdf_cogs = _sum_by_fy(wdf, _customer_index, 'cogs(ms)_eur')
pivot_wdf_gp = _sum_by_fy(wdf, _customer_index, 'gp_eur')

# Per business unit
bu_pivot_wdf_qty = _sum_by_fy(wdf, 'bu', 'sales_quantity')
bu_pivot_wdf_sales_rsp = _sum_by_fy(wdf, 'bu', 'sales_rsp_eur')
bu_pivot_wdf_sales = _sum_by_fy(wdf, 'bu', 'sales_eur')
bu_pivot_wdf_cogs = _sum_by_fy(wdf, 'bu', 'cogs(ms)_eur')
bu_pivot_wdf_gp = _sum_by_fy(wdf, 'bu', 'gp_eur')

# Per material
material_pivot_wdf_qty = _sum_by_fy(wdf, 'material', 'sales_quantity')
material_pivot_wdf_sales_rsp = _sum_by_fy(wdf, 'material', 'sales_rsp_eur')
material_pivot_wdf_sales = _sum_by_fy(wdf, 'material', 'sales_eur')
material_pivot_wdf_cogs = _sum_by_fy(wdf, 'material', 'cogs(ms)_eur')
material_pivot_wdf_gp = _sum_by_fy(wdf, 'material', 'gp_eur')

In [None]:
# Both ratios are undefined where their denominator is 0; those cells are set
# to NaN so that no +/-inf values leak into later aggregations or exports.
with np.errstate(divide='ignore', invalid='ignore'):
    # 1. Calculate % Discount: (sales_rsp_eur - sales_eur) / sales_rsp_eur
    pivot_wdf_discount = (
        (pivot_wdf_sales_rsp - pivot_wdf_sales) / pivot_wdf_sales_rsp * 100
    ).where(pivot_wdf_sales_rsp != 0)

    # 2. Calculate % Profit: gp_eur / sales_eur
    pivot_wdf_profit = (pivot_wdf_gp / pivot_wdf_sales * 100).where(pivot_wdf_sales != 0)

In [128]:
# Group by sales_order_so and determine its approval status:
# an order counts as approved when at least one of its lines carries the flag.
approval_status = wdf.groupby('sales_order_so')['sstp_approval_no'].agg(['sum', 'count'])

approval_status['approval_type'] = np.where(
    approval_status['sum'] > 0, 'With Approval', 'Without Approval'
)

# Attach the per-order status by lookup instead of a merge. assign() overwrites
# an existing 'approval_type' column, so re-running this cell no longer produces
# 'approval_type_x' / 'approval_type_y' followed by a KeyError.
wdf = wdf.assign(approval_type=wdf['sales_order_so'].map(approval_status['approval_type']))

# Now, get counts of distinct sales orders for each customer_name
total_sales_orders = wdf.groupby('customer_name')['sales_order_so'].nunique()

sales_orders_with_approval = wdf[wdf['approval_type'] == 'With Approval'].groupby('customer_name')['sales_order_so'].nunique()

sales_orders_without_approval = wdf[wdf['approval_type'] == 'Without Approval'].groupby('customer_name')['sales_order_so'].nunique()

# Combine the results
result = pd.concat([total_sales_orders, sales_orders_with_approval, sales_orders_without_approval], axis=1)
result.columns = ['Total Sales Orders', 'With Approval', 'Without Approval']

# A customer with no order in a category is missing from that series and would
# show up as NaN; the correct count is 0.
result = result.fillna(0).astype(int)

In [130]:
# Export the per-customer approval counts for manual checking.
# The context manager closes the workbook even if the write raises.
with pd.ExcelWriter('data_files/outcome/check_.xlsx') as writer:
    result.to_excel(writer, sheet_name='years', index=True)

In [92]:
# Work on a copy so the derived column added below does not end up in wdf
df = wdf.copy()

# 2. Time series analysis: total sales per year_month
monthly_sales = df['sales_eur'].groupby(df['year_month']).sum()

# 3. Product analysis: ten largest business units / materials by sales and by gross profit
top_bu_by_sales = df['sales_eur'].groupby(df['bu']).sum().nlargest(n=10)
top_products_by_sales = df['sales_eur'].groupby(df['material']).sum().nlargest(n=10)
top_bu_by_profit = df['gp_eur'].groupby(df['bu']).sum().nlargest(n=10)
top_products_by_profit = df['gp_eur'].groupby(df['material']).sum().nlargest(n=10)

# 4. Customer analysis: ten largest customers by sales
top_customers_by_sales = df['sales_eur'].groupby(df['customer_name']).sum().nlargest(n=10)

# 6. Discount analysis: absolute discount per line and its mean over all lines
df['discount_eur'] = df['sales_rsp_eur'].sub(df['sales_eur'])
avg_discount = df['discount_eur'].mean()