In [1]:
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Get current year and month
now = datetime.now()

# Subtract one month from current date
last_month = now - relativedelta(months=1)

# Format last month as string in YYYYMM format
year_month = last_month.strftime("%Y%m")

# Construct file path
file_path = f"data_files/{year_month}.xlsx"

# Read xlsx file into pandas DataFrame
df = pd.read_excel(file_path)

In [2]:
#combining products groups 
control = ['YY112', 'YY113', 'YY114', 'YY115', 'YY138']
transmitters = ['YY116', '811']
flowmeters = ['YY117', '821']
gc = ['YY118']
analytical = ['YY119']
asi = ['311', '312']

In [3]:
#necessary modifications of columns
df['bu'] = df['bu'].astype(str)
df['company_up'] = df['sold_to_customer_n'].str.upper()
df = df.rename(columns={'Unnamed: 14': 'order_intake_amount_eur'})
# Filter out rows with unwanted value in column2
df = df[~df['company_up'].str.contains('YOKOGAWA') & ~df['company_up'].str.contains('PXISE') & ~df['company_up'].str.contains('KBC')]  

In [7]:
# Define a function to combine values in bu
def combine_values(x):
    if x in control:
        return 'Control (YY112+YY113+YY114+YY115+YY138)'
    elif x in transmitters:
        return 'Transmitters (YY116+811)'
    elif x in flowmeters:
        return 'Flowmeters (YY117+821)'
    elif x in gc:
        return 'GC (YY118)'
    elif x in analytical:
        return 'Analytical Products (YY119)'
    elif x in asi:
        return 'ASI (311 + 312)'
    else:
        return 'Other'

# Apply the function to create a new column with combined values
df['Business Unit'] = df['bu'].apply(combine_values)

# Group the data by the new combined column and sum the values in order_intake_amount_eur
report_bu = df.groupby(['Business Unit'])['order_intake_amount_eur'].agg('sum')
report_bu = report_bu.div(1000).round(1)

# Compute the total excluding the value Other in Business Unit
report_bu = report_bu.loc[report_bu.index.get_level_values('Business Unit') != 'Other'].groupby('Business Unit').agg('sum').to_frame('Total, kEuro')

# Append the total row to the bottom of the grouped DataFrame
report_bu = report_bu.reset_index()
col_sum = report_bu['Total, kEuro'].sum()
report_bu.loc[len(report_bu)] = ['Total: ', col_sum]

report_opco = df.groupby(['company_code_n', 'Business Unit'])['order_intake_amount_eur'].sum()

# Divide the sums by 1000 and round them
report_opco = report_opco.div(1000).round(1)
# Sort the values in descending order
report_opco = report_opco.sort_values(ascending=False)

In [8]:
# Create an ExcelWriter object
writer = pd.ExcelWriter('data_files/bu_summary.xlsx')

# Save each DataFrame to a separate sheet in the same file
report_bu.to_excel(writer, sheet_name='report_bu')
report_opco.to_excel(writer, sheet_name='report_opco')

# Save the file
writer.save()

  writer.save()
