This script automates the generation of a report, in the format required for a specific task, from an existing source report. The report compares the budget with the current performance and with the previous year's performance for the same period, across designated product groups. The source data is stored in an Excel file; to retrieve it, the value of each branch must be substituted, one at a time, into the corresponding cell.

To reduce development time, the updated file is saved to a separate folder for each branch. Each saved file is then opened, saved, and read again, because formula cells are not updated automatically. The resulting data sets are processed by functions to produce the final result, which is saved to an Excel file. Afterwards, macros are applied to finalize the file.

In summary, this script automates report generation, performs the data comparison and result processing, and produces a file to which macros can be applied for further processing.

In [None]:
from openpyxl import load_workbook
import pandas as pd
import openpyxl
import numpy as np
import os

In [None]:
# Reshape one source sheet into the layout used for reporting.

# (report row label, first row, one-past-last row) of each product group's
# nine-row section in the source sheet (positional, 0-based).
_PRODUCT_GROUP_ROWS = (
    ('Transmitters', 20, 29),
    ('Flowmeters', 33, 42),
    ('QA/ASI', 46, 55),
    ('Analytical', 59, 68),
    ('Netsol', 72, 81),
)

# Column order of the finished report.
_REPORT_COLUMNS = [
    'FY23B', 'FY23A', 'FY22A',
    'FY23B - GP', 'FY23A - GP', 'FY22A - GP',
    'FY23B - GM', 'FY23A - GM', 'FY22A - GM',
]


def _group_row(df, label, start, stop):
    """Return rows ``start:stop`` of *df* as a one-row frame indexed by *label*.

    Only the second and fourth columns (positions 1 and 3) are used: the
    second supplies the column headers of the result, the fourth the values.
    """
    section = df.iloc[start:stop, [1, 3]].reset_index(drop=True)
    section = section.rename(columns={section.columns[1]: label})
    # Transposing turns the value column into a single row named `label`;
    # rename_axis drops the leftover name of the former header column.
    return section.set_index(section.columns[0]).T.rename_axis(None, axis=1)


def how_a_need(df):
    """Build the report table for one source sheet.

    Parameters
    ----------
    df : pandas.DataFrame
        Sheet contents as read by ``pd.read_excel``. Each product group is
        taken from a fixed positional row range (see ``_PRODUCT_GROUP_ROWS``),
        using the second column as headers and the fourth column as values.

    Returns
    -------
    pandas.DataFrame
        One row per product group (Transmitters, Flowmeters, QA/ASI,
        Analytical, Netsol) and the columns listed in ``_REPORT_COLUMNS``,
        in that order.

    Raises
    ------
    KeyError
        If any of the expected report columns is not found among the headers
        read from the sheet.
    """
    group_rows = [
        _group_row(df, label, start, stop)
        for label, start, stop in _PRODUCT_GROUP_ROWS
    ]
    return pd.concat(group_rows)[_REPORT_COLUMNS]

In [46]:
# Load the source workbook and pick the sheet that lists the OpCos.
workbook = load_workbook('data_files/p02_source.xlsx')
sheet_with_opco = workbook['Opco (Domestic) OI']

# Indexing a sheet by a range yields tuples of cells, one per row;
# flatten them into a single list holding the value of every cell in A2:A23.
values = [
    cell.value
    for cells_in_row in sheet_with_opco['A2:A23']
    for cell in cells_in_row
]

In [50]:
# Prepare data for further processing: write one copy of the workbook per
# OpCo (with that OpCo entered in cell P2), then load the target sheet of
# every copy into a dict keyed by OpCo.
folder_path = 'data_files/temp'
file_prefix = 'p02_source_'

# Make sure the output folder exists before saving into it.
os.makedirs(folder_path, exist_ok=True)

target_sheet = workbook['OpCo - Domestic ST & GM']

for v in values:
    target_sheet['P2'] = v
    workbook.save(f'{folder_path}/{file_prefix}{v}.xlsx')

workbook.close()

# From here on the sheet is addressed by name (for pandas), not as a worksheet object.
target_sheet = 'OpCo - Domestic ST & GM'

dataframes_dict = {}

for file_name in os.listdir(folder_path):
    if not file_name.endswith(('.xls', '.xlsx')):
        continue
    # Only read the copies written above; this skips unrelated workbooks in
    # the folder, e.g. the '~$...' lock files Excel creates while a file is open.
    if not file_name.startswith(file_prefix):
        continue

    # Strip the known prefix and the extension instead of splitting on '_' and
    # '.', so an OpCo name that itself contains '_' or '.' is kept intact.
    key = os.path.splitext(file_name)[0][len(file_prefix):]

    file_path = os.path.join(folder_path, file_name)
    df = pd.read_excel(file_path, sheet_name=target_sheet)
    dataframes_dict[key] = df

**Manual step:** each file in the temp folder must now be opened and saved again, so that its formula cells are updated, before the data is used.

In [None]:
output_file = 'data_files/outcome/output_file_domestic.xlsx'

# Write one worksheet per entry of dataframes_dict, named after its key and
# holding that entry's frame reshaped by how_a_need().
with pd.ExcelWriter(output_file) as writer:
    for sheet_name, df in dataframes_dict.items():
        how_a_need(df).to_excel(writer, sheet_name=sheet_name)

In [None]:
# Same procedure for another sheet ('OpCo - Total ST & GM'): write one copy
# of the workbook per OpCo (with that OpCo entered in cell P2), then load the
# target sheet of every copy into a dict keyed by OpCo.
folder_path = 'data_files/temp_total'
file_prefix = 'p02_source_'

# Make sure the output folder exists before saving into it.
os.makedirs(folder_path, exist_ok=True)

workbook = load_workbook('data_files/p02_source.xlsx')
target_sheet = workbook['OpCo - Total ST & GM']

for v in values:
    target_sheet['P2'] = v
    workbook.save(f'{folder_path}/{file_prefix}{v}.xlsx')

workbook.close()

# From here on the sheet is addressed by name (for pandas), not as a worksheet object.
target_sheet = 'OpCo - Total ST & GM'

dataframes_dict = {}

for file_name in os.listdir(folder_path):
    if not file_name.endswith(('.xls', '.xlsx')):
        continue
    # Only read the copies written above; this skips unrelated workbooks in
    # the folder, e.g. the '~$...' lock files Excel creates while a file is open.
    if not file_name.startswith(file_prefix):
        continue

    # Strip the known prefix and the extension instead of splitting on '_' and
    # '.', so an OpCo name that itself contains '_' or '.' is kept intact.
    key = os.path.splitext(file_name)[0][len(file_prefix):]

    file_path = os.path.join(folder_path, file_name)
    df = pd.read_excel(file_path, sheet_name=target_sheet)
    dataframes_dict[key] = df

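**Manual step:** each file in the temp folder (here `data_files/temp_total`) must now be opened and saved again, so that its formula cells are updated, before the data is used.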

In [116]:
output_file = 'data_files/outcome/output_file_total.xlsx'

# Write one worksheet per entry of dataframes_dict, named after its key and
# holding that entry's frame reshaped by how_a_need().
with pd.ExcelWriter(output_file) as writer:
    for sheet_name, df in dataframes_dict.items():
        how_a_need(df).to_excel(writer, sheet_name=sheet_name)