In [1]:
import pandas as pd
import openpyxl
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Alignment, PatternFill, Font
from openpyxl.worksheet.dimensions import ColumnDimension
from datetime import datetime, timedelta

# Load the two transaction extracts to reconcile (bank first, then company)
df_bank, df_company = map(
    pd.read_csv,
    ('mock_bank_data_1-5Sep2024.csv', 'mock_company_data_1-5Sep2024.csv'),
)

In [2]:
# Reporting window: first and last day-of-month, plus the month and year they belong to
start_date, end_date = 1, 5
month, year = 9, 2024

In [3]:
# Inspect the bank extract: print schema / dtypes / null counts, then preview the first rows
df_bank.info()
df_bank.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99971 entries, 0 to 99970
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   transaction_id  99971 non-null  object
 1   datetime        99971 non-null  object
 2   amount          99971 non-null  int64 
 3   ref_code        99971 non-null  object
dtypes: int64(1), object(3)
memory usage: 3.1+ MB


Unnamed: 0,transaction_id,datetime,amount,ref_code
0,LW24090200001,2/9/2024 13:02,458,Z008
1,LW24090500002,5/9/2024 18:57,478,Z001
2,LW24090200003,2/9/2024 13:02,263,Z007
3,LW24090300004,3/9/2024 16:31,332,Z007
4,LW24090200005,2/9/2024 13:02,289,Z003


In [4]:
# Inspect the company extract: print schema / dtypes / null counts, then preview the first rows
df_company.info()
df_company.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99984 entries, 0 to 99983
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   transaction_id  99984 non-null  object
 1   datetime        99984 non-null  object
 2   amount          99984 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 2.3+ MB


Unnamed: 0,transaction_id,datetime,amount
0,LW24090200001,2/9/2024 13:02,458
1,LW24090500002,5/9/2024 18:57,478
2,LW24090200003,2/9/2024 13:02,263
3,LW24090300004,3/9/2024 16:31,332
4,LW24090200005,2/9/2024 13:02,289


In [5]:
# Parse the raw 'datetime' strings (day-first, e.g. "2/9/2024 13:02") into Timestamps
df_bank['datetime'] = pd.to_datetime(df_bank['datetime'], format='%d/%m/%Y %H:%M')
df_company['datetime'] = pd.to_datetime(df_company['datetime'], format='%d/%m/%Y %H:%M')

# Sort data by datetime
df_bank = df_bank.sort_values(by='datetime')
df_company = df_company.sort_values(by='datetime')

# Add a helper column holding only the date, as 'YYYY-MM-DD' text.
# Each frame must derive this column from its OWN 'datetime' values: assigning a
# Series taken from the other frame aligns on index labels, which would stamp
# company rows with the bank row's date and leave NaN where no bank label exists.
df_bank['date_formatted'] = df_bank['datetime'].dt.strftime('%Y-%m-%d')
df_company['date_formatted'] = df_company['datetime'].dt.strftime('%Y-%m-%d')

In [6]:
# Create the workbook; its default sheet is reused as the summary sheet
wb = Workbook()

# 1. Summary Sheet
ws_summary = wb.active
ws_summary.title = "Summary"

# C2: highlighted, bold, centred "Total" header
total_header = ws_summary["C2"]
total_header.value = "Total"
total_header.fill = PatternFill(start_color="FFFF99", end_color="FFFF99", fill_type="solid")
total_header.font = Font(bold=True)
total_header.alignment = Alignment(horizontal="center")

# Row 2, from column D (index 4): one bold, centred 'YYYY-MM-DD' header per day
start_date_obj = datetime(year, month, start_date)
day_count = end_date - start_date + 1
for offset in range(day_count):
    day = start_date_obj + timedelta(days=offset)
    day_header = ws_summary.cell(row=2, column=4 + offset)
    day_header.value = day.strftime('%Y-%m-%d')
    day_header.font = Font(bold=True)
    day_header.alignment = Alignment(horizontal="center")

# B3 / B4: bold row labels for the two data sources
for ref, label in (("B3", "Company Amount"), ("B4", "Bank Amount")):
    label_cell = ws_summary[ref]
    label_cell.value = label
    label_cell.font = Font(bold=True)

# Rows 3 and 4, per day column: sum the amounts (column C of each data sheet)
# whose date-text column equals the header in row 2 of the same summary column
last_column = 4 + (end_date - start_date)
for col_idx in range(4, last_column + 1):
    col_letter = ws_summary.cell(row=2, column=col_idx).column_letter
    company_cell = ws_summary.cell(row=3, column=col_idx)
    bank_cell = ws_summary.cell(row=4, column=col_idx)
    company_cell.value = f"=SUMIF('Company Data'!D:D, {col_letter}2, 'Company Data'!C:C)"
    bank_cell.value = f"=SUMIF('Bank Data'!E:E, {col_letter}2, 'Bank Data'!C:C)"

# C3 / C4: bold, centred grand totals across all day columns
last_col_letter = ws_summary.cell(row=2, column=last_column).column_letter
total_formulas = (
    ("C3", f"=SUM(D3:{last_col_letter}3)"),
    ("C4", f"=SUM(D4:{last_col_letter}4)"),
)
for ref, formula in total_formulas:
    total_cell = ws_summary[ref]
    total_cell.value = formula
    total_cell.font = Font(bold=True)
    total_cell.alignment = Alignment(horizontal="center")

# Thousands-separated, two-decimal display for the totals and every day column
for row_idx in (3, 4):
    for col_idx in range(3, last_column + 1):
        ws_summary.cell(row=row_idx, column=col_idx).number_format = '#,##0.00'

def _write_dataframe_sheet(workbook, title, df, number_format='#,##0.00'):
    """Create a sheet named ``title`` in ``workbook`` and copy ``df`` into it.

    The header is written to row 1 and the data rows follow; the DataFrame
    index is not written. Every cell whose value is an ``int`` or ``float``
    gets ``number_format`` applied.

    Parameters:
        workbook: openpyxl workbook that receives the new sheet.
        title: name of the sheet to create.
        df: DataFrame whose header and rows are written.
        number_format: Excel number format applied to numeric cells.

    Returns:
        The newly created worksheet.
    """
    sheet = workbook.create_sheet(title=title)
    rows = dataframe_to_rows(df, index=False, header=True)
    for r_idx, row in enumerate(rows, start=1):
        for c_idx, value in enumerate(row, start=1):
            cell = sheet.cell(row=r_idx, column=c_idx, value=value)
            if isinstance(value, (int, float)):
                cell.number_format = number_format
    return sheet


# 2. Company Data Sheet
ws_company = _write_dataframe_sheet(wb, "Company Data", df_company)

# 3. Bank Data Sheet
ws_bank = _write_dataframe_sheet(wb, "Bank Data", df_bank)

# Summary sheet: fixed width of 15 for every column except the first (column A)
summary_columns = list(ws_summary.columns)
for column_cells in summary_columns[1:]:
    letter = column_cells[0].column_letter
    ws_summary.column_dimensions[letter].width = 15

# Company and Bank sheets: size each column to its longest value
for sheet in (ws_company, ws_bank):
    for column_cells in sheet.columns:
        # Length of the text form of every non-empty cell in this column
        text_lengths = [
            len(str(entry.value))
            for entry in column_cells
            if entry.value is not None
        ]

        # Widest entry plus 2 characters of padding, keyed by the column's letter
        letter = column_cells[0].column_letter
        sheet.column_dimensions[letter].width = max(text_lengths) + 2

# Name the report after the reporting window, e.g. "Reconciliation_report_1-5_Sep_2024.xlsx"
month_abbr = f"{datetime(year, month, 1):%b}"
filename = f"Reconciliation_report_{start_date}-{end_date}_{month_abbr}_{year}.xlsx"

# Write the workbook to disk and confirm where it went
wb.save(filename)
print(f"Reconciliation report has been created successfully as {filename}.")

Reconciliation report has been created successfully as Reconciliation_report_1-5_Sep_2024.xlsx.
