In [68]:
import pandas as pd              # Main data manipulation
from openpyxl import Workbook    # Excel writing
from openpyxl.styles import Font # Excel formatting (bold, colors)
import glob                      # File path handling
from datetime import datetime

In [69]:
def make_unique_name():
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return f'{timestamp}__report.xlsx'

In [5]:
# Trick 1 - Simple reading of worksheet from Excel workbook

In [10]:
excel_file_name = "financial_data.xlsx"

In [11]:
df = pd.read_excel(excel_file_name,
                  sheet_name = "Transactions",
                  parse_dates = ["Date"],
                  dtype={"InvoiceID":str})

In [12]:
df

Unnamed: 0,Date,InvoiceID,Description,Amount,Category,CustomerID
0,2023-01-05,INV-1001,Web Design,2500.0,Services,CUST-001
1,2023-01-12,INV-1002,Office Supplies,120.0,Expenses,CUST-002
2,2023-02-03,INV-1003,Software License,800.0,Subscriptions,CUST-001
3,NaT,INV-1004,Consulting,,Services,CUST-003
4,2023-02-28,INV-1005,Hosting Fees,150.0,Subscriptions,CUST-002


In [13]:
# Trick 2 - Combine Reports

In [15]:
income = pd.read_excel(excel_file_name, sheet_name="Income")
expenses = pd.read_excel(excel_file_name, sheet_name="Expenses")

In [18]:
combined = pd.concat([
    income.assign(From_Worksheet="Income"),
    expenses.assign(From_Worksheet="Expenses")
])

In [19]:
combined

Unnamed: 0,Date,InvoiceID,Description,Amount,Category,CustomerID,From_Worksheet
0,2023-01-05,INV-10015,Web Design,10.0,Salary,CUST-001,Income
1,2023-01-12,INV-10026,Office Supplies,20.0,Others,CUST-002,Income
2,2023-02-03,INV-10036,Software License,40.0,Services,CUST-001,Income
3,NaT,INV-10045,Consulting,,Others,CUST-003,Income
4,2023-02-28,INV-10056,Hosting Fees,160.0,Others,CUST-002,Income
0,2024-01-05,INV-100151,Cleaning,1.0,Salary,CUST-001,Expenses
1,2024-01-12,INV-100261,Smiles,2.0,Others,CUST-002,Expenses
2,2024-02-03,INV-100361,Eating,4.0,Services,CUST-001,Expenses
3,NaT,INV-100451,Dancing,,Others,CUST-003,Expenses
4,2024-02-28,INV-100561,Singing,16.0,Others,CUST-002,Expenses


In [20]:
# Trick 3 - Fix Missing Values

In [21]:
combined["Amount"] = combined["Amount"].fillna(combined["Amount"].mean())

In [22]:
combined

Unnamed: 0,Date,InvoiceID,Description,Amount,Category,CustomerID,From_Worksheet
0,2023-01-05,INV-10015,Web Design,10.0,Salary,CUST-001,Income
1,2023-01-12,INV-10026,Office Supplies,20.0,Others,CUST-002,Income
2,2023-02-03,INV-10036,Software License,40.0,Services,CUST-001,Income
3,NaT,INV-10045,Consulting,31.625,Others,CUST-003,Income
4,2023-02-28,INV-10056,Hosting Fees,160.0,Others,CUST-002,Income
0,2024-01-05,INV-100151,Cleaning,1.0,Salary,CUST-001,Expenses
1,2024-01-12,INV-100261,Smiles,2.0,Others,CUST-002,Expenses
2,2024-02-03,INV-100361,Eating,4.0,Services,CUST-001,Expenses
3,NaT,INV-100451,Dancing,31.625,Others,CUST-003,Expenses
4,2024-02-28,INV-100561,Singing,16.0,Others,CUST-002,Expenses


In [40]:
# Trick 4 - Formatting the exported Excel file

In [73]:
new_worksheet = make_unique_name()

In [74]:
with pd.ExcelWriter(new_worksheet, engine="openpyxl") as writer:
    combined.to_excel(writer, index=False)
    
    workbook = writer.book
    worksheet=writer.sheets["Sheet1"]
    
    for cell in worksheet["1:1"]:
        cell.font = Font(bold=True)
        cell.font = Font(color="FFFF22")

In [75]:
new_worksheet

'20250329_210356__report.xlsx'

In [44]:
# Trick 5 - Merging Excel Files

In [47]:
files = glob.glob("sales12/sales_*.xlsx")
annual_data = pd.concat([pd.read_excel(f) for f in files])

In [76]:
files

['sales12\\sales_01.xlsx', 'sales12\\sales_02.xlsx', 'sales12\\sales_03.xlsx']

In [51]:
annual_data

Unnamed: 0,Date,InvoiceID,Description,Amount,Category,CustomerID
0,2023-01-05,INV-1001,Web Design,2500,Services,CUST-001
1,2023-01-12,INV-1002,Office Supplies,120,Expenses,CUST-002
2,2023-02-03,INV-1003,Software License,800,Subscriptions,CUST-001
3,NaT,INV-1004,Consulting,1,Services,CUST-003
4,2023-02-28,INV-1005,Hosting Fees,150,Subscriptions,CUST-002
0,2023-01-05,INV-1001,Web Design,2500,Services,CUST-001
1,2023-01-12,INV-1002,Office Supplies,120,Expenses,CUST-002
2,2023-02-03,INV-1003,Software License,800,Subscriptions,CUST-001
3,NaT,INV-1004,Consulting,1,Services,CUST-003
4,2023-02-28,INV-1005,Hosting Fees,150,Subscriptions,CUST-002


In [50]:
# Trick 6 - Smart Filtering

In [52]:
web_design_only = annual_data[
    (annual_data["Description"]=="Web Design"
    )]

In [77]:
web_design_only

Unnamed: 0,Date,InvoiceID,Description,Amount,Category,CustomerID
0,2023-01-05,INV-1001,Web Design,2500,Services,CUST-001
0,2023-01-05,INV-1001,Web Design,2500,Services,CUST-001
0,2023-01-05,INV-1001,Web Design,2500,Services,CUST-001


In [55]:
small_transactions = annual_data[
    (annual_data["Amount"] < 200
    )]

In [56]:
small_transactions

Unnamed: 0,Date,InvoiceID,Description,Amount,Category,CustomerID
1,2023-01-12,INV-1002,Office Supplies,120,Expenses,CUST-002
3,NaT,INV-1004,Consulting,1,Services,CUST-003
4,2023-02-28,INV-1005,Hosting Fees,150,Subscriptions,CUST-002
1,2023-01-12,INV-1002,Office Supplies,120,Expenses,CUST-002
3,NaT,INV-1004,Consulting,1,Services,CUST-003
4,2023-02-28,INV-1005,Hosting Fees,150,Subscriptions,CUST-002
1,2023-01-12,INV-1002,Office Supplies,120,Expenses,CUST-002
3,NaT,INV-1004,Consulting,1,Services,CUST-003
4,2023-02-28,INV-1005,Hosting Fees,150,Subscriptions,CUST-002


In [78]:
# Trick 7 - Mergining Tables

In [79]:
df_transactions = pd.read_excel(
            excel_file_name,
            sheet_name="Transactions")
df_customers = pd.read_excel(
            excel_file_name,
            sheet_name="Customers")

In [80]:
merged = pd.merge(
    df_transactions,
    df_customers,
    on = "CustomerID"
    )

In [61]:
merged

Unnamed: 0,Date,InvoiceID,Description,Amount,Category,CustomerID,Name,Region,SignupDate
0,2023-01-05,INV-1001,Web Design,2500.0,Services,CUST-001,VitoshAcademy,Sofia,2022-11-15
1,2023-01-12,INV-1002,Office Supplies,120.0,Expenses,CUST-002,VitoshPython,Lamia,2023-01-10
2,2023-02-03,INV-1003,Software License,800.0,Subscriptions,CUST-001,VitoshAcademy,Sofia,2022-11-15
3,NaT,INV-1004,Consulting,,Services,CUST-003,VitoshVBA,Nuremberg,NaT
4,2023-02-28,INV-1005,Hosting Fees,150.0,Subscriptions,CUST-002,VitoshPython,Lamia,2023-01-10


In [62]:
# Trick 8 - Export Dataframe to Excel

In [63]:
new_worksheet = make_unique_name()

In [66]:
with pd.ExcelWriter(new_worksheet, engine="openpyxl") as writer:
    merged.to_excel(writer)