### Automating bank Expense statements
- This Python script builds the monthly expense table from the transaction details exported as a CSV file
- Note: the CSV file must be named after the shorthand (three-letter) name of the month, e.g. Jan, Feb, Mar (case-sensitive)
- Input:
    - <month>.csv file in the Transactions folder with the 'month'  being the short hand form of the current month
- Output:
    - Unformatted output.xlsx file containing the columns *DATE*, *COMMODITY*, *AMOUNT*, *CATEGORY* and *FREQUENCY*, followed by the *TOTAL*, *SALARY* and *SAVINGS* rows at the end

#### 1. Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import pathlib
import datetime

#### 2. Read the *Transactions/&lt;month&gt;.csv* file

In [2]:
# Mapping from German month abbreviations to their English equivalents
deu_to_eng = {"Mrz": "Mar", "Mai": "May", "Okt": "Oct", "Dez": "Dec"}

# Abbreviated name of the current month (strftime "%b" follows the active locale)
month = datetime.datetime.now().strftime("%b")

# Locate the csv file. Constructing a Path never touches the filesystem, so it
# cannot raise FileNotFoundError; the existence check has to be explicit.
# If no file exists under the locale's month name, fall back to the English
# abbreviation (months without a mapping keep their name).
csv_file_path = pathlib.Path(f"Transactions/{month}.csv")
if not csv_file_path.exists():
    csv_file_path = pathlib.Path(f"Transactions/{deu_to_eng.get(month, month)}.csv")

# Read the csv file: ';'-separated, header taken from row index 4, only the
# four columns needed for the expense table. A missing file surfaces here as
# FileNotFoundError naming the path that was tried.
# NOTE(review): the dtype key "Value Date" differs in case from the usecols
# entry "Value date" - confirm which spelling the export actually uses.
csv_df = pd.read_csv(csv_file_path, header=4, sep=';', usecols=["Value date", "Beneficiary / Originator", "Payment Details", "Debit"],
                     dtype={"Value Date": 'str', "Beneficiary / Originator": 'str', "Payment Details": 'str', "Debit": 'float'},
                     parse_dates=[1], date_format="%d/%m/%Y")

# csv_df.head()

#### 3. Rename the necessary Columns

In [3]:
# Rename the columns by name instead of by position: read_csv ignores the
# order in which usecols is written, so a positional assignment would
# mislabel the columns if the export laid them out differently.
csv_df = csv_df.rename(columns={
    "Value date": "DATE",
    "Beneficiary / Originator": "COMMODITY",
    "Payment Details": "Payment Details",
    "Debit": "AMOUNT",
})
# Keep the columns in a fixed, known order (raises KeyError if one is missing)
csv_df = csv_df[["DATE", "COMMODITY", "Payment Details", "AMOUNT"]]
# csv_df.head()

#### 4. Replace the NaN values in column COMMODITY with the data in the next column

In [4]:
# Replace the NaN values in column COMMODITY with the data in the next column
# (Payment Details). Only COMMODITY is filled: a row-wise bfill over the whole
# frame would also overwrite every other NaN with its right-hand neighbour,
# e.g. a missing Payment Details with the amount.
csv_df["COMMODITY"] = csv_df["COMMODITY"].fillna(csv_df["Payment Details"])

#### 5. Handle data in Individual columns

In [5]:
# Put a placeholder text into every Payment Details entry that is NaN
csv_df["Payment Details"] = csv_df["Payment Details"].replace(
    to_replace=np.nan,
    value="None. Please check CSV file for details",
)
csv_df.head()

# Truncate unnecessary data from the COMMODITY column
def truncate_data(data):
    """Return *data* cut off just before its first "//" marker.

    Both the search and the cut are done on ``str(data)``, so a value that is
    not itself sliceable cannot raise while being truncated.

    Args:
        data: A cell value; usually a string, but any object is accepted.

    Returns:
        The text before the first "//" as a ``str`` when the string form of
        *data* contains "//"; otherwise *data* itself, unchanged and
        unconverted.
    """
    head, marker, _ = str(data).partition("//")
    # partition() yields an empty separator when "//" does not occur
    return head if marker else data

# Cut every COMMODITY entry off at its "//" marker
csv_df["COMMODITY"] = csv_df["COMMODITY"].map(truncate_data)

# Flip the sign of every AMOUNT (debits are presumably exported as negative
# numbers - confirm against the CSV)
csv_df["AMOUNT"] = csv_df["AMOUNT"].map(lambda debit: -debit)

##### 5.1. Determine category based on the Commodity and add it as a column

In [None]:
# Determine category based on the Commodity and add it as a column
def categorize_data(data, amounts=None):
    """Assign a category code and a frequency label to every commodity.

    Each commodity is matched case-insensitively against the keyword lists in
    ``categories``; the first category with a matching keyword wins. Entries
    without a keyword match are handled as follows:

    * name contains "wise": classified by the transaction amount
      (> 300 -> "I", between 100 and 300 exclusive -> "L", both "Monthly");
      any other amount, including NaN, is labelled "Unknown"/"Non-Monthly"
      so that the result always has one entry per input item.
    * name contains "paypal": "Unknown"/"Non-Monthly".
    * anything else: "M"/"Non-Monthly".

    Args:
        data: Iterable of commodity names, one per transaction.
        amounts: Optional sequence of amounts aligned position-by-position
            with *data*; only consulted for "wise" entries. When omitted, the
            module-level ``csv_df["AMOUNT"]`` column is used, which assumes
            *data* lists the rows of ``csv_df`` in order.

    Returns:
        A tuple ``(category_values, frequency)`` of two lists, each holding
        exactly one entry per item of *data*.

    Raises:
        ValueError: If *amounts* is given but its length differs from *data*.
    """
    # NOTE(review): "Mdonalds" may be a misspelling of "McDonalds" - confirm
    # against the names that actually appear in the bank export.
    categories = {"U": ["Telekom", "ARD", "Drillisch", "E.ON"],
                "G":  ["ALDI", "PENNY", "ISTANBUL", "LIDL", "EDEKA", "NORMA"],
                "R": ["Domicil"],
                "TR": ["TICKET", "LOGPAY"],
                "T": ["SUBWAY", "LIEFERANDO", "SIPL", "ZATTIS", "SALAT KIND", "Mdonalds", "BACKHAUS", "BACKWERK"],
                "M": ["LinkedIn"]
                }
    monthly_categories = ("U", "R")

    items = list(data)
    if amounts is not None:
        amounts = list(amounts)
        if len(amounts) != len(items):
            raise ValueError(
                f"amounts has {len(amounts)} entries but data has {len(items)}"
            )

    category_values = []
    frequency = []
    for position, item in enumerate(items):
        name = str(item).lower()

        # First category (in declaration order) with a keyword inside the name
        matched_key = next(
            (
                key
                for key, keywords in categories.items()
                if any(keyword.lower() in name for keyword in keywords)
            ),
            None,
        )

        if matched_key is not None:
            category_values.append(matched_key)
            frequency.append(
                "Monthly" if matched_key in monthly_categories else "Non-Monthly"
            )
        elif "wise" in name:
            if amounts is None:
                # Fall back to the notebook's dataframe, loaded only when needed
                amounts = list(csv_df["AMOUNT"])
            # Look the amount up by row position so that every Wise row is
            # judged by its own amount, whatever its exact commodity text.
            amount = amounts[position]
            if amount > 300:
                category_values.append("I")
                frequency.append("Monthly")
            elif 100 < amount < 300:
                category_values.append("L")
                frequency.append("Monthly")
            else:
                # Amounts outside both ranges (or NaN) still need a label,
                # otherwise the result would be shorter than the input.
                category_values.append("Unknown")
                frequency.append("Non-Monthly")
        elif "paypal" in name:
            category_values.append("Unknown")
            frequency.append("Non-Monthly")
        else:
            category_values.append("M")
            frequency.append("Non-Monthly")
    return category_values, frequency

# Categorise every commodity, attach both results as new columns and order
# the rows by their frequency label
list_of_commodities = csv_df["COMMODITY"].tolist()
categorized_data, frequencies = categorize_data(list_of_commodities)
csv_df = csv_df.assign(CATEGORY=categorized_data, FREQUENCY=frequencies)
csv_df = csv_df.sort_values(by="FREQUENCY")
# example_df.head()

Unnamed: 0,DATE,COMMODITY,Payment Details,AMOUNT,CATEGORY,FREQUENCY
0,02/01/2025,Telekom Deutschland GmbH,OTHR Sonst. Transakt Festnetz Vertragskonto 56...,27.89,U,Monthly
17,16/01/2025,Drillisch Online GmbH,C3882607 U686964600 B790286575 winSIM,7.99,U,Monthly
12,09/01/2025,Wise Europe SA,pisp1368801733,399.32,I,Monthly
11,07/01/2025,Drillisch Online GmbH,C3882607 U685812727 B785817932 WINSIM B2C.DE,4.99,U,Monthly
9,06/01/2025,Wise Europe SA,pisp1362991211,199.12,L,Monthly


#### 6. Remove the column Payment Details

In [None]:
# Build the export frame: everything except the Payment Details column
filtered_df = csv_df.drop("Payment Details", axis="columns")
# filtered_df.head()

#### 7. Drop the NaN values if any

In [None]:
# Discard every row that still has a NaN in any of its columns
filtered_df = filtered_df.dropna(axis="index", how="any")

#### 8. Append the last row as the Sum total

In [None]:
# Append the last row as the Sum total
# total_expense = filtered_df["AMOUNT"].sum()
# filtered_df = pd.concat([filtered_df, pd.DataFrame([{"DATE": "TOTAL", "COMMODITY": "", "AMOUNT": total_expense}])])
# print(total_expense)
# filtered_df.tail()

#### 9. Append the footer dataframe

In [None]:
# Footer dataframe
salary = 2904.18

# Sum only the real expense rows, so that re-running this cell after the
# footer has been appended does not fold the TOTAL/SALARY/SAVINGS rows into
# the total (and, through it, into the savings figures).
footer_labels = ["TOTAL", "SALARY", "SAVINGS"]
is_footer_row = filtered_df["DATE"].astype(str).isin(footer_labels)
total_expense = filtered_df.loc[~is_footer_row, "AMOUNT"].sum()
savings = salary - total_expense

# Footer rows: the savings row also carries the savings rate in CATEGORY
f_data = {
    "DATE": list(footer_labels),
    "COMMODITY": ["", "", ""],
    "AMOUNT": [total_expense, salary, savings],
    "CATEGORY": ["", "", f"{(savings/salary)*100:.2f} %"],
}
f_dataframe = pd.DataFrame(f_data)

# Append the footer only once, even if the cell is executed repeatedly
if not is_footer_row.any():
    filtered_df = pd.concat([filtered_df, f_dataframe], ignore_index=True)

filtered_df.tail()

#### 10. Export the filtered dataframe to excel

In [None]:
# Write the finished table to sheet "Sheet1" of output.xlsx, leaving out the
# index column and formatting floats with two decimals
filtered_df.to_excel(
    "output.xlsx",
    sheet_name="Sheet1",
    float_format="%.2f",
    index=False,
)

#### 11. Format the 'output.xlsx'

In [None]:
# Execute the formatter notebook through the IPython %run magic
# (this line is not plain Python and only works inside IPython/Jupyter)
%run expense_formatter_structured.ipynb