In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import norm
import scipy.stats as stats
import math

In [2]:
budget_extract = pd.read_excel('/Users/zhengyaojin/Desktop/Budget Extract v2.xlsx')
coa_df = pd.read_csv('/Users/zhengyaojin/Desktop/Bluestar COA v2.csv')

In [3]:
merged_df = pd.merge(budget_extract, coa_df, on='Account Code', how='left')

In [4]:
budget_extract = merged_df

In [5]:
# Create a matrix to store number of days in each month
# Number of days in each month for a non-leap year
days_in_month_non_leap = [
    {"Month": "January", "Days": 31},
    {"Month": "February", "Days": 28},
    {"Month": "March", "Days": 31},
    {"Month": "April", "Days": 30},
    {"Month": "May", "Days": 31},
    {"Month": "June", "Days": 30},
    {"Month": "July", "Days": 31},
    {"Month": "August", "Days": 31},
    {"Month": "September", "Days": 30},
    {"Month": "October", "Days": 31},
    {"Month": "November", "Days": 30},
    {"Month": "December", "Days": 31}
]

# Number of days in each month for a leap year
days_in_month_leap = [
    {"Month": "January", "Days": 31},
    {"Month": "February", "Days": 29},
    {"Month": "March", "Days": 31},
    {"Month": "April", "Days": 30},
    {"Month": "May", "Days": 31},
    {"Month": "June", "Days": 30},
    {"Month": "July", "Days": 31},
    {"Month": "August", "Days": 31},
    {"Month": "September", "Days": 30},
    {"Month": "October", "Days": 31},
    {"Month": "November", "Days": 30},
    {"Month": "December", "Days": 31}
]


In [6]:
# Function to convert date format
# This part may need to change based on how the dates are defined
def convert_column_name(col_name):
    try:
        date_obj = pd.to_datetime(col_name, format='%b-%y')
        return date_obj.strftime('%B_%Y').lower()  # Format as "july_2024"
    except:
        return col_name  

In [7]:
# Normalize and convert date columns in budget_extract
budget_extract.columns = budget_extract.columns.str.strip().str.lower().str.replace(' ', '_')
budget_extract.columns = [convert_column_name(col) for col in budget_extract.columns]

In [9]:
# Function to get the number of days in a month
def get_days_in_month(month_year_str):
    
    month, year = month_year_str.split('_')
    year = int(year)
    
    if year % 4 == 0 and (year % 100 != 0 or year % 400 == 0):
        days_in_month = days_in_month_leap
    else:
        days_in_month = days_in_month_non_leap
    
    for entry in days_in_month:
        if entry["Month"].lower() == month:
            return entry["Days"]
    return 30  

In [12]:
rows = []

# Account types that should incur a negative amount
negative_account_types = ["Direct Costs", "Expense", "COGS", "SG&A", "Expenses"]

for _, row in budget_extract.iterrows():
    for col in budget_extract.columns[4:-3]: 
            
        days = get_days_in_month(col)
        daily_amount = row[col] / days
        
        # Check if the 'Amount' needs to be negative
        if row["account_type"] in negative_account_types:
            daily_amount = -daily_amount
        
        date = pd.to_datetime(col, format='%B_%Y')
        
        # Create a row for each day in the month
        for day in range(1, days+1):
            rows.append({
                "Date": date.replace(day=day).strftime('%Y-%m-%d'),
                "Amount": daily_amount,
                "Source": row["account_group"],  
                "Description": row["account_description"],  
                "Account Code": row["account_code"],  
                "Cost Centre": row["branch"],  
                "Account Type":row["account_type"],
                "Status": "Budget"
            })

In [13]:
new_df = pd.DataFrame(rows)

# Format the 'Date' column
new_df['Date'] = pd.to_datetime(new_df['Date']).dt.strftime('%-m/%-d/%Y')

new_df = new_df[["Date", "Account Code", "Account Type", "Source", "Description", "Cost Centre", "Amount", "Status"]]
new_df

Unnamed: 0,Date,Account Code,Account Type,Source,Description,Cost Centre,Amount,Status
0,7/1/2024,64010,Expense,Overheads,Accounting Fees,Adelaide,-0.0,Budget
1,7/2/2024,64010,Expense,Overheads,Accounting Fees,Adelaide,-0.0,Budget
2,7/3/2024,64010,Expense,Overheads,Accounting Fees,Adelaide,-0.0,Budget
3,7/4/2024,64010,Expense,Overheads,Accounting Fees,Adelaide,-0.0,Budget
4,7/5/2024,64010,Expense,Overheads,Accounting Fees,Adelaide,-0.0,Budget
...,...,...,...,...,...,...,...,...
156215,6/26/2025,64658,Expense,Storage and Handling,Water,Sydney,-0.0,Budget
156216,6/27/2025,64658,Expense,Storage and Handling,Water,Sydney,-0.0,Budget
156217,6/28/2025,64658,Expense,Storage and Handling,Water,Sydney,-0.0,Budget
156218,6/29/2025,64658,Expense,Storage and Handling,Water,Sydney,-0.0,Budget


In [14]:
new_df.to_excel('output_budget_extract.xlsx', index=False)