In [None]:
def refactor_monthly_data_frame(data_frame_name):
    """Clean one monthly CSV and write the result to ``processed_data/2023/``.

    Steps, in order:

    1. Read ``input_data/2023/<data_frame_name>`` and
       ``supporting_data/category_mapper.csv`` (both ``;``-separated).
    2. Append one zero-amount placeholder row for every sub-category that is
       listed in the mapper but does not occur in the monthly file.
    3. Forward-fill the ``Date`` column, parse it day-first and replace it
       with ``day``, ``month`` (month name) and ``year`` columns.
    4. Convert ``Credit`` / ``Debit`` from decimal-comma text to floats and
       replace every remaining NaN in the frame with 0.
    5. Add ``Category`` and ``Category Type`` looked up by ``Sub-category``.
    6. Write the frame to ``processed_data/2023/<data_frame_name>``.

    Parameters
    ----------
    data_frame_name : str
        File name (not a path) of the monthly CSV, e.g. ``"01_Jan.csv"``.

    Returns
    -------
    None
        The result is only written to disk.
    """
    # Local imports keep this cell runnable on a fresh kernel, before the
    # notebook-level import cell has been executed.
    import os
    import pandas as pd

    print("Refactoring ", data_frame_name, " as per project requirement")

    df = pd.read_csv('input_data/2023/' + data_frame_name, sep=";")
    df_category = pd.read_csv('supporting_data/category_mapper.csv', sep=';')

    # Sub-categories known to the mapper but absent from this month. Sorting
    # makes the row order of the output file deterministic (set iteration
    # order is not), and dropna() avoids "NaN + str" on blank mapper rows.
    known_sub_categories = set(df_category["Sub-category"].dropna())
    missing_sub_categories = sorted(known_sub_categories - set(df["Sub-category"]))

    if missing_sub_categories:
        # Build all placeholder rows at once instead of concatenating per row.
        placeholders = pd.DataFrame({
            "Description": [name + "_empty_entry" for name in missing_sub_categories],
            "Debit": 0.0,
            "Credit": 0.0,
            "Sub-category": missing_sub_categories,
        })
        df = pd.concat([df, placeholders], ignore_index=True)

    # Fill down the NaN values in the Date column; placeholder rows (which
    # have no date) inherit the last date present in the file.
    raw_dates = df['Date'].ffill()

    # Try the day-first formats explicitly. Without the four-digit-year
    # attempt, a value such as "01.02.2023" would fall through to the generic
    # parser, which may read it month-first.
    parsed_dates = None
    for date_format in ("%d.%m.%y", "%d.%m.%Y"):
        try:
            parsed_dates = pd.to_datetime(raw_dates, format=date_format)
            break
        except (ValueError, TypeError):
            continue
    if parsed_dates is None:
        # Best-effort fallback for any other layout, as in the original code.
        parsed_dates = pd.to_datetime(raw_dates)

    # Replace the Date column by its components.
    df['Date'] = parsed_dates
    df['day'] = parsed_dates.dt.day
    df['month'] = parsed_dates.dt.month_name()
    df['year'] = parsed_dates.dt.year
    df = df.drop(columns=['Date'])

    # Amounts use a decimal comma; missing amounts become "nan" -> NaN here
    # and are turned into 0 by the fillna below.
    for amount_column in ("Credit", "Debit"):
        df[amount_column] = (
            df[amount_column]
            .astype(str)
            .str.replace(",", ".", regex=False)
            .astype(float)
        )
    df = df.fillna(0)

    # Add Category and Category Type from the mapper, keyed by Sub-category.
    category_lookup = df_category.set_index('Sub-category')
    df['Category'] = df['Sub-category'].map(category_lookup['Category'])
    df['Category Type'] = df['Sub-category'].map(category_lookup['Category Type'])

    # Make sure the target folder exists so a fresh checkout does not fail.
    os.makedirs('processed_data/2023', exist_ok=True)
    df.to_csv('processed_data/2023/' + data_frame_name, sep=";", index=False)
    print("Refactoring of ", data_frame_name, " is completed !! ")
    return None

In [None]:
import pandas as pd
import numpy as np
# Monthly input files to process, in chronological order. The order matters:
# the running balance below is a cumulative sum over the concatenated rows.
considered_months = ["01_Jan.csv", "02_Feb.csv", "03_March.csv", "04_April.csv", "05_May.csv", "06_June.csv", "07_July.csv"]

# (Re)generate the cleaned file for every month under processed_data/2023/.
for location in considered_months:
    refactor_monthly_data_frame(location)

# Read the cleaned files back and combine them with a single concat.
# Growing a frame with concat inside the loop copies all earlier rows on each
# iteration and starts from an empty DataFrame, which newer pandas versions
# warn about.
monthly_frames = []
for location in considered_months:
    df_temp = pd.read_csv('processed_data/2023/' + location, sep=';')
    monthly_frames.append(df_temp)
df = pd.concat(monthly_frames, ignore_index=True)

# Signed amount per row (credit positive, debit negative) and running balance.
df["effective_amount"] = df["Credit"] - df["Debit"]
df["Balance_Amount"] = df["effective_amount"].cumsum()

#df.tail(50)

In [None]:
# Sub-categories treated as income (summed from Credit); every other
# sub-category is treated as an expense (summed from Debit).
Income_sources = ['Salary', 'Tax less Income', 'PDP Income', 'Office Travel Income']

# Total debit per expense sub-category across all loaded months.
# aggfunc='sum' replaces np.sum, which pandas >= 2.1 deprecates in favour of
# the string alias (same result).
yearly_expenses = pd.pivot_table(df, index=['Sub-category'], values=['Debit'], aggfunc='sum').reset_index()
yearly_expenses = yearly_expenses[~yearly_expenses['Sub-category'].isin(Income_sources)]
yearly_expenses.columns = ['Sub-category', 'amount']

# Total credit per income sub-category across all loaded months.
yearly_income = pd.pivot_table(df, index=['Sub-category'], values=['Credit'], aggfunc='sum').reset_index()
yearly_income = yearly_income[yearly_income['Sub-category'].isin(Income_sources)]
yearly_income.columns = ['Sub-category', 'amount']

# Income rows first, then expense rows.
df_yearly_detailed_metrics = pd.concat([yearly_income, yearly_expenses], axis=0)

# Lookup of amount by sub-category, used for the percentage base.
temp_dict = dict(zip(df_yearly_detailed_metrics["Sub-category"], df_yearly_detailed_metrics["amount"]))
# NOTE(review): the base uses only 'Salary' and 'Tax less Income'; the other
# two entries of Income_sources are excluded - confirm this is intentional.
# Raises KeyError if either sub-category is absent from the data.
total_income = temp_dict["Salary"] + temp_dict["Tax less Income"]
df_yearly_detailed_metrics["percentage"] = (
    df_yearly_detailed_metrics["amount"] / total_income * 100
).round(2)

df_yearly_detailed_metrics.to_csv("processed_data/yearly_detailed_metrics.csv", sep=";", index=False)
#df_yearly_detailed_metrics

In [None]:
# Sub-categories treated as income (summed from Credit); every other
# sub-category is treated as an expense (summed from Debit). Defined here as
# well so this cell does not depend on the yearly-metrics cell having run.
Income_sources = ['Salary', 'Tax less Income', 'PDP Income', 'Office Travel Income']

# Total debit per (month, expense sub-category).
# aggfunc='sum' replaces np.sum, which pandas >= 2.1 deprecates in favour of
# the string alias (same result).
monthly_expenses = pd.pivot_table(df, index=['month', 'Sub-category'], values=['Debit'], aggfunc='sum').reset_index()
monthly_expenses = monthly_expenses[~monthly_expenses['Sub-category'].isin(Income_sources)]
monthly_expenses.columns = ['month', 'Sub-category', 'amount']

# Total credit per (month, income sub-category).
monthly_income = pd.pivot_table(df, index=['month', 'Sub-category'], values=['Credit'], aggfunc='sum').reset_index()
monthly_income = monthly_income[monthly_income['Sub-category'].isin(Income_sources)]
monthly_income.columns = ['month', 'Sub-category', 'amount']

# Income rows first, then expense rows; pivot_table sorts its group keys, so
# months appear in alphabetical order of their names within each part.
df_monthly_detailed_metrics = pd.concat([monthly_income, monthly_expenses], axis=0)
df_monthly_detailed_metrics.to_csv("processed_data/monthly_detailed_metrics.csv", sep=";", index=False)
#df_monthly_detailed_metrics