In [354]:
def refactor_monthly_data_frame(
    data_frame_name,
    input_dir='input_data/2023/',
    output_dir='processed_data/2023/',
    category_path='supporting_data/category_mapper.csv',
):
    """Clean one monthly transactions CSV and write the processed copy.

    Steps, in order:

    1. Read ``input_dir/data_frame_name`` and the category mapper (both ``;``-separated).
    2. Unless the file is ``"00_old_balance.csv"``, append a zero-amount
       ``"<sub-category>_empty_entry"`` row for every mapper sub-category that
       has no row in the file, then forward-fill missing ``Date`` values.
    3. Parse ``Date`` (``%d.%m.%y`` first, pandas' generic parser as fallback),
       split it into ``day`` / ``month`` / ``month_number`` / ``year`` and drop it.
    4. Convert ``amount`` from comma-decimal text to float and replace the
       remaining NaN values in the frame with 0.
    5. Add ``Category`` and ``Category Type`` looked up from the mapper by
       ``Sub-category``.
    6. Write the result to ``output_dir/data_frame_name`` (``;``-separated, no index).

    Parameters
    ----------
    data_frame_name : str
        File name of the monthly CSV, e.g. ``"01_Jan.csv"``. The file must
        provide the ``Date``, ``amount`` and ``Sub-category`` columns.
    input_dir : str, optional
        Directory holding the raw monthly files.
    output_dir : str, optional
        Directory receiving the processed file; created when it does not exist.
    category_path : str, optional
        Path of the mapper CSV with the ``Sub-category``, ``Category`` and
        ``Category Type`` columns.

    Returns
    -------
    None
        The function is used for its side effect (the written CSV).
    """
    import os

    import pandas as pd

    print("Refactoring ", data_frame_name, " as per project requirement")
    df = pd.read_csv(os.path.join(input_dir, data_frame_name), sep=";")
    df_category = pd.read_csv(category_path, sep=';')

    if data_frame_name == "00_old_balance.csv":
        # The opening-balance file is neither padded with placeholder rows nor
        # forward-filled; only the common clean-up below applies to it.
        print("None")
    else:
        present = set(df["Sub-category"])
        # Walk the mapper in file order so the placeholder rows come out in a
        # reproducible order (iterating a set of strings is not stable between runs).
        missing = [
            sub_cat
            for sub_cat in df_category["Sub-category"].dropna().unique()
            if sub_cat not in present
        ]
        if missing:
            # Build all placeholder rows at once instead of one concat per row.
            placeholders = pd.DataFrame({
                "Description": [f"{sub_cat}_empty_entry" for sub_cat in missing],
                "amount": 0.0,
                "Sub-category": missing,
            })
            df = pd.concat([df, placeholders], ignore_index=True)

        # Fill down the NaN values in the Date column; this also dates the
        # placeholder rows with the last date seen in the file.
        df['Date'] = df['Date'].ffill()

    try:
        dates = pd.to_datetime(df["Date"], format="%d.%m.%y")
    except (ValueError, TypeError):
        # Not in dd.mm.yy form: fall back to pandas' generic date parser.
        dates = pd.to_datetime(df["Date"])

    # Split the date into the parts the reports group by, then drop it.
    df['day'] = dates.dt.day
    df['month'] = dates.dt.month_name()
    df['month_number'] = dates.dt.month
    df['year'] = dates.dt.year
    df = df.drop(columns=['Date'])

    # Amounts use a decimal comma; normalise to a dot before casting.
    df["amount"] = df["amount"].astype(str).str.replace(",", ".", regex=False).astype(float)
    df = df.fillna(0)

    # Add Category and Category Type from the mapper, keyed by Sub-category.
    category_lookup = df_category.set_index('Sub-category')
    df['Category'] = df['Sub-category'].map(category_lookup['Category'])
    df['Category Type'] = df['Sub-category'].map(category_lookup['Category Type'])

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(os.path.join(output_dir, data_frame_name), sep=";", index=False)
    print("Refactoring of ", data_frame_name, " is completed !! ")
    return None

In [355]:
import pandas as pd
import numpy as np
# Files are listed in chronological order; the running balance computed below
# relies on this order, starting with the opening balance.
considered_months = [
    "00_old_balance.csv",
    "01_Jan.csv",
    "02_Feb.csv",
    "03_March.csv",
    "04_April.csv",
    "05_May.csv",
    "06_June.csv",
    "07_July.csv",
    "08_August.csv",
    "09_September.csv",
    "10_October.csv",
    "11_November.csv",
    "12_December.csv",
]

# Step 1: clean every raw file and write its processed copy to disk.
for location in considered_months:
    refactor_monthly_data_frame(location)

# Step 2: read the processed files back and stack them with a single concat
# (growing a frame inside the loop re-copies all earlier rows on every pass).
monthly_frames = [
    pd.read_csv('processed_data/2023/' + location, sep=';')
    for location in considered_months
]
df = pd.concat(monthly_frames, ignore_index=True)

# Running balance over all transactions, in file order.
df["Balance_Amount"] = df["amount"].cumsum()
df.to_csv("processed_data/all_transactions.csv", sep=";", index=True)
#df.head()

Refactoring  00_old_balance.csv  as per project requirement
None
Refactoring of  00_old_balance.csv  is completed !! 
Refactoring  01_Jan.csv  as per project requirement
Refactoring of  01_Jan.csv  is completed !! 
Refactoring  02_Feb.csv  as per project requirement
Refactoring of  02_Feb.csv  is completed !! 
Refactoring  03_March.csv  as per project requirement
Refactoring of  03_March.csv  is completed !! 
Refactoring  04_April.csv  as per project requirement
Refactoring of  04_April.csv  is completed !! 
Refactoring  05_May.csv  as per project requirement
Refactoring of  05_May.csv  is completed !! 
Refactoring  06_June.csv  as per project requirement
Refactoring of  06_June.csv  is completed !! 
Refactoring  07_July.csv  as per project requirement
Refactoring of  07_July.csv  is completed !! 
Refactoring  08_August.csv  as per project requirement
Refactoring of  08_August.csv  is completed !! 
Refactoring  09_September.csv  as per project requirement
Refactoring of  09_September.c

In [356]:
# Total of every transaction booked under the 'Old Balance Income' sub-category.
is_old_balance_row = df['Sub-category'].eq('Old Balance Income')
old_balance = df.loc[is_old_balance_row, 'amount'].to_numpy().sum()
#old_balance

In [357]:
# Load the coupon transactions and tag each row with its calendar month name.
# NOTE(review): 'Date' is parsed without an explicit format here, unlike the
# monthly files -- confirm the coupon file's date layout parses as intended.
coupons_raw = pd.read_csv("input_data/2023/100_coupons.csv", sep=";")
coupons_raw['Date'] = pd.to_datetime(coupons_raw['Date'])
coupons_raw['month'] = coupons_raw['Date'].dt.month_name()

# Sum the coupon amounts per month; the pivot already returns a frame indexed
# by 'month' with a single 'amount' column.
df_coupons = coupons_raw.pivot_table(index='month', values='amount', aggfunc='sum')

# Append a 'Total' row holding the column sum, then persist the table.
column_totals = df_coupons.sum(numeric_only=True, axis=0)
df_coupons.loc['Total'] = column_totals
df_coupons.to_csv("processed_data/2023/100_coupons.csv", sep=";", index=True)

In [358]:
def _sum_rows(frame, labels):
    """Add the rows named in ``labels`` left to right and return the resulting row."""
    total = frame.loc[labels[0]]
    for label in labels[1:]:
        total = total + frame.loc[label]
    return total


def calculate_sum_and_percentage(df, old_balance_amount=None):
    """Append summary rows and a ``percentage`` column to a per-sub-category table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``Sub-category`` column and an ``amount`` column, one row
        per sub-category. The frame passed in is left untouched; a new frame
        indexed by ``Sub-category`` is built and returned.
    old_balance_amount : number, optional
        Value stored in the ``old_balance`` row. When omitted, the
        notebook-level ``old_balance`` variable is used.

    Returns
    -------
    pandas.DataFrame
        The input rows plus the summary rows ``total_gross_income``,
        ``total_gross_income_pdp``, ``total_state_cuttings``,
        ``total_net_income``, ``tot_living_exp``, ``tot_extra_exp``,
        ``total_savings``, ``complete_expenses`` and ``old_balance``, and a
        ``percentage`` column giving every row's amount as a share of
        ``total_gross_income`` (rounded to two decimals, or 0.0 for all rows
        when gross income is zero).

    Raises
    ------
    KeyError
        If one of the sub-categories used in the formulas is missing.
    """
    if old_balance_amount is None:
        # Fall back to the value computed in an earlier notebook cell.
        old_balance_amount = old_balance

    # Re-index on a new frame so the caller's object is not modified.
    df = df.set_index('Sub-category')

    df.loc["total_gross_income"] = _sum_rows(
        df, ["Salary", "Tax less Income", "Gift Coupon Income"])
    # NOTE(review): "Gift Coupon Income" is already part of total_gross_income,
    # so it is counted twice in this row -- confirm that this is intended.
    df.loc["total_gross_income_pdp"] = _sum_rows(
        df, ["total_gross_income", "PDP Income", "Office Travel Income", "Gift Coupon Income"])
    df.loc["total_state_cuttings"] = _sum_rows(
        df, ["Tax", "Health Insurance", "Pension", "Unemployment Fund", "Nursing Care"])
    df.loc["total_net_income"] = _sum_rows(
        df, ["total_gross_income", "total_state_cuttings", "Directed to Company Pension"]
    ) - df.loc["Gift Coupon Income"]
    df.loc["tot_living_exp"] = _sum_rows(
        df, ["Rent", "Phone & WiFi", "Groceries", "Travel Pass", "Gifts"])
    df.loc["tot_extra_exp"] = _sum_rows(
        df, ["Entertainment", "Vacation", "Coupon spendings", "Family & Kids",
             "Restaurant", "Taxi", "Cloths", "Gym & Self grooming"])
    df.loc["total_savings"] = _sum_rows(
        df, ["To India", "Company Pension as gift coupon spending", "Directed to Company Pension"])
    df.loc["complete_expenses"] = _sum_rows(df, ["tot_living_exp", "tot_extra_exp"])
    df.loc["old_balance"] = old_balance_amount

    gross_income = df.loc["total_gross_income", "amount"]
    if gross_income == 0:
        # Dividing by a zero NumPy float yields inf/NaN instead of raising, so
        # the zero case has to be handled explicitly to get the 0 fallback.
        df["percentage"] = 0.0
    else:
        df["percentage"] = ((df["amount"] / gross_income) * 100).round(2)
    return df


In [359]:
# Total amount per sub-category over all loaded transactions. The aggregation
# is given by name because passing the np.sum callable to pivot_table is deprecated.
df_yearly_detailed_metrics = pd.pivot_table(
    df, index=['Sub-category'], values=['amount'], aggfunc="sum"
).reset_index()

# Add the summary rows and the percentage-of-gross-income column.
df_yearly_detailed_metrics = calculate_sum_and_percentage(df_yearly_detailed_metrics)

# Closing balance = last value of the running balance. Only the 'amount' cell
# is set: assigning a scalar to the whole row would also write the balance
# into the 'percentage' column.
df_yearly_detailed_metrics.loc["reamining_balance", "amount"] = df["Balance_Amount"].iloc[-1]
df_yearly_detailed_metrics.to_csv("processed_data/yearly_detailed_metrics.csv", sep=";", index=True)
#df_yearly_detailed_metrics

In [360]:
# Total amount per (month, sub-category). The aggregation is given by name
# because passing the np.sum callable to pivot_table is deprecated.
df_monthly_detailed_metrics = pd.pivot_table(
    df, index=['month', 'Sub-category'], values=['amount'], aggfunc="sum"
).reset_index()
df_monthly_detailed_metrics.to_csv("processed_data/monthly_detailed_metrics_2.csv", sep=";", index=True)

# Build one metrics table per month and stack them with a single concat.
monthly_metric_frames = []
for month in df_monthly_detailed_metrics["month"].unique():
    rows_of_month = df_monthly_detailed_metrics["month"] == month
    temp_df = df_monthly_detailed_metrics.loc[rows_of_month, ["Sub-category", "amount"]].copy()
    temp_df = calculate_sum_and_percentage(temp_df)
    # Balance after the month's last transaction. Only the 'amount' cell is
    # set so the balance does not leak into the 'percentage' column; that cell
    # is filled with 0 below.
    temp_df.loc["reamining_balance", "amount"] = df.loc[df["month"] == month, "Balance_Amount"].iloc[-1]
    temp_df["month"] = month
    temp_df["percentage"] = temp_df["percentage"].fillna(0)
    monthly_metric_frames.append(temp_df)

# pd.concat rejects an empty list, so fall back to an empty frame.
df_monthly_detailed_metrics_with_percentage = (
    pd.concat(monthly_metric_frames, axis=0) if monthly_metric_frames else pd.DataFrame()
)
df_monthly_detailed_metrics_with_percentage.to_csv("processed_data/monthly_detailed_metrics.csv", sep=";", index=True)
#df_monthly_detailed_metrics_with_percentage