In [10]:
considered_month = "January"
#["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

import gspread
import numpy as np
import pandas as pd
from gspread_dataframe import set_with_dataframe

gc = gspread.service_account(filename="./credentials.json")
wk_sht = gc.open("Expenses_app_db")
df_category = pd.DataFrame(wk_sht.worksheet("Supporting_info").get_all_records())

def refactor_monthly_data_frame(sht_name):
    print("Refactoring ", sht_name, " as per project requirement")

    df = pd.DataFrame(wk_sht.worksheet(sht_name).get_all_records())
    empty_categories = set(df_category["Sub-category"])-set(df["Sub-category"])

    if list(empty_categories):
        for sub_cat in empty_categories:
            temp_dict = {"Description": sub_cat + "_empty_entry", 
            "amount": 0.0,
            "Sub-category": sub_cat}
            df = pd.concat([df, pd.DataFrame(temp_dict, index=[0])], ignore_index=True)

    # fill down the NaN values in Date column
    df['Date'] = df['Date'].fillna(method='ffill')

    try:
        df["Date"] = pd.to_datetime(df["Date"], format="%d.%m.%y")
    except:
        pass

    # extract month from date column
    df['Date'] = pd.to_datetime(df['Date'])
    df['day'] = df['Date'].dt.day
    df['month'] = df['Date'].dt.month_name()
    df['month_number'] = df['Date'].dt.month
    df['year'] = df['Date'].dt.year
    df.drop(['Date'], axis=1, inplace=True)

    df["amount"] = df["amount"].astype(str).str.replace(",", ".").astype(float)
    df = df.fillna(0)

    # add Category and Category Type column to df from df_category based on Sub-category
    df['Category'] = df['Sub-category'].map(df_category.set_index('Sub-category')['Category'])
    df['Category Type'] = df['Sub-category'].map(df_category.set_index('Sub-category')['Category Type'])

    df_old = pd.DataFrame(wk_sht.worksheet("Processed_all_tranacs").get_all_records())
    df = pd.concat([df_old, df], ignore_index=True)

    try:
        df.drop(["Balance_Amount"], axis=1, inplace=True)
    except:
        pass

    df["Balance_Amount"] = df["amount"].cumsum()
    set_with_dataframe(wk_sht.worksheet("Processed_all_tranacs"), df)

    print("Refactoring of ", sht_name, " is completed !! ")
    return df

df = refactor_monthly_data_frame(considered_month)

Refactoring  January  as per project requirement
Refactoring of  January  is completed !! 


In [11]:
old_balance = df[df['Sub-category'] == 'Old Balance Income']['amount'].values.sum()
# old_balance

In [18]:
df_coupons = pd.DataFrame(wk_sht.worksheet("Coupons").get_all_records())
# extract month from date column
df_coupons['Date'] = pd.to_datetime(df_coupons['Date'])
df_coupons['month'] = df_coupons['Date'].dt.month_name()
df_coupons["Effective_Amount"] = df_coupons["Credit"]+df_coupons["Debit"]
df_coupons["Balance_Amount"] = df_coupons["Effective_Amount"].cumsum()


final_df = pd.DataFrame(columns=['Debit', 'Balance_Amount'], index=df_coupons['month'].unique())
for month in df_coupons['month'].unique():
    temp_df = df_coupons[df_coupons['month']== month].copy()
    final_df.loc[month]['Debit'] = temp_df['Debit'].sum()
    final_df.loc[month]['Balance_Amount'] = temp_df["Balance_Amount"].values[-1]
    
final_df.loc['Total'] = final_df.sum(axis=0)
final_df = final_df.reset_index().replace([np.inf, -np.inf], np.nan).fillna(0)
set_with_dataframe(wk_sht.worksheet("Coupon_metrics"), final_df)
final_df

Unnamed: 0,index,Debit,Balance_Amount
0,January,0,0
1,February,0,0
2,March,0,0
3,April,0,0
4,May,0,0
5,June,0,0
6,July,0,0
7,August,0,0
8,September,0,0
9,October,0,0


In [13]:
def calculate_sum_and_percentage(df):
    df.set_index('Sub-category', inplace=True)
    df.loc["total_gross_income"] = df.loc["Salary"]+df.loc["Tax less Income"]+df.loc["Gift Coupon Income"]+df.loc["Pension Insentive Income"]
    df.loc["total_gross_income_pdp"] = df.loc["total_gross_income"]+df.loc["PDP Income"]+df.loc["Office Travel Income"]
    df.loc["total_state_cuttings"] = df.loc["Tax"]+df.loc["Health Insurance"]+df.loc["Pension"]+df.loc["Unemployment Fund"]+df.loc["Nursing Care"]
    df.loc["total_net_income"] = df.loc["total_gross_income"]+df.loc["total_state_cuttings"]+df.loc["Directed to Company Pension"]-df.loc["Gift Coupon Income"]-df.loc["Pension Insentive Income"]
    df.loc["tot_living_exp"] = df.loc["Rent"]+df.loc["Phone & WiFi"]+df.loc["Groceries"]+df.loc["Travel Pass"]+df.loc["Gifts"]
    df.loc["tot_extra_exp"] = df.loc["Entertainment"]+df.loc["Vacation"]+df.loc["Coupon spendings"]+df.loc["Family & Kids"]+df.loc["Restaurant"]+df.loc["Taxi"]+df.loc["Cloths"]+df.loc["Gym & Self grooming"]
    df.loc["total_savings"] = df.loc["To India"]+df.loc["Company Pension as gift coupon spending"]+df.loc["Directed to Company Pension"]
    df.loc["complete_expenses"] = df.loc["tot_living_exp"]+df.loc["tot_extra_exp"]
    df.loc["old_balance"] = old_balance
    try:
        df["percentage"]=round((df["amount"]/df.loc["total_gross_income"].values[0])*100,2)
    except:
        df["percentage"] = 0
    return df

In [None]:
df_yearly_detailed_metrics = pd.pivot_table(df, index=['Sub-category'], values=['amount'], aggfunc=np.sum).reset_index()

df_yearly_detailed_metrics = calculate_sum_and_percentage(df_yearly_detailed_metrics)
df_yearly_detailed_metrics.loc["reamining_balance"] = df["Balance_Amount"].values[-1]
df_yearly_detailed_metrics = df_yearly_detailed_metrics.reset_index().replace([np.inf, -np.inf], np.nan).fillna(0)
set_with_dataframe(wk_sht.worksheet("Yearly_detailed_metrics"), df_yearly_detailed_metrics)
#df_yearly_detailed_metrics

In [None]:
df_monthly_detailed_metrics = pd.pivot_table(df, index=['month','Sub-category'], values=['amount'], aggfunc=np.sum).reset_index()
df_monthly_detailed_metrics_with_percentage = pd.DataFrame()
for month in df_monthly_detailed_metrics["month"].unique():
    temp_df = df_monthly_detailed_metrics[df_monthly_detailed_metrics["month"]==month][["Sub-category","amount"]].copy()
    temp_df = calculate_sum_and_percentage(temp_df)
    temp_df.loc["reamining_balance"] = df[df["month"]==month]["Balance_Amount"].values[-1]
    temp_df["month"] = month
    temp_df["percentage"] = temp_df["percentage"].fillna(0)
    df_monthly_detailed_metrics_with_percentage = pd.concat([df_monthly_detailed_metrics_with_percentage,temp_df],axis=0)

df_monthly_detailed_metrics_with_percentage = df_monthly_detailed_metrics_with_percentage.reset_index().replace([np.inf, -np.inf], np.nan).fillna(0)
set_with_dataframe(wk_sht.worksheet("Monthly_detailed_metrics"), df_monthly_detailed_metrics_with_percentage)
#df_monthly_detailed_metrics_with_percentage