In [85]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

random.seed(42)
np.random.seed(42)

ACCOUNT_HOLDER = "John Doe"

START_DATE = datetime(2024, 1, 1)
END_DATE = datetime(2024, 6, 30)

MONTHLY_INCOME = 60000

MERCHANTS = {
    "Food": ["SWIGGY", "ZOMATO", "CAFE COFFEE DAY"],
    "Transport": ["UBER", "OLA"],
    "Shopping": ["AMAZON", "FLIPKART"],
    "Entertainment": ["NETFLIX", "SPOTIFY"],
    "Utilities": ["ELECTRICITY BILL", "MOBILE RECHARGE"],
    "Housing": ["HOUSE RENT"],
    "Healthcare": ["APOLLO PHARMACY"],
}

FIXED_EXPENSES = {
    "HOUSE RENT": 15000,
    "NETFLIX": 799,
    "SPOTIFY": 119,
    "MOBILE RECHARGE": 499,
    "ELECTRICITY BILL": 1200,
}

def random_date(start, end):
    return start + timedelta(days=random.randint(0, (end - start).days))

transactions = []
balance = 0  # opening balance

current_date = START_DATE

while current_date <= END_DATE:
    # Salary credit (once per month)
    if current_date.day == 1:
        balance += MONTHLY_INCOME
        transactions.append({
            "account_holder": ACCOUNT_HOLDER,
            "date": current_date,
            "description": "SALARY CREDIT",
            "debit_amount": 0,
            "credit_amount": MONTHLY_INCOME,
            "balance": balance
        })

    # Fixed expenses
    for merchant, amount in FIXED_EXPENSES.items():
        if random.random() < 0.03:  # roughly monthly
            balance -= amount
            transactions.append({
                "account_holder": ACCOUNT_HOLDER,
                "date": current_date,
                "description": merchant,
                "debit_amount": amount,
                "credit_amount": 0,
                "balance": balance
            })

    # Random discretionary spend
    if random.random() < 0.4:
        category = random.choice(list(MERCHANTS.keys()))
        merchant = random.choice(MERCHANTS[category])
        amount = random.randint(100, 2500)

        if balance - amount > 0:
            balance -= amount
            transactions.append({
                "account_holder": ACCOUNT_HOLDER,
                "date": current_date,
                "description": merchant,
                "debit_amount": amount,
                "credit_amount": 0,
                "balance": balance
            })

    current_date += timedelta(days=1)

df = pd.DataFrame(transactions)
df = df.sort_values("date").reset_index(drop=True)

df.to_csv("synthetic_hdfc_john_doe.csv", index=False)

print("Synthetic dataset generated:", df.shape)


Synthetic dataset generated: (90, 6)


checking validation

In [86]:
import pandas as pd

In [87]:
df

Unnamed: 0,account_holder,date,description,debit_amount,credit_amount,balance
0,John Doe,2024-01-01,SALARY CREDIT,0,60000,60000
1,John Doe,2024-01-01,NETFLIX,799,0,59201
2,John Doe,2024-01-02,MOBILE RECHARGE,499,0,58702
3,John Doe,2024-01-03,HOUSE RENT,15000,0,43702
4,John Doe,2024-01-04,NETFLIX,799,0,42903
...,...,...,...,...,...,...
85,John Doe,2024-06-22,OLA,1285,0,184124
86,John Doe,2024-06-23,NETFLIX,799,0,183325
87,John Doe,2024-06-24,ZOMATO,418,0,182907
88,John Doe,2024-06-25,ZOMATO,803,0,182104


In [88]:
df["prev_balance"] = df["balance"].shift(1)

df["expected_balance"] = (
    df["prev_balance"]
    + df["credit_amount"]
    - df["debit_amount"]
)

df["balance_consistent"] = (
    df["expected_balance"] == df["balance"]
)

df.loc[df.index[0], "balance_consistent"] = True

In [89]:
df

Unnamed: 0,account_holder,date,description,debit_amount,credit_amount,balance,prev_balance,expected_balance,balance_consistent
0,John Doe,2024-01-01,SALARY CREDIT,0,60000,60000,,,True
1,John Doe,2024-01-01,NETFLIX,799,0,59201,60000.0,59201.0,True
2,John Doe,2024-01-02,MOBILE RECHARGE,499,0,58702,59201.0,58702.0,True
3,John Doe,2024-01-03,HOUSE RENT,15000,0,43702,58702.0,43702.0,True
4,John Doe,2024-01-04,NETFLIX,799,0,42903,43702.0,42903.0,True
...,...,...,...,...,...,...,...,...,...
85,John Doe,2024-06-22,OLA,1285,0,184124,185409.0,184124.0,True
86,John Doe,2024-06-23,NETFLIX,799,0,183325,184124.0,183325.0,True
87,John Doe,2024-06-24,ZOMATO,418,0,182907,183325.0,182907.0,True
88,John Doe,2024-06-25,ZOMATO,803,0,182104,182907.0,182104.0,True


In [90]:
import datetime as dt

In [91]:
df["is_weekend"] = df["date"].dt.weekday >= 5

In [92]:
df

Unnamed: 0,account_holder,date,description,debit_amount,credit_amount,balance,prev_balance,expected_balance,balance_consistent,is_weekend
0,John Doe,2024-01-01,SALARY CREDIT,0,60000,60000,,,True,False
1,John Doe,2024-01-01,NETFLIX,799,0,59201,60000.0,59201.0,True,False
2,John Doe,2024-01-02,MOBILE RECHARGE,499,0,58702,59201.0,58702.0,True,False
3,John Doe,2024-01-03,HOUSE RENT,15000,0,43702,58702.0,43702.0,True,False
4,John Doe,2024-01-04,NETFLIX,799,0,42903,43702.0,42903.0,True,False
...,...,...,...,...,...,...,...,...,...,...
85,John Doe,2024-06-22,OLA,1285,0,184124,185409.0,184124.0,True,True
86,John Doe,2024-06-23,NETFLIX,799,0,183325,184124.0,183325.0,True,True
87,John Doe,2024-06-24,ZOMATO,418,0,182907,183325.0,182907.0,True,False
88,John Doe,2024-06-25,ZOMATO,803,0,182104,182907.0,182104.0,True,False


In [93]:
df['is_weekend'].value_counts()

is_weekend
False    64
True     26
Name: count, dtype: int64

In [94]:
total_days = df['is_weekend'].shape
total_weekend = df["is_weekend"].sum()
total_weekday = total_days - total_weekend

In [95]:
total_weekday = total_days - total_weekend

In [96]:
expenses = df[df["debit_amount"] > 0]

observed_spend = (
    expenses
    .groupby("is_weekend")["debit_amount"]
    .sum()
)

observed_weekend_spend = observed_spend.get(True, 0)
observed_weekday_spend = observed_spend.get(False, 0)

print("Observed weekend total spend:", observed_weekend_spend)
print("Observed weekday total spend:", observed_weekday_spend)


Observed weekend total spend: 54918
Observed weekday total spend: 123390


In [97]:
average_weekday_spend = observed_weekday_spend / total_weekday
average_weekend_spend = observed_weekend_spend / total_weekend
average_weekday_spend = average_weekday_spend.item()
print("Average weekend spend", average_weekend_spend)
print("Average weekday spend", average_weekday_spend)

Average weekend spend 2112.230769230769
Average weekday spend 1927.96875


Recurring Expense Load

In [98]:
df["year_month"] = df["date"].dt.to_period("M")


In [99]:
expenses = df[df["debit_amount"] > 0]

In [100]:
df

Unnamed: 0,account_holder,date,description,debit_amount,credit_amount,balance,prev_balance,expected_balance,balance_consistent,is_weekend,year_month
0,John Doe,2024-01-01,SALARY CREDIT,0,60000,60000,,,True,False,2024-01
1,John Doe,2024-01-01,NETFLIX,799,0,59201,60000.0,59201.0,True,False,2024-01
2,John Doe,2024-01-02,MOBILE RECHARGE,499,0,58702,59201.0,58702.0,True,False,2024-01
3,John Doe,2024-01-03,HOUSE RENT,15000,0,43702,58702.0,43702.0,True,False,2024-01
4,John Doe,2024-01-04,NETFLIX,799,0,42903,43702.0,42903.0,True,False,2024-01
...,...,...,...,...,...,...,...,...,...,...,...
85,John Doe,2024-06-22,OLA,1285,0,184124,185409.0,184124.0,True,True,2024-06
86,John Doe,2024-06-23,NETFLIX,799,0,183325,184124.0,183325.0,True,True,2024-06
87,John Doe,2024-06-24,ZOMATO,418,0,182907,183325.0,182907.0,True,False,2024-06
88,John Doe,2024-06-25,ZOMATO,803,0,182104,182907.0,182104.0,True,False,2024-06


In [101]:
recurring_candidates = (
    expenses
    .groupby("description")["year_month"]
    .nunique()
)

recurring_merchants = recurring_candidates[
    recurring_candidates >= 2
].index

In [102]:
recurring_merchants

Index(['AMAZON', 'APOLLO PHARMACY', 'ELECTRICITY BILL', 'FLIPKART',
       'HOUSE RENT', 'MOBILE RECHARGE', 'NETFLIX', 'OLA', 'SPOTIFY', 'SWIGGY',
       'ZOMATO'],
      dtype='object', name='description')

In [None]:
recurring_expenses = expenses[
    expenses["description"].isin(recurring_merchants)
]

total_recurring_spend = recurring_expenses["debit_amount"].sum()
total_recurring_spend

173326

In [104]:
income = df[df["credit_amount"] > 0]
monthly_income = (
    income
    .groupby("year_month")["credit_amount"]
    .sum()
    .mean()
)

In [105]:
months = df['year_month'].unique()
num_months = months.shape

In [106]:
avg_monthly_recurring = total_recurring_spend / num_months
recurring_load_pct = avg_monthly_recurring / monthly_income * 100
avg_monthly_recurring = avg_monthly_recurring.item()
recurring_load_pct = recurring_load_pct.item()
print("average monthly recurring", avg_monthly_recurring)
print("Recurring load",recurring_load_pct)

average monthly recurring 28887.666666666668
Recurring load 48.14611111111111


In [107]:
FIXED_MERCHANTS = {
    "HOUSE RENT",
    "ELECTRICITY BILL",
    "MOBILE RECHARGE",
    "NETFLIX",
    "SPOTIFY"
}

HABITUAL_MERCHANTS = {
    "SWIGGY",
    "ZOMATO",
    "AMAZON",
    "FLIPKART",
    "OLA"
}


In [108]:
fixed_recurring = recurring_expenses[
    recurring_expenses["description"].isin(FIXED_MERCHANTS)
]

habitual_recurring = recurring_expenses[
    recurring_expenses["description"].isin(HABITUAL_MERCHANTS)
]


In [109]:
num_months = recurring_expenses["year_month"].nunique()

avg_fixed_monthly = fixed_recurring["debit_amount"].sum() / num_months
avg_habitual_monthly = habitual_recurring["debit_amount"].sum() / num_months
print("Average fixed monthly",avg_fixed_monthly)
print("Average habitual monthly", avg_habitual_monthly)

Average fixed monthly 23106.833333333332
Average habitual monthly 4093.0


In [110]:
fixed_load_pct = avg_fixed_monthly / monthly_income * 100
habitual_load_pct = avg_habitual_monthly / monthly_income * 100
print("Fixed load",fixed_load_pct)
print("Habitual load", habitual_load_pct)

Fixed load 38.51138888888889
Habitual load 6.821666666666666


Savings feasibility

In [111]:
avg_total_monthly_spend = expenses["debit_amount"].sum() / num_months

non_recurring_discretionary = (
    avg_total_monthly_spend
    - avg_fixed_monthly
    - avg_habitual_monthly
)
available_to_save = monthly_income - avg_total_monthly_spend
available_to_save

30282.0

In [112]:
target_savings = float(input())  # user-defined


In [113]:
if available_to_save >= target_savings:
    status = "Feasible"
elif available_to_save + avg_habitual_monthly >= target_savings:
    status = "Feasible with habit adjustment"
else:
    status = "Not feasible without major changes"


In [114]:
print("Target savings",target_savings)
print("Available to save",available_to_save)
print("Status",status)


Target savings 10000.0
Available to save 30282.0
Status Feasible


In [115]:
required_cut = target_savings - available_to_save


In [116]:
import sys, os
sys.path.append(os.getcwd())


In [117]:
!pip install google-genai



In [118]:
!pip install --upgrade typing_extensions




In [119]:
import google.genai as genai
import os

client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
models = client.models.list()
for m in models:
    print(m.name)


models/embedding-gecko-001
models/gemini-2.5-flash
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-exp-1206
models/gemini-2.5-flash-preview-tts
models/gemini-2.5-pro-preview-tts
models/gemma-3-1b-it
models/gemma-3-4b-it
models/gemma-3-12b-it
models/gemma-3-27b-it
models/gemma-3n-e4b-it
models/gemma-3n-e2b-it
models/gemini-flash-latest
models/gemini-flash-lite-latest
models/gemini-pro-latest
models/gemini-2.5-flash-lite
models/gemini-2.5-flash-image-preview
models/gemini-2.5-flash-image
models/gemini-2.5-flash-preview-09-2025
models/gemini-2.5-flash-lite-preview-09-2025
models/gemini-3-pro-preview
models/gemini-3-flash-preview
models/gemini-3-pro-image-preview
models/nano-banana-pro-preview
models/gemini-robotics-er-1.5-preview
models/g

In [120]:
import google.genai as genai
import os

client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

def generate_savings_explanation(summary: dict) -> str:
    prompt = f"""
You are a personal finance assistant.

Explain the savings feasibility clearly and honestly.
Do not give false hope. Do not shame the user.

Facts:
- Monthly income: ₹{summary['monthly_income']}
- Fixed recurring expenses: {summary['fixed_load_pct']:.1f}% of income
- Habitual recurring expenses: {summary['habitual_load_pct']:.1f}% of income
- Available to save per month: ₹{summary['available_to_save']}
- Target savings: ₹{summary['target_savings']}
- Savings shortfall: ₹{summary['shortfall']}
- Maximum possible habit-based reduction: ₹{summary['max_possible_behavioral_cut']}
"""
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt
    )
    return response.text


In [121]:


summary = {
    "monthly_income": monthly_income,
    "fixed_load_pct": fixed_load_pct,
    "habitual_load_pct": habitual_load_pct,
    "available_to_save": available_to_save,
    "target_savings": target_savings,
    "shortfall": target_savings - available_to_save,
    "max_possible_behavioral_cut": avg_habitual_monthly
}

explanation = generate_savings_explanation(summary)
print(explanation)


Based on the financial facts you've provided, your savings feasibility is **exceptionally strong**.

Here's an honest and clear breakdown:

*   **Your Monthly Income:** ₹60,000
*   **Your Target Savings:** ₹10,000 per month
*   **Amount Actually Available to Save:** ₹30,282 per month

The most significant indicator is your **"Savings shortfall" of ₹-20,282.0**. This isn't a deficit; rather, it means you have a **surplus of ₹20,282 *above* your target savings of ₹10,000.** In essence, you are already able to save ₹30,282 each month, which is more than three times your target amount!

This puts you in a fantastic financial position to not only meet but significantly exceed your monthly savings goal.

Regarding your expenses:
*   Your Fixed Recurring Expenses are 38.5% of your income.
*   Your Habitual Recurring Expenses are 6.8% of your income.

You also mentioned a **"Maximum possible habit-based reduction" of ₹4,093.0**. While this highlights an area where you could potentially cut dow