In [1]:
import pandas as pd

# Read the clustered dataset that step 05 saved to the outputs folder
df = pd.read_csv("../outputs/05_clustered_data.csv")

# Report the frame's dimensions, then show its first three rows as the cell result
print(f"Data shape: {df.shape}")
df.iloc[:3]


Data shape: (20000, 28)


Unnamed: 0,Income,Age,Dependents,Occupation,City_Tier,Rent,Loan_Repayment,Insurance,Groceries,Transport,...,Disposable_Income,Potential_Savings_Groceries,Potential_Savings_Transport,Potential_Savings_Eating_Out,Potential_Savings_Entertainment,Potential_Savings_Utilities,Potential_Savings_Healthcare,Potential_Savings_Education,Potential_Savings_Miscellaneous,cluster_k3
0,44637.249636,49,0,Self_Employed,Tier_1,13391.174891,0.0,2206.490129,6658.768341,2636.970696,...,11265.627707,1685.696222,328.895281,465.769172,195.15132,678.292859,67.682471,0.0,85.735517,0
1,26858.596592,34,2,Retired,Tier_2,5371.719318,0.0,869.522617,2818.44446,1543.018778,...,9676.818733,540.306561,119.347139,141.866089,234.131168,286.668408,6.603212,56.306874,97.388606,0
2,50367.605084,35,1,Student,Tier_3,7555.140763,4612.103386,2201.80005,6313.222081,3221.396403,...,13891.450624,1466.073984,473.549752,410.857129,459.965256,488.383423,7.290892,106.653597,138.542422,2


In [2]:
import numpy as np

# --- core signals for advice ---
# savings_gap > 0 means the desired savings exceed disposable income (rounded to 2 decimals)
df["savings_gap"] = df["Desired_Savings"].sub(df["Disposable_Income"]).round(2)
df["can_meet_goal"] = df["savings_gap"].le(0)

# discretionary categories: keep only the ones actually present in the frame
discretionary_cols = [
    name
    for name in ("Entertainment", "Eating_Out", "Groceries", "Transport", "Miscellaneous")
    if name in df.columns
]
if discretionary_cols:
    df["discretionary_total"] = df[discretionary_cols].sum(axis=1).round(2)
else:
    # none of the categories exist -> constant zero column
    df["discretionary_total"] = 0.0

# potential savings columns, identified purely by their name prefix
potential_cols = [name for name in df.columns if name.startswith("Potential_Savings_")]

def top_n_potential(row, n=3):
    """Return the `n` largest potential-savings entries of one row.

    Reads the module-level `potential_cols` list to decide which fields of
    `row` to rank. The result is a Series labelled "top1", "top2", ... whose
    values are `(category, amount)` tuples, where `category` is the column
    name with the "Potential_Savings_" prefix removed.
    """
    ranked = row[potential_cols].sort_values(ascending=False).iloc[:n]
    pairs = [
        (column.replace("Potential_Savings_", ""), amount)
        for column, amount in zip(ranked.index, ranked.values)
    ]
    labels = [f"top{rank}" for rank in range(1, len(pairs) + 1)]
    return pd.Series(pairs, index=labels)

if potential_cols:
    # Single row-wise pass: `tops` has columns top1..topN whose cells are
    # (category, amount) tuples produced by top_n_potential.
    tops = df.apply(top_n_potential, axis=1)
    for i in range(1, 4):
        key = f"top{i}"
        # A rank column is missing when fewer than `i` potential-savings columns
        # exist; fall back to an all-NaN column so both outputs are still created.
        pairs = tops[key] if key in tops.columns else pd.Series(np.nan, index=df.index)
        # Unpack per column with Series.map instead of re-walking every row of
        # `tops` with DataFrame.apply for each of the six output columns.
        df[f"top{i}_category"] = pairs.map(lambda pair: pair[0] if isinstance(pair, tuple) else np.nan)
        df[f"top{i}_amount"] = pairs.map(lambda pair: float(pair[1]) if isinstance(pair, tuple) else np.nan)

# quick peek — the top* columns only exist when potential_cols is non-empty,
# so select just the columns that are present rather than raising KeyError
peek_cols = ["Income","Disposable_Income","Desired_Savings","savings_gap","can_meet_goal",
             "discretionary_total","top1_category","top1_amount","top2_category","top2_amount","top3_category","top3_amount"]
df[[c for c in peek_cols if c in df.columns]].head(5)


Unnamed: 0,Income,Disposable_Income,Desired_Savings,savings_gap,can_meet_goal,discretionary_total,top1_category,top1_amount,top2_category,top2_amount,top3_category,top3_amount
0,44637.249636,11265.627707,6200.537192,-5065.09,True,13315.25,Groceries,1685.696222,Utilities,678.292859,Eating_Out,465.769172
1,26858.596592,9676.818733,1923.176434,-7753.64,True,6625.32,Groceries,540.306561,Utilities,286.668408,Entertainment,234.131168
2,50367.605084,13891.450624,7050.360422,-6841.09,True,13400.11,Groceries,1466.073984,Utilities,488.383423,Transport,473.549752
3,101455.600247,31617.953615,16694.965136,-14922.99,True,32220.78,Groceries,1875.93277,Utilities,1389.815033,Eating_Out,1241.017448
4,24875.283548,6265.700532,1874.099434,-4391.6,True,6179.01,Groceries,788.953124,Utilities,194.11713,Entertainment,187.17375


In [3]:
def generate_advice(row):
    """Build a one-line advice string for a single row.

    The text is assembled from three parts, joined by single spaces:
      1. whether the savings goal can be met (`can_meet_goal`, with
         `Desired_Savings` or `savings_gap` quoted to the nearest unit);
      2. a comment on `discretionary_total` relative to `Income`
         (above 30% = high, below 10% = low, otherwise balanced);
      3. one sentence per top1..top3 savings category that has a non-missing
         category and a positive amount.
    """
    # 1. Savings goal check
    if row["can_meet_goal"]:
        goal_msg = f"👍 You can meet your savings goal of {row['Desired_Savings']:.0f}."
    else:
        goal_msg = f"⚠️ You are short by {row['savings_gap']:.0f} to reach your savings goal."

    # 2. Discretionary spending
    spend, income = row["discretionary_total"], row["Income"]
    if spend > 0.3 * income:
        spend_msg = "Your discretionary spending is high — consider trimming entertainment, eating out, or miscellaneous costs."
    elif spend < 0.1 * income:
        spend_msg = "Your discretionary spending is low — good discipline! You may have room to allocate more toward lifestyle or investments."
    else:
        spend_msg = "Your discretionary spending looks balanced."

    # 3. Top savings opportunities
    lever_msgs = []
    for rank in (1, 2, 3):
        category = row.get(f"top{rank}_category")
        amount = row.get(f"top{rank}_amount")
        # Skip missing categories first so the amount is never compared when absent
        if pd.isna(category) or not (amount > 0):
            continue
        lever_msgs.append(f"Potential savings: reduce {category} by ~{amount:.0f}.")

    return " ".join([goal_msg, spend_msg, *lever_msgs])

# Apply to a sample.
# The preview lives in its own frame: assigning a 10-row result to df["advice"]
# would leave every other row of the main frame as NaN.
advice_preview = df.head(10).assign(advice=lambda sample: sample.apply(generate_advice, axis=1))
advice_preview[["Income","Disposable_Income","Desired_Savings","advice"]].head(5)


Unnamed: 0,Income,Disposable_Income,Desired_Savings,advice
0,44637.249636,11265.627707,6200.537192,👍 You can meet your savings goal of 6201. Your...
1,26858.596592,9676.818733,1923.176434,👍 You can meet your savings goal of 1923. Your...
2,50367.605084,13891.450624,7050.360422,👍 You can meet your savings goal of 7050. Your...
3,101455.600247,31617.953615,16694.965136,👍 You can meet your savings goal of 16695. You...
4,24875.283548,6265.700532,1874.099434,👍 You can meet your savings goal of 1874. Your...


In [4]:
# Generate advice for all rows (overwrites any existing "advice" column)
df["advice"] = df.apply(generate_advice, axis=1)

# Save outputs — the whole frame, without the index column
df.to_csv("../outputs/06_financial_advice.csv", index=False)

print("✅ Financial advice generated and saved to ../outputs/06_financial_advice.csv")
df.loc[:, ["Income","Disposable_Income","Desired_Savings","advice"]].head(10)


✅ Financial advice generated and saved to ../outputs/06_financial_advice.csv


Unnamed: 0,Income,Disposable_Income,Desired_Savings,advice
0,44637.249636,11265.627707,6200.537192,👍 You can meet your savings goal of 6201. Your...
1,26858.596592,9676.818733,1923.176434,👍 You can meet your savings goal of 1923. Your...
2,50367.605084,13891.450624,7050.360422,👍 You can meet your savings goal of 7050. Your...
3,101455.600247,31617.953615,16694.965136,👍 You can meet your savings goal of 16695. You...
4,24875.283548,6265.700532,1874.099434,👍 You can meet your savings goal of 1874. Your...
5,24875.610267,7599.2653,1478.666359,👍 You can meet your savings goal of 1479. Your...
6,106119.692762,41595.325723,18399.564592,👍 You can meet your savings goal of 18400. You...
7,55431.341508,8465.944597,6636.413893,👍 You can meet your savings goal of 6636. Your...
8,20606.733121,362.917872,362.917872,👍 You can meet your savings goal of 363. Your ...
9,46304.790235,15690.256276,5615.104745,👍 You can meet your savings goal of 5615. Your...


In [5]:
def rupees(x):
    """Format an amount as whole rupees with thousands separators.

    Example: 1234.56 -> "₹1,235".

    Values that cannot be formatted as a number (e.g. None or a string) are
    returned as `str(x)` so that building advice text does not fail on a
    malformed cell.
    """
    try:
        return f"₹{x:,.0f}"
    except (TypeError, ValueError):
        # TypeError: the object does not accept a numeric format spec (e.g. None).
        # ValueError: the format spec is invalid for the type (e.g. str).
        # Anything else is unexpected and should surface instead of being hidden.
        return str(x)

# Display label for each cluster_k3 value
persona = {
    0: "Budget-conscious majority",
    1: "Affluent elite",
    2: "Comfortable middle",
}

def generate_advice_v3(row):
    """Build multi-line advice text for one row.

    Lines, in order and joined with newlines:
      * a "[persona]" header looked up from `cluster_k3` ("Segment" if unknown);
      * a savings-goal bullet based on `can_meet_goal`, `Desired_Savings`
        and `savings_gap`;
      * a bullet rating `discretionary_total` relative to `Disposable_Income`;
      * an optional bullet listing top1..top3 categories whose amount is >= 100;
      * an optional segment-specific "Next:" bullet for clusters 0, 1 and 2.
    Amounts are rendered through `rupees`.
    """
    segment = row.get("cluster_k3")
    lines = [f"[{persona.get(segment, 'Segment')}]"]

    # Savings goal
    goal = row["Desired_Savings"]
    shortfall = row["savings_gap"]
    goal_text = rupees(goal)
    if row["can_meet_goal"]:
        lines.append(f"• You can meet your savings goal of {goal_text}. Set an auto-transfer for {goal_text} right after income posts.")
    else:
        lines.append(f"• Short by {rupees(abs(shortfall))} to reach your goal of {goal_text}. Aim to free up this amount this month.")

    # Discretionary guidance: share of disposable income spent on discretionary items
    disposable = max(row["Disposable_Income"], 1e-9)  # floor avoids dividing by zero
    discretionary = row.get("discretionary_total", 0.0)
    if disposable > 0:
        share = discretionary / disposable
    else:
        share = 0

    if share > 0.7:
        spend_line = "• Discretionary spend is very high vs disposable income — trim entertainment/eating out first."
    elif share > 0.4:
        spend_line = "• Discretionary spend is elevated — target a 10–15% reduction."
    elif share < 0.2:
        spend_line = "• Discretionary spend is lean — consider allocating a portion to investments/emergency fund."
    else:
        spend_line = "• Discretionary spend looks balanced — keep channeling surplus into goals."
    lines.append(spend_line)

    # Top savings levers (>= ₹100)
    levers = []
    for rank in (1, 2, 3):
        category = row.get(f"top{rank}_category")
        amount = row.get(f"top{rank}_amount", 0)
        worth_listing = pd.notna(category) and amount and amount >= 100
        if worth_listing:
            levers.append(f"{category}: {rupees(amount)}")
    if levers:
        lines.append(f"• Top savings levers → {'; '.join(levers)}.")

    # Segment-specific next step
    if segment == 0:
        lines.append("• Next: build/maintain a 3-month emergency fund before increasing investments.")
    elif segment == 1:
        lines.append("• Next: raise automated investing (tax-advantaged first) and review insurance coverage limits.")
    elif segment == 2:
        lines.append("• Next: target a 15–20% total savings rate and automate contributions.")

    return "\n".join(lines)

# Apply to every row and preview index labels 0 through 4.
# Lifting the column-width limit keeps the multi-line advice text from being truncated.
# NOTE(review): the CSV written by the earlier save step predates this overwrite of
# "advice" — confirm whether the file should be re-saved with the v3 text.
pd.options.display.max_colwidth = None
df["advice"] = df.apply(generate_advice_v3, axis=1)
df.loc[:4][["Income","Disposable_Income","Desired_Savings","advice"]]


Unnamed: 0,Income,Disposable_Income,Desired_Savings,advice
0,44637.249636,11265.627707,6200.537192,"[Budget-conscious majority] ✅ You can meet your savings goal of $6,201. Set an automatic transfer of $6,201 right after income hits. Discretionary spend is very high relative to disposable income — trim entertainment/eating out first. Top savings levers → Groceries: $1,686; Utilities: $678; Eating_Out: $466. Next step: build/maintain a 3-month emergency fund before increasing investments."
1,26858.596592,9676.818733,1923.176434,"[Budget-conscious majority] ✅ You can meet your savings goal of $1,923. Set an automatic transfer of $1,923 right after income hits. Discretionary spend is elevated — a 10–15% cut would help reach goals faster. Top savings levers → Groceries: $540; Utilities: $287; Entertainment: $234. Next step: build/maintain a 3-month emergency fund before increasing investments."
2,50367.605084,13891.450624,7050.360422,"[Comfortable middle] ✅ You can meet your savings goal of $7,050. Set an automatic transfer of $7,050 right after income hits. Discretionary spend is very high relative to disposable income — trim entertainment/eating out first. Top savings levers → Groceries: $1,466; Utilities: $488; Transport: $474. Next step: target 15–20% total savings rate and automate contributions."
3,101455.600247,31617.953615,16694.965136,"[Comfortable middle] ✅ You can meet your savings goal of $16,695. Set an automatic transfer of $16,695 right after income hits. Discretionary spend is very high relative to disposable income — trim entertainment/eating out first. Top savings levers → Groceries: $1,876; Utilities: $1,390; Eating_Out: $1,241. Next step: target 15–20% total savings rate and automate contributions."
4,24875.283548,6265.700532,1874.099434,"[Budget-conscious majority] ✅ You can meet your savings goal of $1,874. Set an automatic transfer of $1,874 right after income hits. Discretionary spend is very high relative to disposable income — trim entertainment/eating out first. Top savings levers → Groceries: $789; Utilities: $194; Entertainment: $187. Next step: build/maintain a 3-month emergency fund before increasing investments."
