Load Data in Jupyter Notebook

In [None]:
import ast
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the survey responses from the CSV export into a DataFrame
survey_csv_path = "data/survey_data.csv"
df = pd.read_csv(survey_csv_path)

# Convert expenses column from string to dictionary if necessary
def convert_expenses(expense_str):
    """Return the expenses of one survey row as a dictionary.

    Parameters
    ----------
    expense_str : str | dict | object
        The raw cell value. A dictionary is returned unchanged; a string is
        parsed as a Python literal (e.g. ``"{'utilities': 100}"``).

    Returns
    -------
    dict
        The parsed expenses, or an empty dictionary when the value is
        missing, is not a string/dict, cannot be parsed, or parses to
        something other than a dictionary.
    """
    if isinstance(expense_str, dict):
        return expense_str
    if not isinstance(expense_str, str):
        # e.g. NaN for an empty CSV cell: there is nothing to parse
        return {}
    try:
        # literal_eval only accepts Python literals, so text coming from the
        # CSV file can never execute code the way eval() would.
        parsed = ast.literal_eval(expense_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return {}
    # Callers look up categories with .get(), so only a dict is usable
    return parsed if isinstance(parsed, dict) else {}

# Parse the raw "expenses" column into Python objects, one per row
df["expenses"] = df["expenses"].apply(convert_expenses)

# Extract expense categories into separate columns
expense_categories = ["utilities", "entertainment", "school_fees", "shopping", "healthcare"]
for category in expense_categories:
    # A row whose expenses are missing or did not parse to a dictionary has no
    # .get(); count it as 0 for every category instead of raising AttributeError.
    df[category] = df["expenses"].apply(
        lambda expenses: expenses.get(category, 0) if isinstance(expenses, dict) else 0
    )


Visualization 1: Ages with the Highest Total Income

In [None]:
# Visualization 1: Ages with the highest income
# Create the output directory up front; savefig raises FileNotFoundError
# when the target directory does not exist.
income_by_age_path = Path("visualizations/income_by_age.png")
income_by_age_path.parent.mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(10, 5))
# estimator=sum makes each bar the total income over all rows sharing an age.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of `errorbar=None`;
# kept as-is because the installed seaborn version is not visible here.
sns.barplot(x="age", y="income", data=df, estimator=sum, ci=None, ax=ax)
ax.set_title("Total Income by Age")
ax.set_xlabel("Age")
ax.set_ylabel("Total Income")
ax.tick_params(axis="x", labelrotation=45)
# Save before show(): the figure may no longer be available afterwards
fig.savefig(income_by_age_path)
plt.show()

Visualization 2: Gender-Based Spending Distribution

In [None]:
# Visualization 2: Gender distribution across spending categories
# Reshape to long form: one row per (respondent, expense category) pair
df_melted = df.melt(
    id_vars=["gender"],
    value_vars=expense_categories,
    var_name="Expense Category",
    value_name="Amount",
)

# Create the output directory up front; savefig raises FileNotFoundError
# when the target directory does not exist.
spending_by_gender_path = Path("visualizations/spending_by_gender.png")
spending_by_gender_path.parent.mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x="Expense Category", y="Amount", hue="gender", data=df_melted, ax=ax)
ax.set_title("Spending Distribution by Gender")
ax.set_xlabel("Expense Category")
ax.set_ylabel("Amount Spent")
ax.tick_params(axis="x", labelrotation=45)
ax.legend(title="Gender")
# Save before show(): the figure may no longer be available afterwards
fig.savefig(spending_by_gender_path)
plt.show()

Save Visualizations

In [None]:
# The plotting cells already wrote both charts to "visualizations/".
# Calling plt.savefig() here would save whatever the *current* figure is under
# both file names (normally an empty canvas, since no plot is drawn in this
# cell and plt.show() has already run), so copy the finished PNGs instead.
source_dir = Path("visualizations")
images_dir = Path("images")
images_dir.mkdir(parents=True, exist_ok=True)

for filename in ("spending_by_gender.png", "income_by_age.png"):
    # Raises FileNotFoundError if the plotting cell for this chart was not run
    shutil.copyfile(source_dir / filename, images_dir / filename)

print("Visualizations generated and saved successfully!")