# Income and Spending Analysis
## Data Visualization for Healthcare Product Launch

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Plot appearance: reset matplotlib to its default style first, then switch
# the color cycle to seaborn's "husl" palette (order matters: the style
# reset would otherwise overwrite the palette).
plt.style.use('default')
sns.set_palette("husl")

# Read the user dataset (relative path, one directory above the working dir)
df = pd.read_csv('../user_data.csv')

# Preview the leading rows of the dataset
print("Data Overview:")
head_rows = df.head()
display(head_rows)

# Summary statistics as computed by DataFrame.describe()
print("\nBasic Statistics:")
summary_stats = df.describe()
display(summary_stats)

In [None]:
# 1. Rank ages by mean total income and chart them, highest first
fig, ax = plt.subplots(figsize=(12, 6))

# Mean of 'total_income' per distinct 'age', sorted in descending order
mean_income_by_age = df.groupby('age')['total_income'].mean()
age_income = mean_income_by_age.sort_values(ascending=False)

# Draw the bars on the axes created above
age_income.plot(kind='bar', color='lightcoral', ax=ax)
ax.set_title('Average Income by Age', fontsize=16, fontweight='bold')
ax.set_xlabel('Age', fontsize=12)
ax.set_ylabel('Average Income ($)', fontsize=12)
plt.xticks(rotation=45)
ax.grid(axis='y', alpha=0.3)

# Save a high-resolution copy, then render the figure
fig.tight_layout()
fig.savefig('age_income_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# The series is already sorted, so head() yields the five highest averages
print("Top 5 Ages by Average Income:")
print(age_income.head())

In [None]:
# 2. Compare average spending per expense category across genders
expense_columns = ['utilities', 'entertainment', 'school_fees', 'shopping', 'healthcare']

# Mean of each expense column within every 'gender' group
gender_expenses = df.groupby('gender')[expense_columns].mean()

# DataFrame.plot opens a brand-new figure when it is not given an Axes, so a
# separately created plt.figure(figsize=...) would stay empty and the chart
# would be drawn at the default size. Create the Axes explicitly and pass it
# to pandas so the 14x7 size is actually applied to the chart.
fig, ax = plt.subplots(figsize=(14, 7))
gender_expenses.plot(kind='bar', ax=ax)
ax.set_title('Average Spending by Gender Across Categories', fontsize=16, fontweight='bold')
ax.set_xlabel('Gender', fontsize=12)
ax.set_ylabel('Average Spending ($)', fontsize=12)
plt.xticks(rotation=0)
# Anchor the legend outside the plotting area, to the right of the axes
ax.legend(title='Expense Categories', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(axis='y', alpha=0.3)

# bbox_inches='tight' keeps the outside legend within the saved image
fig.tight_layout()
fig.savefig('gender_spending_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("Average Spending by Gender:")
display(gender_expenses)

In [None]:
# 3. Additional analysis: how healthcare spending relates to total income
fig, ax = plt.subplots(figsize=(10, 6))

# The two columns being compared
income_col = df['total_income']
healthcare_col = df['healthcare']

# One semi-transparent point per row: income on x, healthcare spending on y
ax.scatter(income_col, healthcare_col, alpha=0.6, color='purple')
ax.set_title('Income vs Healthcare Spending', fontsize=16, fontweight='bold')
ax.set_xlabel('Total Income ($)', fontsize=12)
ax.set_ylabel('Healthcare Spending ($)', fontsize=12)
ax.grid(alpha=0.3)

# Save a high-resolution copy, then render the figure
fig.tight_layout()
fig.savefig('income_vs_healthcare.png', dpi=300, bbox_inches='tight')
plt.show()

# Correlation coefficient between the two columns (Series.corr defaults to Pearson)
correlation = income_col.corr(healthcare_col)
print(f"Correlation between income and healthcare spending: {correlation:.2f}")

## Summary of Findings

Based on the analysis of the current data:

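1. **Income by Age**: [Add your observations here, e.g., which ages have the highest average income in the bar chart]
2. **Spending Patterns by Gender**: [Add your observations here, e.g., which expense categories differ most between genders]
3. **Healthcare Spending**: [Add your observations here, e.g., the strength and direction of the income–healthcare correlation]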

These insights will help inform our healthcare product launch strategy.