# In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load engineered data
df = pd.read_csv('../data/engineered_data.csv')

# The plots below are saved under ../output/plots. Create the folder up front
# so savefig() does not raise FileNotFoundError when it does not exist yet.
os.makedirs('../output/plots', exist_ok=True)

# === Plot 1: Boxplot for Income (99th percentile cap) ===
# Keep only rows whose Income is at or below the 99th percentile so extreme
# values do not squash the box. Rows with a missing Income are excluded too,
# because the comparison evaluates to False for NaN.
income_cap = df['Income'].quantile(0.99)
filtered_df = df[df['Income'] <= income_cap]

plt.figure(figsize=(10, 6))
sns.boxplot(x='Income', data=filtered_df)
plt.title("Income Distribution (Capped at 99th percentile)")
plt.savefig('../output/plots/income_boxplot.png')
plt.show()

# === Plot 2: Histogram for Age ===
# A 20-bin histogram of the Age column, drawn on a fresh 10x6 figure.
plt.figure(figsize=(10, 6))
age_ax = df['Age'].plot.hist(
    bins=20,
    edgecolor='black',
    title="Age Distribution",
)
age_ax.set_xlabel("Age")
plt.savefig('../output/plots/age_histogram.png')
plt.show()

# === Plot 3: Correlation heatmap (numerical only) ===
# Restrict to numeric columns first; the pairwise correlation matrix of those
# columns is then rendered with the value annotated in every cell.
numeric_df = df.select_dtypes(include='number')
corr_matrix = numeric_df.corr()

plt.figure(figsize=(12, 8))
heatmap_ax = sns.heatmap(corr_matrix, cmap='coolwarm', annot=True)
heatmap_ax.set_title("Correlation Heatmap")
plt.savefig('../output/plots/correlation_heatmap.png')
plt.show()

# === Plot 4: Marital Status Bar Plot ===
# value_counts() sorts by frequency, so its index gives the bar order from the
# most common status to the least common one.
marital_order = df['Marital_Status'].value_counts().index

plt.figure(figsize=(8, 5))
marital_ax = sns.countplot(data=df, x='Marital_Status', order=marital_order)
marital_ax.set_title("Distribution of Marital Status")
plt.xticks(rotation=45)  # slant the category labels
plt.tight_layout()
plt.savefig('../output/plots/marital_status_barplot.png')
plt.show()

# === Plot 5: Education Level Bar Plot ===
# Bars are ordered by how often each education level occurs (most frequent
# first), taken from the index of value_counts().
education_order = df['Education'].value_counts().index

plt.figure(figsize=(8, 5))
education_ax = sns.countplot(data=df, x='Education', order=education_order)
education_ax.set_title("Distribution of Education Levels")
plt.xticks(rotation=45)  # slant the category labels
plt.tight_layout()
plt.savefig('../output/plots/education_barplot.png')
plt.show()

# === Plot 6: Spending by Product Type ===
product_cols = ['MntWines', 'MntFruits', 'MntMeatProducts',
                'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']
# Column-wise totals over all rows, largest first, so the bars are drawn in
# descending order of total spend.
product_totals = df[product_cols].sum().sort_values(ascending=False)

plt.figure(figsize=(10, 6))
# seaborn 0.13 deprecated passing `palette` without `hue` (removal planned for
# 0.14). Mapping the categories to `hue` keeps one 'muted' colour per bar;
# dodge=False keeps the bars full-width and centred on releases that would
# otherwise dodge by hue.
spending_ax = sns.barplot(
    x=product_totals.index,
    y=product_totals.values,
    hue=product_totals.index,
    palette='muted',
    dodge=False,
)
# Some seaborn versions draw a legend for `hue`; it would only repeat the x tick
# labels, so drop it when present.
if spending_ax.get_legend() is not None:
    spending_ax.get_legend().remove()
plt.title("Total Spending by Product Type")
plt.ylabel("Total Amount")
plt.tight_layout()
plt.savefig('../output/plots/product_spending_barplot.png')
plt.show()

# === Plot 7: Age vs Campaign Response ===
# One box per distinct Response value, showing the Age distribution within
# that group.
plt.figure(figsize=(10, 6))
response_ax = sns.boxplot(data=df, x='Response', y='Age')
response_ax.set_title("Age vs Campaign Acceptance")
plt.savefig('../output/plots/age_vs_response_boxplot.png')
plt.show()

# === Plot 8: Country vs Campaign Acceptance ===
# Collect every Country_* column (presumably one-hot indicators, since rows
# are selected with `== 1`) instead of probing for 'Country_US' alone, so the
# chart is still drawn when that particular column is absent.
country_col = [col for col in df.columns if col.startswith('Country_')]

if country_col:
    # For each country column, sum Response over the rows flagged with 1.
    # The label is the part of the column name after the 'Country_' prefix.
    country_acceptance = pd.Series({
        col.split('_', 1)[1]: df.loc[df[col] == 1, 'Response'].sum()
        for col in country_col
    }).sort_values(ascending=False)

    plt.figure(figsize=(10, 6))
    sns.barplot(x=country_acceptance.index, y=country_acceptance.values)
    plt.title("Campaign Acceptances by Country")
    plt.ylabel("Number of Acceptances")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('../output/plots/campaign_acceptance_by_country.png')
    plt.show()

# === Plot 9: Children vs Total Spending ===
# One box per Total_Children value, showing the spread of Total_Spending
# within that group.
plt.figure(figsize=(10, 6))
children_ax = sns.boxplot(data=df, x='Total_Children', y='Total_Spending')
children_ax.set_title("Total Spending by Number of Children")
plt.savefig('../output/plots/children_vs_spending_boxplot.png')
plt.show()

# === Plot 10: Complaints vs Education ===
# Row counts per Education level, with each level split into separate bars by
# the value of Complain.
plt.figure(figsize=(10, 6))
complaints_ax = sns.countplot(data=df, x='Education', hue='Complain')
complaints_ax.set_title("Complaints by Education Level")
plt.xticks(rotation=45)  # slant the category labels
plt.tight_layout()
plt.savefig('../output/plots/education_vs_complaints_barplot.png')
plt.show()
