In [None]:
import pandas as pd

# Location of the marketing customer analysis dataset (CSV).
url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv"


def standardize_columns(frame):
    """Return a copy of *frame* with lowercase, underscore-separated column names.

    Args:
        frame: DataFrame whose column labels are strings.

    Returns:
        A new DataFrame; the input is left unmodified.
    """
    cleaned = frame.copy()
    # regex=False pins literal replacement regardless of the pandas default.
    cleaned.columns = cleaned.columns.str.lower().str.replace(' ', '_', regex=False)
    return cleaned


def high_claim_responders(frame, min_claim=1000, response='Yes'):
    """Select rows with a large claim and a matching marketing response.

    Args:
        frame: DataFrame with 'total_claim_amount' and 'response' columns.
        min_claim: Exclusive lower bound on 'total_claim_amount'.
        response: Value that the 'response' column must equal.

    Returns:
        An independent copy of the matching rows, so later edits to the
        result never touch (or warn about) the source frame.
    """
    claim_is_high = frame['total_claim_amount'] > min_claim
    responded = frame['response'] == response
    return frame[claim_is_high & responded].copy()


def average_claim_by_policy_and_gender(frame, response='Yes'):
    """Mean 'total_claim_amount' per policy type (rows) and gender (columns).

    Only rows whose 'response' equals *response* are considered; the means
    are rounded to two decimals.
    """
    responders = frame[frame['response'] == response]
    return responders.pivot_table(
        values='total_claim_amount',
        index='policy_type',
        columns='gender',
        aggfunc='mean',
    ).round(2)


def states_over_threshold(counts, min_customers=500):
    """Keep only the entries of *counts* strictly greater than *min_customers*."""
    return counts[counts > min_customers]


def lifetime_value_summary(frame):
    """Max, min and median 'customer_lifetime_value' per (education, gender).

    Returns:
        DataFrame indexed by (education, gender) with columns
        'max', 'min' and 'median', rounded to two decimals.
    """
    grouped = frame.groupby(['education', 'gender'])['customer_lifetime_value']
    return grouped.agg(['max', 'min', 'median']).round(2)


# The download and report run only when this file is executed directly, so
# importing the helpers above has no network or printing side effects.
# The statements below are still at module scope: every result name stays a
# module-level global exactly as before.
if __name__ == "__main__":
    # Load the dataset and standardize column names (lowercase, underscores)
    data = standardize_columns(pd.read_csv(url))

    # Customers with total_claim_amount > $1,000 and response "Yes"
    filtered_data = high_claim_responders(data, min_claim=1000, response='Yes')

    # Display the first few rows of the filtered DataFrame
    print("Filtered Data with total_claim_amount > $1,000 and response 'Yes':")
    print(filtered_data.head())

    # Average total_claim_amount by policy type and gender for "Yes" responders
    avg_claims_by_policy_gender = average_claim_by_policy_and_gender(data, response='Yes')

    print("\nAverage total_claim_amount by policy type and gender for customers who responded 'Yes':")
    print(avg_claims_by_policy_gender)

    # Number of rows per state; each row is counted as one customer
    customer_counts_by_state = data['state'].value_counts()

    # Only states with more than 500 customers
    states_with_over_500_customers = states_over_threshold(customer_counts_by_state, min_customers=500)

    print("\nTotal number of customers per state (only states with > 500 customers):")
    print(states_with_over_500_customers)

    # Maximum, minimum, and median customer lifetime value by education level and gender
    lifetime_value_stats = lifetime_value_summary(data)

    print("\nMaximum, minimum, and median customer lifetime value by education level and gender:")
    print(lifetime_value_stats)

    # Conclusions
    # NOTE: fixed summary text; it is not derived from the values computed above.
    print("\nConclusions:")
    print("1. The filtered DataFrame has customers who have claimed more than $1,000 and responded positively to marketing.")
    print("2. The average total claim amount varies by policy type and gender, indicating differing behaviors based on these demographics.")
    print("3. States with more than 500 customers have significant concentration, which may be important for targeted marketing strategies.")
    print("4. Customer lifetime value statistics reveal educational and gender trends that can inform customer segmentation and tailored marketing approaches.")