In [1]:
import pandas as pd
import numpy as np

# Read the marketing customer CSV directly from its remote location into `df`.
url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Show the first five rows followed by the full column index, one per line.
print(df.head(), df.columns, sep="\n")


   Unnamed: 0 Customer       State  Customer Lifetime Value Response  \
0           0  DK49336     Arizona              4809.216960       No   
1           1  KX64629  California              2228.525238       No   
2           2  LZ68649  Washington             14947.917300       No   
3           3  XL78013      Oregon             22332.439460      Yes   
4           4  QA50777      Oregon              9025.067525       No   

   Coverage Education Effective To Date EmploymentStatus Gender  ...  \
0     Basic   College           2/18/11         Employed      M  ...   
1     Basic   College           1/18/11       Unemployed      F  ...   
2     Basic  Bachelor           2/10/11         Employed      M  ...   
3  Extended   College           1/11/11         Employed      M  ...   
4   Premium  Bachelor           1/17/11    Medical Leave      F  ...   

   Number of Open Complaints Number of Policies     Policy Type        Policy  \
0                        0.0                  9  Corp

In [7]:
# Keep only rows where 'Total Claim Amount' exceeds 1000 AND 'Response' is "Yes".
# Both conditions are element-wise boolean Series combined with `&`.
df_filtered = df.loc[
    df['Total Claim Amount'].gt(1000) & df['Response'].eq("Yes")
]

# Report how many rows/columns survived, then preview the first few matches.
print("Filtered DataFrame shape:", df_filtered.shape)
print(df_filtered.head())


Filtered DataFrame shape: (67, 26)
     Unnamed: 0 Customer       State  Customer Lifetime Value Response  \
189         189  OK31456  California             11009.130490      Yes   
236         236  YJ16163      Oregon             11009.130490      Yes   
419         419  GW43195      Oregon             25807.063000      Yes   
442         442  IP94270     Arizona             13736.132500      Yes   
587         587  FJ28407  California              5619.689084      Yes   

     Coverage             Education Effective To Date EmploymentStatus Gender  \
189   Premium              Bachelor           1/24/11         Employed      F   
236   Premium              Bachelor           1/24/11         Employed      F   
419  Extended               College           2/13/11         Employed      F   
442   Premium                Master           2/13/11         Disabled      F   
587   Premium  High School or Below           1/26/11       Unemployed      M   

     ...  Number of Open Complain

In [9]:
# Restrict the original frame to rows whose 'Response' is "Yes".
df_response_yes = df.loc[df['Response'].eq("Yes")]

# Mean 'Total Claim Amount' for every ('Policy Type', 'Gender') pair, rounded to
# two decimals. `as_index=False` keeps the grouping keys as ordinary columns, so
# the result is a flat table with a default integer index.
avg_claim = (
    df_response_yes
    .groupby(['Policy Type', 'Gender'], as_index=False)['Total Claim Amount']
    .mean()
    .round(2)
)

print(
    "Average Total Claim Amount by Policy Type and Gender (for Response = 'Yes'):",
    avg_claim,
    sep="\n",
)


Average Total Claim Amount by Policy Type and Gender (for Response = 'Yes'):
      Policy Type Gender  Total Claim Amount
0  Corporate Auto      F              433.74
1  Corporate Auto      M              408.58
2   Personal Auto      F              452.97
3   Personal Auto      M              457.01
4    Special Auto      F              453.28
5    Special Auto      M              429.53


In [11]:
# Number of rows per 'State', returned as a two-column table
# ('State', 'Customer Count').
customers_by_state = (
    df.groupby('State')
      .size()
      .rename('Customer Count')
      .reset_index()
)

# Retain only the states whose row count is strictly greater than 500.
states_over_500 = customers_by_state.loc[
    lambda counts: counts['Customer Count'].gt(500)
]
print("States with more than 500 customers:", states_over_500, sep="\n")


States with more than 500 customers:
        State  Customer Count
0     Arizona            1937
1  California            3552
2      Nevada             993
3      Oregon            2909
4  Washington             888


In [12]:
# Max, min and median of 'Customer Lifetime Value' for each
# ('Education', 'Gender') combination. Only the 'median' column is rounded
# (to two decimals); 'max' and 'min' keep their full precision.
ltv_stats = (
    df.groupby(['Education', 'Gender'])['Customer Lifetime Value']
      .agg(['max', 'min', 'median'])
      .round({'median': 2})
      .reset_index()
)

print("Customer Lifetime Value Stats by Education and Gender:", ltv_stats, sep="\n")


Customer Lifetime Value Stats by Education and Gender:
              Education Gender          max          min   median
0              Bachelor      F  73225.95652  1904.000852  5640.51
1              Bachelor      M  67907.27050  1898.007675  5548.03
2               College      F  61850.18803  1898.683686  5623.61
3               College      M  61134.68307  1918.119700  6005.85
4                Doctor      F  44856.11397  2395.570000  5332.46
5                Doctor      M  32677.34284  2267.604038  5577.67
6  High School or Below      F  55277.44589  2144.921535  6039.55
7  High School or Below      M  83325.38119  1940.981221  6286.73
8                Master      F  51016.06704  2417.777032  5729.86
9                Master      M  50568.25912  2272.307310  5579.10
