In [5]:
import pandas as pd


You are a Business Analyst on the **Starbucks** Rewards team investigating customer transaction behavior. Your team wants to understand how loyalty program membership influences purchasing patterns. The goal is to compare transaction metrics between loyalty members and non-members.

In [6]:
# Customers table: read the CSV and cast the `is_loyalty_member` column to bool.
dim_customers = pd.read_csv('dim_customers.csv').assign(
    is_loyalty_member=lambda frame: frame['is_loyalty_member'].astype(bool)
)

# Transactions table: read the CSV and parse `transaction_date` into datetimes.
fct_transactions = pd.read_csv('fct_transactions.csv').assign(
    transaction_date=lambda frame: pd.to_datetime(frame['transaction_date'])
)

# Print both tables, each under its own heading (the second heading is
# preceded by a blank line).
print("Customer Data:", dim_customers, sep="\n")
print("\nTransaction Data:", fct_transactions, sep="\n")


Customer Data:
    customer_id  is_loyalty_member
0             1               True
1             2              False
2             3               True
3             4              False
4             5               True
5             6               True
6             7              False
7             8               True
8             9              False
9            10              False
10           11               True
11           12              False

Transaction Data:
    customer_id  transaction_id transaction_date  transaction_value
0             1             101       2024-07-05               5.50
1             1             102       2024-07-15               7.25
2             2             103       2024-07-10               4.00
3             3             104       2024-07-20               8.75
4             4             105       2024-07-03               6.50
5             5             106       2024-07-22               9.00
6             6             107    

### Question 1 of 3

For the month of July 2024, how many transactions did loyalty program members and non-members make? Compare the transaction counts between these two groups.

In [None]:
# Question 1: July 2024 transaction counts, loyalty members vs non-members.
#
# Reads `fct_transactions` and `dim_customers`; produces `july` (July rows with
# the loyalty flag attached), `counts` (transactions per group) and the scalar
# `member_count` / `non_member_count`.

# Filter transactions for July 2024.
# The window is half-open (>= 1 July, < 1 August). `transaction_date` is a
# datetime column, so an inclusive upper bound of "2024-07-31" is interpreted
# as midnight at the start of 31 July and would drop every transaction
# time-stamped later on that day.
txn_dates = fct_transactions["transaction_date"]
july = fct_transactions[
    (txn_dates >= "2024-07-01") & (txn_dates < "2024-08-01")
].copy()

# Attach loyalty flag. The left join keeps every July transaction, including
# any whose customer_id has no row in dim_customers (flag is then missing).
july = july.merge(
    dim_customers[["customer_id", "is_loyalty_member"]],
    on="customer_id",
    how="left"
)

# Human-readable group label per transaction; a missing flag maps to NaN.
membership = july["is_loyalty_member"].map({True: "Member", False: "Non-member"})

# Count transactions by membership. groupby drops NaN labels, and reindex
# guarantees both groups are present (with 0) even if one has no transactions.
counts = (
    july
    .groupby(membership)
    .size()
    .reindex(["Member", "Non-member"], fill_value=0)
    .rename("transaction_count")
    .astype(int)
)

# Simple comparison
member_count = int(counts["Member"])
non_member_count = int(counts["Non-member"])

# Surface transactions that could not be assigned to either group rather than
# dropping them silently.
unmatched = int(membership.isna().sum())
if unmatched:
    print(f"Note: {unmatched} July 2024 transaction(s) have no loyalty status and are excluded.")

# Display the results
print(f"\nMembers: {member_count} | Non-members: {non_member_count}")
if non_member_count > 0:
    diff = member_count - non_member_count
    ratio = member_count / non_member_count
    print(f"Difference: {diff:+} | Ratio (Members/Non-members): {ratio:.2f}x")
else:
    # Avoid dividing by zero when there are no non-member transactions.
    print("Non-members have 0 transactions in July 2024; ratio not defined.")



Members: 10 | Non-members: 6
Difference: +4 | Ratio (Members/Non-members): 1.67x


### Question 2 of 3

What is the average transaction value for loyalty program members and non-members during July 2024? Use this to identify which group has a higher average transaction value.

In [None]:
# Question 2: average transaction value for July 2024 by membership.
#
# Reads `fct_transactions` and `dim_customers`; produces `july`, `avg`
# (mean transaction value per group), `member_avg` and `non_member_avg`.
#
# The July frame is always rebuilt from the source tables instead of being
# picked up from whatever `july` happens to exist in the kernel, so this cell
# gives the same answer on a fresh kernel and after out-of-order re-runs.

# Half-open window (>= 1 July, < 1 August): `transaction_date` is a datetime
# column, so an inclusive upper bound of "2024-07-31" would mean midnight at
# the start of 31 July and miss transactions time-stamped later that day.
txn_dates = fct_transactions["transaction_date"]
july = fct_transactions[
    (txn_dates >= "2024-07-01") & (txn_dates < "2024-08-01")
].merge(
    dim_customers[["customer_id", "is_loyalty_member"]],
    on="customer_id",
    how="left"
)

# Group label built from the same frame that is grouped, so no implicit index
# alignment is involved. Rows with a missing flag get a NaN label and are left
# out of both groups by groupby.
membership = july["is_loyalty_member"].map({True: "Member", False: "Non-member"})

# Compute averages. mean() skips missing transaction values; reindex keeps both
# groups in a fixed order and yields NaN for a group with no usable values.
avg = (
    july
    .groupby(membership)["transaction_value"]
    .mean()
    .rename("avg_transaction_value")
    .reindex(["Member", "Non-member"])
)

member_avg = avg.get("Member")
non_member_avg = avg.get("Non-member")

# Report both averages when available, otherwise whichever one exists.
if pd.notna(member_avg) and pd.notna(non_member_avg):
    if member_avg > non_member_avg:
        higher = "Members"
    elif member_avg < non_member_avg:
        higher = "Non-members"
    else:
        higher = "Tie"
    print(f"\nMembers: ${member_avg:.2f} | Non-members: ${non_member_avg:.2f} | Higher: {higher}")
elif pd.notna(member_avg):
    print(f"\nMembers: ${member_avg:.2f} | Non-members: N/A")
elif pd.notna(non_member_avg):
    print(f"\nMembers: N/A | Non-members: ${non_member_avg:.2f}")
else:
    print("\nNo valid transaction values in July 2024.")



Members: $8.80 | Non-members: $4.92 | Higher: Members


### Question 3 of 3

Determine the percentage difference in average transaction value between loyalty program members and non-members for July 2024.

Think about how you can build on your previous work calculating averages, and then compute the percentage difference between the two groups.

In [None]:
# Question 3: percentage difference in average transaction value
# (Members vs Non-members) for July 2024.
#
# Reads `fct_transactions` and `dim_customers`; produces `july`, `avg`,
# `member_avg`, `non_member_avg` and, when it is defined, `pct_diff`.
#
# The averages are always recomputed from the source tables instead of reusing
# whatever `avg` happens to exist in the kernel, so the figure printed here
# cannot come from stale state after out-of-order re-runs.

# Half-open window (>= 1 July, < 1 August): `transaction_date` is a datetime
# column, so an inclusive upper bound of "2024-07-31" would mean midnight at
# the start of 31 July and miss transactions time-stamped later that day.
txn_dates = fct_transactions["transaction_date"]
july = fct_transactions[
    (txn_dates >= "2024-07-01") & (txn_dates < "2024-08-01")
].merge(
    dim_customers[["customer_id", "is_loyalty_member"]],
    on="customer_id",
    how="left",
)

# Group label built from the same frame that is grouped; rows with a missing
# flag get a NaN label and are left out of both groups by groupby.
membership = july["is_loyalty_member"].map({True: "Member", False: "Non-member"})

# mean() skips missing transaction values; reindex yields NaN for a group with
# no usable values.
avg = (
    july
    .groupby(membership)["transaction_value"]
    .mean()
    .rename("avg_transaction_value")
    .reindex(["Member", "Non-member"])
)

member_avg = avg.get("Member")
non_member_avg = avg.get("Non-member")

if pd.notna(member_avg) and pd.notna(non_member_avg):
    print(f"Members: ${member_avg:.2f} | Non-members: ${non_member_avg:.2f}")
    if non_member_avg != 0:
        # Relative difference with the non-member average as the baseline.
        pct_diff = (member_avg - non_member_avg) / non_member_avg * 100
        print(f"Percentage difference vs Non-members: {pct_diff:+.2f}%")
    else:
        # A zero baseline would mean dividing by zero.
        print("Cannot compute percentage difference: Non-member average is 0.")
elif pd.notna(member_avg):
    print(f"Members: ${member_avg:.2f} | Non-members: N/A")
elif pd.notna(non_member_avg):
    print(f"Members: N/A | Non-members: ${non_member_avg:.2f}")
else:
    print("No valid transaction values in July 2024.")


Members: $8.80 | Non-members: $4.92
Percentage difference vs Non-members: +78.98%
