In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind


In [2]:
from io import StringIO

# Inline CSV sample: one header row followed by a single customer record.
data = """customer_id,tenure_months,monthly_charges,contract_type,customer_support_calls,payment_method,satisfaction_score,churn
CUST1000,39,30.55,Two Year,3,Debit Card,4,No
"""

# Wrap the text in an in-memory file object so pandas can parse it like a
# file; the buffer is closed as soon as the frame has been built.
with StringIO(data) as csv_buffer:
    df = pd.read_csv(csv_buffer)

# Bare last expression so the notebook renders the frame.
df


Unnamed: 0,customer_id,tenure_months,monthly_charges,contract_type,customer_support_calls,payment_method,satisfaction_score,churn
0,CUST1000,39,30.55,Two Year,3,Debit Card,4,No


In [4]:
# Hypothesis statements for the customer discount analysis.
# H1 is directional: it claims discounted customers return MORE often.
# NOTE(review): the loaded sample has no discount column; a later cell treats
# contract_type == 'Two Year' as the discount flag -- confirm that proxy.

H0 = "Customers who received a discount are NOT more likely to return."
H1 = "Customers who received a discount are more likely to return."

# Print each label next to its statement (print's default separator is a space).
for label, statement in (
    ("Null Hypothesis (H₀):", H0),
    ("Alternative Hypothesis (H₁):", H1),
):
    print(label, statement)


Null Hypothesis (H₀): Customers who received a discount are NOT more likely to return.
Alternative Hypothesis (H₁): Customers who received a discount are more likely to return.


In [5]:
# Discount flag: 'Yes' for customers on a 'Two Year' contract, 'No' otherwise.
is_two_year = df['contract_type'].eq('Two Year')
df['received_discount'] = is_two_year.map({True: 'Yes', False: 'No'})

# Retention flag: 1 when churn is exactly 'No' (retained), 0 for anything else.
did_not_churn = df['churn'].eq('No')
df['retained'] = did_not_churn.astype('int64')

# Show only the identifier and the two derived flags.
df[['customer_id', 'received_discount', 'retained']]


Unnamed: 0,customer_id,received_discount,retained
0,CUST1000,Yes,1


In [6]:
# Split the retention flag into the two comparison samples with a single
# label-based selection per group (row mask + column name).
got_discount = df['received_discount'].eq('Yes')
got_no_discount = df['received_discount'].eq('No')

discount_group = df.loc[got_discount, 'retained']
no_discount_group = df.loc[got_no_discount, 'retained']

# Display both samples as a tuple.
discount_group, no_discount_group


(0    1
 Name: retained, dtype: int64,
 Series([], Name: retained, dtype: int64))

In [7]:
# Two-sample t-test on the 0/1 retention flag of the two groups.
#
# H1 is directional ("discount customers are MORE likely to return"), so the
# test must be one-sided: alternative='greater' tests whether the mean of the
# first sample (discount) exceeds the mean of the second (no discount).
# The default two-sided alternative would answer a different question.
# The `alternative` keyword requires SciPy >= 1.6.
#
# The test is only attempted when each group has at least two observations;
# otherwise a message is printed instead.
if len(discount_group) > 1 and len(no_discount_group) > 1:
    t_stat, p_value = ttest_ind(
        discount_group,
        no_discount_group,
        alternative='greater',
    )
    print("t-statistic:", t_stat)
    print("p-value:", p_value)
else:
    print("T-test cannot be performed: insufficient data in one or both groups.")


T-test cannot be performed: insufficient data in one or both groups.


In [8]:
# Findings (Plain English)
# Fixed narrative text for the single-row sample; it is not computed from df.

findings = [
    "A t-test compares the average retention between customers who received a discount and those who did not.",
    "In this dataset, there is only one customer record, so a statistical t-test cannot be meaningfully performed.",
    "With insufficient data, we cannot reject or accept the null hypothesis."
]

# Number the findings from 1 and emit them one per line in a single print.
numbered_lines = (f"{n}. {text}" for n, text in enumerate(findings, start=1))
print("\n".join(numbered_lines))


1. A t-test compares the average retention between customers who received a discount and those who did not.
2. In this dataset, there is only one customer record, so a statistical t-test cannot be meaningfully performed.
3. With insufficient data, we cannot reject or accept the null hypothesis.


In [9]:
# Conclusion
# Fixed narrative text, assembled from two sentences joined by one space.

conclusion_sentences = [
    (
        "Due to the limited dataset size, there is no statistical evidence "
        "to conclude that customers who received a discount are more likely "
        "to return."
    ),
    (
        "A larger dataset with multiple customers in both discount and "
        "non-discount groups is required to perform a valid t-test."
    ),
]
conclusion = " ".join(conclusion_sentences)

print(conclusion)


Due to the limited dataset size, there is no statistical evidence to conclude that customers who received a discount are more likely to return. A larger dataset with multiple customers in both discount and non-discount groups is required to perform a valid t-test.


In [10]:
# Reason the t-test was skipped.
# Fixed explanatory text; it is printed as-is and not derived from the data.

reason = (
    "A t-test could not be conducted because the dataset "
    "does not contain enough observations in both groups "
    "to test statistical significance."
)

print(reason)


A t-test could not be conducted because the dataset does not contain enough observations in both groups to test statistical significance.
