In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

In [2]:
from io import StringIO

# Inline CSV fixture: one header line and one customer record. Joining with a
# trailing empty item keeps the final newline of the original text.
data = "\n".join([
    "customer_id,tenure_months,monthly_charges,contract_type,customer_support_calls,payment_method,satisfaction_score,churn",
    "CUST1000,39,30.55,Two Year,3,Debit Card,4,No",
    "",
])

# Parse the in-memory CSV text into a DataFrame and display it.
df = pd.read_csv(StringIO(data))
df

Unnamed: 0,customer_id,tenure_months,monthly_charges,contract_type,customer_support_calls,payment_method,satisfaction_score,churn
0,CUST1000,39,30.55,Two Year,3,Debit Card,4,No


In [3]:
# Print the heading followed by the null and alternative hypotheses, one per line.
print(
    "Hypothesis Testing",
    "H0 (Null Hypothesis): Customers who received a discount are NOT more likely to return.",
    "H1 (Alternative Hypothesis): Customers who received a discount are more likely to return.",
    sep="\n",
)

Hypothesis Testing
H0 (Null Hypothesis): Customers who received a discount are NOT more likely to return.
H1 (Alternative Hypothesis): Customers who received a discount are more likely to return.


In [4]:
# Discount flag: 'Yes' when the contract type is 'Two Year', otherwise 'No'.
# NOTE(review): the data has no explicit discount column, so contract type is
# being used as a stand-in here — confirm that this proxy is intended.
df['received_discount'] = np.where(df['contract_type'].eq('Two Year'), 'Yes', 'No')

# Retention flag: 1 when churn is 'No' (retained), 0 for any other value (churned).
df['retained'] = df['churn'].eq('No').astype('int64')

# Show only the identifier and the two derived flags.
df[['customer_id', 'received_discount', 'retained']]


Unnamed: 0,customer_id,received_discount,retained
0,CUST1000,Yes,1


In [5]:
# Split the retention flag into the two comparison samples using a boolean
# row mask and a column label in a single .loc selection.
discount_group = df.loc[df['received_discount'].eq('Yes'), 'retained']
no_discount_group = df.loc[df['received_discount'].eq('No'), 'retained']

# Report how many customers fall into each sample.
print("Discount group size:", discount_group.shape[0])
print("No-discount group size:", no_discount_group.shape[0])

Discount group size: 1
No-discount group size: 0


In [6]:
# Independent-samples t-test on mean retention (discount vs. no discount).
#
# The alternative hypothesis stated in this notebook is directional ("more
# likely to return"), so the test is run one-sided: alternative="greater" asks
# whether the discount group's mean retention exceeds the no-discount group's.
# SciPy's default is a two-sided test, which would not match that hypothesis.
#
# The test is only attempted when each group has more than one observation;
# otherwise a message is printed instead.
if min(len(discount_group), len(no_discount_group)) > 1:
    t_stat, p_value = ttest_ind(
        discount_group,
        no_discount_group,
        alternative="greater",
    )
    print("t-statistic:", t_stat)
    print("p-value:", p_value)
else:
    print("T-test cannot be performed due to insufficient data in one or both groups.")

T-test cannot be performed due to insufficient data in one or both groups.


In [7]:
# Print the findings heading and its bullet points, one per line.
print(
    "Findings:",
    "- A t-test compares average retention between discount and non-discount customers.",
    "- This dataset has only one customer record.",
    "- Statistical significance cannot be evaluated with insufficient data.",
    sep="\n",
)

Findings:
- A t-test compares average retention between discount and non-discount customers.
- This dataset has only one customer record.
- Statistical significance cannot be evaluated with insufficient data.


In [8]:
# Print the conclusion heading and its two statements, one per line.
print(
    "Conclusion:",
    "There is no statistical evidence to support the hypothesis.",
    "A larger dataset with multiple customers is required to perform a valid t-test.",
    sep="\n",
)

Conclusion:
There is no statistical evidence to support the hypothesis.
A larger dataset with multiple customers is required to perform a valid t-test.


In [9]:
# Print the heading and the reason the t-test was not run, one per line.
print(
    "Reason:",
    "A t-test could not be conducted because the dataset lacks sufficient observations in both groups.",
    sep="\n",
)

Reason:
A t-test could not be conducted because the dataset lacks sufficient observations in both groups.
